Don't send MTU probes smaller than 512 bytes.
[tinc] / src / net_packet.c
index 131f52e..626114c 100644 (file)
@@ -65,117 +65,19 @@ int udp_discovery_timeout = 30;
 
 #define MAX_SEQNO 1073741824
 
-static void send_udp_probe_packet(node_t *n, int len) {
-       vpn_packet_t packet;
-       packet.offset = DEFAULT_PACKET_OFFSET;
-       memset(DATA(&packet), 0, 14);
-       randomize(DATA(&packet) + 14, len - 14);
-       packet.len = len;
-       packet.priority = 0;
-
-       logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
-
-       send_udppacket(n, &packet);
-}
-
-static void send_mtu_probe_handler(void *data) {
-       node_t *n = data;
-
-       if(!n->status.reachable || !n->status.validkey) {
-               logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
-               n->mtuprobes = 0;
+static void try_fix_mtu(node_t *n) {
+       if(n->mtuprobes < 0)
                return;
-       }
-
-       /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
-          mtuprobes ==    31: sleep pinginterval seconds
-          mtuprobes ==    32: send 1 burst, sleep pingtimeout second
-          mtuprobes ==    33: no response from other side, restart PMTU discovery process */
-
-       n->mtuprobes++;
-       int timeout = 1;
-
-       if(n->mtuprobes > 32) {
-               if(!n->minmtu) {
-                       n->mtuprobes = 31;
-                       timeout = pinginterval;
-                       goto end;
-               }
-
-               logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
-               n->status.udp_confirmed = false;
-               n->mtuprobes = 1;
-               n->minmtu = 0;
-               n->maxmtu = MTU;
-       }
-
-       if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
-               logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
-               n->mtuprobes = 31;
-       }
 
-       if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
+       if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) {
                if(n->minmtu > n->maxmtu)
                        n->minmtu = n->maxmtu;
                else
                        n->maxmtu = n->minmtu;
                n->mtu = n->minmtu;
                logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
-               n->mtuprobes = 31;
+               n->mtuprobes = -1;
        }
-
-       if(n->mtuprobes == 31) {
-               timeout = pinginterval;
-               goto end;
-       } else if(n->mtuprobes == 32) {
-               timeout = pingtimeout;
-       }
-
-       /* After the initial discovery, a fourth packet is added to each batch with a
-          size larger than the currently known PMTU, to test if the PMTU has increased. */
-       if (n->mtuprobes >= 30 && n->maxmtu + 8 < MTU)
-               send_udp_probe_packet(n, n->maxmtu + 8);
-
-       /* Probes are sent in batches of three, with random sizes between the
-          lower and upper boundaries for the MTU thus far discovered. */
-       for (int i = 0; i < 3; i++) {
-               int len = n->maxmtu;
-               if(n->minmtu < n->maxmtu)
-                       len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
-
-               send_udp_probe_packet(n, MAX(len, 64));
-       }
-
-       /* In case local discovery is enabled, another packet is added to each batch,
-          which will be broadcast to the local network. */
-       if(localdiscovery && n->mtuprobes <= 10 && n->prevedge) {
-               n->status.send_locally = true;
-               send_udp_probe_packet(n, 16);
-               n->status.send_locally = false;
-       }
-
-       n->probe_counter = 0;
-       gettimeofday(&n->probe_time, NULL);
-
-       /* Calculate the packet loss of incoming traffic by comparing the rate of
-          packets received to the rate with which the sequence number has increased.
-        */
-
-       if(n->received > n->prev_received)
-               n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
-       else
-               n->packetloss = n->received_seqno <= n->prev_received_seqno;
-
-       n->prev_received_seqno = n->received_seqno;
-       n->prev_received = n->received;
-
-end:
-       timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
-}
-
-void send_mtu_probe(node_t *n) {
-       timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
-       send_mtu_probe_handler(n);
 }
 
 static void udp_probe_timeout_handler(void *data) {
@@ -185,7 +87,7 @@ static void udp_probe_timeout_handler(void *data) {
 
        logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
        n->status.udp_confirmed = false;
-       n->mtuprobes = 1;
+       n->mtuprobes = 0;
        n->minmtu = 0;
        n->maxmtu = MTU;
 }
@@ -239,33 +141,25 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
                        timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
                }
 
-               /* If we haven't established the PMTU yet, restart the discovery process. */
-
-               if(n->mtuprobes > 30) {
-                       if (probelen == n->maxmtu + 8) {
-                               logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
-                               n->maxmtu = MTU;
-                               n->mtuprobes = 10;
-                               return;
-                       }
-
-                       if(n->minmtu)
-                               n->mtuprobes = 30;
-                       else
-                               n->mtuprobes = 1;
+               if(probelen >= n->maxmtu + 1) {
+                       logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
+                       n->maxmtu = MTU;
+                       n->mtuprobes = 0;
+                       return;
                }
 
                /* If applicable, raise the minimum supported MTU */
 
                if(probelen > n->maxmtu)
                        probelen = n->maxmtu;
-               if(n->minmtu < probelen)
+               if(n->minmtu < probelen) {
                        n->minmtu = probelen;
+                       try_fix_mtu(n);
+               }
 
-               /* Calculate RTT and bandwidth.
+               /* Calculate RTT.
                   The RTT is the time between the MTU probe burst was sent and the first
-                  reply is received. The bandwidth is measured using the time between the
-                  arrival of the first and third probe reply (or type 2 probe requests).
+                  reply is received.
                 */
 
                struct timeval now, diff;
@@ -285,11 +179,7 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
                if(n->probe_counter == 1) {
                        n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
                        n->probe_time = probe_timestamp;
-               } else if(n->probe_counter == 3) {
-                       struct timeval probe_timestamp_diff;
-                       timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
-                       n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
-                       logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
+                       logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2);
                }
        }
 }
@@ -779,6 +669,7 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
                                n->maxmtu = origlen - 1;
                        if(n->mtu >= origlen)
                                n->mtu = origlen - 1;
+                       try_fix_mtu(n);
                } else
                        logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
        }
@@ -843,6 +734,7 @@ static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void
                                relay->maxmtu = len - 1;
                        if(relay->mtu >= len)
                                relay->mtu = len - 1;
+                       try_fix_mtu(relay);
                } else {
                        logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
                        return false;
@@ -954,6 +846,19 @@ static void try_sptps(node_t *n) {
        return;
 }
 
+static void send_udp_probe_packet(node_t *n, int len) {
+       vpn_packet_t packet;
+       packet.offset = DEFAULT_PACKET_OFFSET;
+       memset(DATA(&packet), 0, 14);
+       randomize(DATA(&packet) + 14, len - 14);
+       packet.len = len;
+       packet.priority = 0;
+
+       logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
+
+       send_udppacket(n, &packet);
+}
+
 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
 // If a tunnel is already established, it makes sure it stays up.
 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
@@ -969,7 +874,98 @@ static void try_udp(node_t* n) {
        if(ping_tx_elapsed.tv_sec >= udp_discovery_interval) {
                send_udp_probe_packet(n, MAX(n->minmtu, 16));
                n->udp_ping_sent = now;
+
+               if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
+                       n->status.send_locally = true;
+                       send_udp_probe_packet(n, 16);
+                       n->status.send_locally = false;
+               }
+       }
+}
+
+// This function tries to determine the MTU of a node.
+// By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu.
+// If the MTU is already fixed, this function checks if it can be increased.
+static void try_mtu(node_t *n) {
+       if(!(n->options & OPTION_PMTU_DISCOVERY))
+               return;
+
+       if(udp_discovery && !n->status.udp_confirmed) {
+               n->mtuprobes = 0;
+               n->minmtu = 0;
+               n->maxmtu = MTU;
+               return;
+       }
+
+       /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++
+          mtuprobes ==    20: fix MTU, and go to -1
+          mtuprobes ==    -1: send one >maxmtu probe every pingtimeout */
+
+       struct timeval now;
+       gettimeofday(&now, NULL);
+       struct timeval elapsed;
+       timersub(&now, &n->probe_sent_time, &elapsed);
+       if(n->mtuprobes >= 0) {
+               if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
+                       return;
+       } else {
+               if(elapsed.tv_sec < pingtimeout)
+                       return;
        }
+
+       try_fix_mtu(n);
+
+       int timeout;
+       if(n->mtuprobes < 0) {
+               /* After the initial discovery, we only send one >maxmtu probe
+                  to detect PMTU increases. */
+               if(n->maxmtu + 1 < MTU)
+                       send_udp_probe_packet(n, n->maxmtu + 1);
+       } else {
+               /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
+                  but it will typically increase convergence time in the no-loss case. */
+               const length_t probes_per_cycle = 8;
+
+               /* This magic value was determined using math simulations.
+                  It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
+                  Since 1407 is just below the range of tinc MTUs over typical networks,
+                  this fine-tuning allows tinc to cover a lot of ground very quickly. */
+               const float multiplier = 0.97;
+
+               const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1;
+               const length_t minmtu = MAX(n->minmtu, 512);
+               const float interval = n->maxmtu - minmtu;
+
+               /* The core of the discovery algorithm is this exponential.
+                  It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
+                  This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
+                  are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
+                  on the precise MTU as we are approaching it.
+                  The last probe of the cycle is always just 1 byte larger than minmtu - this is to make sure we'll
+                  get at least one reply per cycle so that we can make progress. */
+               const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
+
+               send_udp_probe_packet(n, minmtu + offset);
+               if(n->mtuprobes >= 0)
+                       n->mtuprobes++;
+       }
+
+       n->probe_counter = 0;
+       n->probe_sent_time = now;
+       n->probe_time = now;
+
+       /* Calculate the packet loss of incoming traffic by comparing the rate of
+          packets received to the rate with which the sequence number has increased.
+          TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
+        */
+
+       if(n->received > n->prev_received)
+               n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
+       else
+               n->packetloss = n->received_seqno <= n->prev_received_seqno;
+
+       n->prev_received_seqno = n->received_seqno;
+       n->prev_received = n->received;
 }
 
 // This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys).
@@ -995,8 +991,10 @@ static void try_tx(node_t *n) {
        if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) {
                send_req_key(via);
                via->last_req_key = now.tv_sec;
-       } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4)
+       } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4) {
                try_udp(via);
+               try_mtu(via);
+       }
 
        /* If we don't know how to reach "via" yet, then try to reach it through a relay. */
        if(n->status.sptps && !via->status.udp_confirmed && via->nexthop != via && (via->nexthop->options >> 24) >= 4)