Don't send MTU probes smaller than 512 bytes.
[tinc] / src / net_packet.c
index 90fda37..626114c 100644 (file)
@@ -65,94 +65,19 @@ int udp_discovery_timeout = 30;
 
 #define MAX_SEQNO 1073741824
 
-static void send_udp_probe_packet(node_t *n, int len) {
-       vpn_packet_t packet;
-       packet.offset = DEFAULT_PACKET_OFFSET;
-       memset(DATA(&packet), 0, 14);
-       randomize(DATA(&packet) + 14, len - 14);
-       packet.len = len;
-       packet.priority = 0;
-
-       logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
-
-       send_udppacket(n, &packet);
-}
-
-// This function tries to determines the MTU of a node.
-// By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu.
-// If the MTU is already fixed, this function checks if it can be increased.
-static void try_mtu(node_t *n) {
-       if(!(n->options & OPTION_PMTU_DISCOVERY))
-               return;
-
-       if(udp_discovery && !n->status.udp_confirmed) {
-               n->mtuprobes = 0;
-               n->minmtu = 0;
-               n->maxmtu = MTU;
+static void try_fix_mtu(node_t *n) {
+       if(n->mtuprobes < 0)
                return;
-       }
-
-       /* mtuprobes == 0..29: initial discovery, send bursts with 1 second interval, mtuprobes++
-          mtuprobes ==    30: fix MTU, and go to 31
-          mtuprobes ==    31: send one >maxmtu probe every pingtimeout */
-
-       struct timeval now;
-       gettimeofday(&now, NULL);
-       struct timeval elapsed;
-       timersub(&now, &n->probe_sent_time, &elapsed);
-       if(n->mtuprobes < 31) {
-               if(n->mtuprobes != 0 && elapsed.tv_sec < 1)
-                       return;
-       } else {
-               if(elapsed.tv_sec < pingtimeout)
-                       return;
-       }
 
-       if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
+       if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) {
                if(n->minmtu > n->maxmtu)
                        n->minmtu = n->maxmtu;
                else
                        n->maxmtu = n->minmtu;
                n->mtu = n->minmtu;
                logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
-               n->mtuprobes = 31;
-       }
-
-       int timeout;
-       if(n->mtuprobes == 31) {
-               /* After the initial discovery, we only send one >maxmtu probe
-                  to detect PMTU increases. */
-               if(n->maxmtu + 8 < MTU)
-                       send_udp_probe_packet(n, n->maxmtu + 8);
-       } else {
-               /* Probes are sent in batches of three, with random sizes between the
-                  lower and upper boundaries for the MTU thus far discovered. */
-               for (int i = 0; i < 3; i++) {
-                       int len = n->maxmtu;
-                       if(n->minmtu < n->maxmtu)
-                               len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
-
-                       send_udp_probe_packet(n, MAX(len, 64));
-               }
-               n->mtuprobes++;
+               n->mtuprobes = -1;
        }
-
-       n->probe_counter = 0;
-       n->probe_sent_time = now;
-       n->probe_time = now;
-
-       /* Calculate the packet loss of incoming traffic by comparing the rate of
-          packets received to the rate with which the sequence number has increased.
-          TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
-        */
-
-       if(n->received > n->prev_received)
-               n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
-       else
-               n->packetloss = n->received_seqno <= n->prev_received_seqno;
-
-       n->prev_received_seqno = n->received_seqno;
-       n->prev_received = n->received;
 }
 
 static void udp_probe_timeout_handler(void *data) {
@@ -216,10 +141,10 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
                        timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
                }
 
-               if(probelen >= n->maxmtu + 8) {
+               if(probelen >= n->maxmtu + 1) {
                        logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
                        n->maxmtu = MTU;
-                       n->mtuprobes = 10;
+                       n->mtuprobes = 0;
                        return;
                }
 
@@ -227,13 +152,14 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
 
                if(probelen > n->maxmtu)
                        probelen = n->maxmtu;
-               if(n->minmtu < probelen)
+               if(n->minmtu < probelen) {
                        n->minmtu = probelen;
+                       try_fix_mtu(n);
+               }
 
-               /* Calculate RTT and bandwidth.
+               /* Calculate RTT.
                   The RTT is the time between the MTU probe burst was sent and the first
-                  reply is received. The bandwidth is measured using the time between the
-                  arrival of the first and third probe reply (or type 2 probe requests).
+                  reply is received.
                 */
 
                struct timeval now, diff;
@@ -253,12 +179,7 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
                if(n->probe_counter == 1) {
                        n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
                        n->probe_time = probe_timestamp;
-               } else if(n->probe_counter == 3) {
-                       /* TODO: this will never fire after initial MTU discovery. */
-                       struct timeval probe_timestamp_diff;
-                       timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
-                       n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
-                       logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
+                       logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2);
                }
        }
 }
@@ -748,6 +669,7 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
                                n->maxmtu = origlen - 1;
                        if(n->mtu >= origlen)
                                n->mtu = origlen - 1;
+                       try_fix_mtu(n);
                } else
                        logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
        }
@@ -812,6 +734,7 @@ static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void
                                relay->maxmtu = len - 1;
                        if(relay->mtu >= len)
                                relay->mtu = len - 1;
+                       try_fix_mtu(relay);
                } else {
                        logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
                        return false;
@@ -923,6 +846,19 @@ static void try_sptps(node_t *n) {
        return;
 }
 
+static void send_udp_probe_packet(node_t *n, int len) {
+       vpn_packet_t packet;
+       packet.offset = DEFAULT_PACKET_OFFSET;
+       memset(DATA(&packet), 0, 14);
+       randomize(DATA(&packet) + 14, len - 14);
+       packet.len = len;
+       packet.priority = 0;
+
+       logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
+
+       send_udppacket(n, &packet);
+}
+
 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
 // If a tunnel is already established, it makes sure it stays up.
 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
@@ -947,6 +883,91 @@ static void try_udp(node_t* n) {
        }
 }
 
+// This function tries to determine the MTU of a node.
+// By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu.
+// If the MTU is already fixed, this function checks if it can be increased.
+static void try_mtu(node_t *n) {
+       if(!(n->options & OPTION_PMTU_DISCOVERY))
+               return;
+
+       if(udp_discovery && !n->status.udp_confirmed) {
+               n->mtuprobes = 0;
+               n->minmtu = 0;
+               n->maxmtu = MTU;
+               return;
+       }
+
+       /* mtuprobes == 0..19: initial discovery, send one probe at most every 333 ms, mtuprobes++
+          mtuprobes ==    20: fix MTU, and go to -1
+          mtuprobes ==    -1: send one >maxmtu probe every pingtimeout */
+
+       struct timeval now;
+       gettimeofday(&now, NULL);
+       struct timeval elapsed;
+       timersub(&now, &n->probe_sent_time, &elapsed);
+       if(n->mtuprobes >= 0) {
+               if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
+                       return;
+       } else {
+               if(elapsed.tv_sec < pingtimeout)
+                       return;
+       }
+
+       try_fix_mtu(n);
+
+       int timeout;
+       if(n->mtuprobes < 0) {
+               /* After the initial discovery, we only send one >maxmtu probe
+                  to detect PMTU increases. */
+               if(n->maxmtu + 1 < MTU)
+                       send_udp_probe_packet(n, n->maxmtu + 1);
+       } else {
+               /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
+                  but it will typically increase convergence time in the no-loss case. */
+               const length_t probes_per_cycle = 8;
+
+               /* This magic value was determined using math simulations.
+                  It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
+                  Since 1407 is just below the range of tinc MTUs over typical networks,
+                  this fine-tuning allows tinc to cover a lot of ground very quickly. */
+               const float multiplier = 0.97;
+
+               const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1;
+               const length_t minmtu = MAX(n->minmtu, 512);
+               const float interval = n->maxmtu - minmtu;
+
+               /* The core of the discovery algorithm is this exponential.
+                  It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
+                  This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
+                  are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
+                  on the precise MTU as we are approaching it.
+                  The last probe of the cycle is always just 1 byte larger than minmtu - this is to make sure we'll
+                  get at least one reply per cycle so that we can make progress. */
+               const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
+
+               send_udp_probe_packet(n, minmtu + offset);
+               if(n->mtuprobes >= 0)
+                       n->mtuprobes++;
+       }
+
+       n->probe_counter = 0;
+       n->probe_sent_time = now;
+       n->probe_time = now;
+
+       /* Calculate the packet loss of incoming traffic by comparing the rate of
+          packets received to the rate with which the sequence number has increased.
+          TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
+        */
+
+       if(n->received > n->prev_received)
+               n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
+       else
+               n->packetloss = n->received_seqno <= n->prev_received_seqno;
+
+       n->prev_received_seqno = n->received_seqno;
+       n->prev_received = n->received;
+}
+
 // This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys).
 // If a tunnel is already established, it tries to improve it (e.g. by trying to establish a UDP tunnel instead of TCP).
 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if TCP and/or UDP is usable.