X-Git-Url: https://tinc-vpn.org/git/browse?a=blobdiff_plain;f=src%2Fnet_packet.c;h=a7ddcdf0191aa47e7c284e093ddf9a7ccaedf20c;hb=06345f89b9a1e9acaf74cbbf896559b4286c102e;hp=9bebca43f568656cfb0f34a91c64013a420e5dc5;hpb=1b972f22733dc979568bc0ad8ebe0c711887e447;p=tinc diff --git a/src/net_packet.c b/src/net_packet.c index 9bebca43..a7ddcdf0 100644 --- a/src/net_packet.c +++ b/src/net_packet.c @@ -37,6 +37,8 @@ #include "digest.h" #include "device.h" #include "ethernet.h" +#include "ipv4.h" +#include "ipv6.h" #include "graph.h" #include "logger.h" #include "net.h" @@ -69,7 +71,7 @@ static void try_fix_mtu(node_t *n) { if(n->mtuprobes < 0) return; - if(n->mtuprobes == 90 || n->minmtu >= n->maxmtu) { + if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) { if(n->minmtu > n->maxmtu) n->minmtu = n->maxmtu; else @@ -141,10 +143,11 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) { timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0}); } - if(probelen >= n->maxmtu + 8) { + if(probelen >= n->maxmtu + 1) { logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname); n->maxmtu = MTU; - n->mtuprobes = 30; + /* Set mtuprobes to 1 so that try_mtu() doesn't reset maxmtu */ + n->mtuprobes = 1; return; } @@ -157,10 +160,9 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) { try_fix_mtu(n); } - /* Calculate RTT and bandwidth. + /* Calculate RTT. The RTT is the time between the MTU probe burst was sent and the first - reply is received. The bandwidth is measured using the time between the - arrival of the first and third probe reply (or type 2 probe requests). + reply is received. */ struct timeval now, diff; @@ -180,12 +182,7 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) { if(n->probe_counter == 1) { n->rtt = diff.tv_sec + diff.tv_usec * 1e-6; n->probe_time = probe_timestamp; - } else if(n->probe_counter == 3) { - /* TODO: this will never fire - we're not sending batches of three anymore. */ - struct timeval probe_timestamp_diff; - timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff); - n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6); - logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2); + logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2); } } } @@ -889,6 +886,67 @@ static void try_udp(node_t* n) { } } +static length_t choose_initial_maxmtu(node_t *n) { +#ifdef IP_MTU + + int sock = -1; + + const sockaddr_t *sa = NULL; + int sockindex; + choose_udp_address(n, &sa, &sockindex); + if(!sa) + return MTU; + + sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP); + if(sock < 0) { + logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno)); + return MTU; + } + + if(connect(sock, &sa->sa, SALEN(sa->sa))) { + logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno)); + close(sock); + return MTU; + } + + int ip_mtu; + socklen_t ip_mtu_len = sizeof ip_mtu; + if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) { + logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno)); + close(sock); + return MTU; + } + + close(sock); + + /* getsockopt(IP_MTU) returns the MTU of the physical interface. + We need to remove various overheads to get to the tinc MTU. */ + length_t mtu = ip_mtu; + mtu -= (sa->sa.sa_family == AF_INET6) ? sizeof(struct ip6_hdr) : sizeof(struct ip); + mtu -= 8; /* UDP */ + if(n->status.sptps) { + mtu -= SPTPS_DATAGRAM_OVERHEAD; + if((n->options >> 24) >= 4) + mtu -= sizeof(node_id_t) + sizeof(node_id_t); + } + + if (mtu < 512) { + logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu); + return MTU; + } + if (mtu > MTU) + return MTU; + + logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu); + return mtu; + +#else + + return MTU; + +#endif +} + // This function tries to determines the MTU of a node. // By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu. // If the MTU is already fixed, this function checks if it can be increased. @@ -903,8 +961,8 @@ static void try_mtu(node_t *n) { return; } - /* mtuprobes == 0..89: initial discovery, send bursts with 1 second interval, mtuprobes++ - mtuprobes == 90: fix MTU, and go to -1 + /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++ + mtuprobes == 20: fix MTU, and go to -1 mtuprobes == -1: send one >maxmtu probe every pingtimeout */ struct timeval now; @@ -925,15 +983,48 @@ static void try_mtu(node_t *n) { if(n->mtuprobes < 0) { /* After the initial discovery, we only send one >maxmtu probe to detect PMTU increases. */ - if(n->maxmtu + 8 < MTU) - send_udp_probe_packet(n, n->maxmtu + 8); + if(n->maxmtu + 1 < MTU) + send_udp_probe_packet(n, n->maxmtu + 1); } else { - /* Probes are sent with random sizes between the - lower and upper boundaries for the MTU thus far discovered. */ - int len = n->maxmtu; - if(n->minmtu < n->maxmtu) - len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu); - send_udp_probe_packet(n, MAX(len, 64)); + /* Before initial discovery begins, set maxmtu to the most likely value. + If it's underestimated, we will correct it after initial discovery. */ + if(n->mtuprobes == 0) + n->maxmtu = choose_initial_maxmtu(n); + + for (;;) { + /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets, + but it will typically increase convergence time in the no-loss case. */ + const length_t probes_per_cycle = 8; + + /* This magic value was determined using math simulations. + It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe. + Since 1407 is just below the range of tinc MTUs over typical networks, + this fine-tuning allows tinc to cover a lot of ground very quickly. + This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller, + then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario + if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */ + const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1; + + const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1; + const length_t minmtu = MAX(n->minmtu, 512); + const float interval = n->maxmtu - minmtu; + + /* The core of the discovery algorithm is this exponential. + It produces very large probes early in the cycle, and then it very quickly decreases the probe size. + This reflects the fact that in the most difficult cases, we don't get any feedback for probes that + are too large, and therefore we need to concentrate on small offsets so that we can quickly converge + on the precise MTU as we are approaching it. + The last probe of the cycle is always 1 byte in size - this is to make sure we'll get at least one + reply per cycle so that we can make progress. */ + const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1)); + + length_t maxmtu = n->maxmtu; + send_udp_probe_packet(n, minmtu + offset); + /* If maxmtu changed, it means the probe was rejected by the system because it was too large. + In that case, we recalculate with the new maxmtu and try again. */ + if(n->mtuprobes < 0 || maxmtu == n->maxmtu) + break; + } if(n->mtuprobes >= 0) n->mtuprobes++;