X-Git-Url: https://tinc-vpn.org/git/browse?a=blobdiff_plain;f=src%2Fnet_packet.c;h=f219287a07d7534028d1c83c8e7cb0a50d7215c4;hb=c26bb47af130d48d003afd29af4d7ea6ad0538c5;hp=3078e54a2c91451aba2569fbcaab7a7546e389b9;hpb=5bdc1f2b82869d379812879334dbf2b549ff48db;p=tinc diff --git a/src/net_packet.c b/src/net_packet.c index 3078e54a..f219287a 100644 --- a/src/net_packet.c +++ b/src/net_packet.c @@ -37,6 +37,8 @@ #include "digest.h" #include "device.h" #include "ethernet.h" +#include "ipv4.h" +#include "ipv6.h" #include "graph.h" #include "logger.h" #include "net.h" @@ -60,7 +62,8 @@ static void send_udppacket(node_t *, vpn_packet_t *); unsigned replaywin = 16; bool localdiscovery = true; bool udp_discovery = true; -int udp_discovery_interval = 9; +int udp_discovery_keepalive_interval = 9; +int udp_discovery_interval = 2; int udp_discovery_timeout = 30; #define MAX_SEQNO 1073741824 @@ -69,7 +72,7 @@ static void try_fix_mtu(node_t *n) { if(n->mtuprobes < 0) return; - if(n->mtuprobes == 30 || n->minmtu >= n->maxmtu) { + if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) { if(n->minmtu > n->maxmtu) n->minmtu = n->maxmtu; else @@ -107,7 +110,7 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) { gettimeofday(&now, NULL); uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4; uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4; - packet->len -= 10; + packet->len = 14; // Minimum size for any probe packet. } else { /* Legacy protocol: n won't understand type 2 probe replies. */ DATA(packet)[0] = 1; @@ -141,10 +144,11 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) { timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0}); } - if(probelen >= n->maxmtu + 8) { + if(probelen >= n->maxmtu + 1) { logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname); n->maxmtu = MTU; - n->mtuprobes = 10; + /* Set mtuprobes to 1 so that try_mtu() doesn't reset maxmtu */ + n->mtuprobes = 1; return; } @@ -157,10 +161,9 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) { try_fix_mtu(n); } - /* Calculate RTT and bandwidth. + /* Calculate RTT. The RTT is the time between the MTU probe burst was sent and the first - reply is received. The bandwidth is measured using the time between the - arrival of the first and third probe reply (or type 2 probe requests). + reply is received. */ struct timeval now, diff; @@ -180,12 +183,7 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) { if(n->probe_counter == 1) { n->rtt = diff.tv_sec + diff.tv_usec * 1e-6; n->probe_time = probe_timestamp; - } else if(n->probe_counter == 3) { - /* TODO: this will never fire after initial MTU discovery. */ - struct timeval probe_timestamp_diff; - timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff); - n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6); - logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2); + logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2); } } } @@ -304,7 +302,7 @@ static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) { #ifdef DISABLE_LEGACY return false; #else - if(!n->status.validkey) { + if(!n->status.validkey_in) { logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname); return false; } @@ -872,12 +870,12 @@ static void try_udp(node_t* n) { if(!udp_discovery) return; - struct timeval now; - gettimeofday(&now, NULL); struct timeval ping_tx_elapsed; timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed); - if(ping_tx_elapsed.tv_sec >= udp_discovery_interval) { + int interval = n->status.udp_confirmed ? udp_discovery_keepalive_interval : udp_discovery_interval; + + if(ping_tx_elapsed.tv_sec >= interval) { send_udp_probe_packet(n, MAX(n->minmtu, 16)); n->udp_ping_sent = now; @@ -889,9 +887,92 @@ static void try_udp(node_t* n) { } } -// This function tries to determines the MTU of a node. -// By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu. -// If the MTU is already fixed, this function checks if it can be increased. +static length_t choose_initial_maxmtu(node_t *n) { +#ifdef IP_MTU + + int sock = -1; + + const sockaddr_t *sa = NULL; + int sockindex; + choose_udp_address(n, &sa, &sockindex); + if(!sa) + return MTU; + + sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP); + if(sock < 0) { + logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno)); + return MTU; + } + + if(connect(sock, &sa->sa, SALEN(sa->sa))) { + logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno)); + close(sock); + return MTU; + } + + int ip_mtu; + socklen_t ip_mtu_len = sizeof ip_mtu; + if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) { + logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno)); + close(sock); + return MTU; + } + + close(sock); + + /* getsockopt(IP_MTU) returns the MTU of the physical interface. + We need to remove various overheads to get to the tinc MTU. */ + length_t mtu = ip_mtu; + mtu -= (sa->sa.sa_family == AF_INET6) ? sizeof(struct ip6_hdr) : sizeof(struct ip); + mtu -= 8; /* UDP */ + if(n->status.sptps) { + mtu -= SPTPS_DATAGRAM_OVERHEAD; + if((n->options >> 24) >= 4) + mtu -= sizeof(node_id_t) + sizeof(node_id_t); + } else { + mtu -= digest_length(n->outdigest); + + /* Now it's tricky. We use CBC mode, so the length of the + encrypted payload must be a multiple of the blocksize. The + sequence number is also part of the encrypted payload, so we + must account for it after correcting for the blocksize. + Furthermore, the padding in the last block must be at least + 1 byte. */ + + length_t blocksize = cipher_blocksize(n->outcipher); + + if(blocksize > 1) { + mtu /= blocksize; + mtu *= blocksize; + mtu--; + } + + mtu -= 4; // seqno + } + + if (mtu < 512) { + logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu); + return MTU; + } + if (mtu > MTU) + return MTU; + + logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu); + return mtu; + +#else + + return MTU; + +#endif +} + +/* This function tries to determines the MTU of a node. + By calling this function repeatedly, n->minmtu will be progressively + increased, and at some point, n->mtu will be fixed to n->minmtu. If the MTU + is already fixed, this function checks if it can be increased. +*/ + static void try_mtu(node_t *n) { if(!(n->options & OPTION_PMTU_DISCOVERY)) return; @@ -903,16 +984,14 @@ static void try_mtu(node_t *n) { return; } - /* mtuprobes == 0..29: initial discovery, send bursts with 1 second interval, mtuprobes++ - mtuprobes == 30: fix MTU, and go to -1 + /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++ + mtuprobes == 20: fix MTU, and go to -1 mtuprobes == -1: send one >maxmtu probe every pingtimeout */ - struct timeval now; - gettimeofday(&now, NULL); struct timeval elapsed; timersub(&now, &n->probe_sent_time, &elapsed); if(n->mtuprobes >= 0) { - if(n->mtuprobes != 0 && elapsed.tv_sec < 1) + if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333) return; } else { if(elapsed.tv_sec < pingtimeout) @@ -921,21 +1000,50 @@ static void try_mtu(node_t *n) { try_fix_mtu(n); - int timeout; if(n->mtuprobes < 0) { /* After the initial discovery, we only send one >maxmtu probe to detect PMTU increases. */ - if(n->maxmtu + 8 < MTU) - send_udp_probe_packet(n, n->maxmtu + 8); + if(n->maxmtu + 1 < MTU) + send_udp_probe_packet(n, n->maxmtu + 1); } else { - /* Probes are sent in batches of three, with random sizes between the - lower and upper boundaries for the MTU thus far discovered. */ - for (int i = 0; i < 3; i++) { - int len = n->maxmtu; - if(n->minmtu < n->maxmtu) - len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu); - - send_udp_probe_packet(n, MAX(len, 64)); + /* Before initial discovery begins, set maxmtu to the most likely value. + If it's underestimated, we will correct it after initial discovery. */ + if(n->mtuprobes == 0) + n->maxmtu = choose_initial_maxmtu(n); + + for (;;) { + /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets, + but it will typically increase convergence time in the no-loss case. */ + const length_t probes_per_cycle = 8; + + /* This magic value was determined using math simulations. + It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe. + Since 1407 is just below the range of tinc MTUs over typical networks, + this fine-tuning allows tinc to cover a lot of ground very quickly. + This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller, + then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario + if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */ + const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1; + + const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1; + const length_t minmtu = MAX(n->minmtu, 512); + const float interval = n->maxmtu - minmtu; + + /* The core of the discovery algorithm is this exponential. + It produces very large probes early in the cycle, and then it very quickly decreases the probe size. + This reflects the fact that in the most difficult cases, we don't get any feedback for probes that + are too large, and therefore we need to concentrate on small offsets so that we can quickly converge + on the precise MTU as we are approaching it. + The last probe of the cycle is always 1 byte in size - this is to make sure we'll get at least one + reply per cycle so that we can make progress. */ + const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1)); + + length_t maxmtu = n->maxmtu; + send_udp_probe_packet(n, minmtu + offset); + /* If maxmtu changed, it means the probe was rejected by the system because it was too large. + In that case, we recalculate with the new maxmtu and try again. */ + if(n->mtuprobes < 0 || maxmtu == n->maxmtu) + break; } if(n->mtuprobes >= 0) @@ -960,44 +1068,65 @@ static void try_mtu(node_t *n) { n->prev_received = n->received; } -// This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys). -// If a tunnel is already established, it tries to improve it (e.g. by trying to establish a UDP tunnel instead of TCP). -// This function makes no guarantees - it is up to the caller to check the node's state to figure out if TCP and/or UDP is usable. -// By calling this function repeatedly, the tunnel is gradually improved until we hit the wall imposed by the underlying network environment. -// It is recommended to call this function every time a packet is sent (or intended to be sent) to a node, -// so that the tunnel keeps improving as packets flow, and then gracefully downgrades itself as it goes idle. -static void try_tx(node_t *n) { +/* These functions try to establish a tunnel to a node (or its relay) so that + packets can be sent (e.g. exchange keys). + If a tunnel is already established, it tries to improve it (e.g. by trying + to establish a UDP tunnel instead of TCP). This function makes no + guarantees - it is up to the caller to check the node's state to figure out + if TCP and/or UDP is usable. By calling this function repeatedly, the + tunnel is gradually improved until we hit the wall imposed by the underlying + network environment. It is recommended to call this function every time a + packet is sent (or intended to be sent) to a node, so that the tunnel keeps + improving as packets flow, and then gracefully downgrades itself as it goes + idle. +*/ + +static void try_tx_sptps(node_t *n) { /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET - messages anyway, so there's no need for SPTPS at all. Otherwise, get the keys. */ - if(n->status.sptps && !(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))) { - try_sptps(n); - if (!n->status.validkey) - return; - } + messages anyway, so there's no need for SPTPS at all. */ + + if(n->connection && ((myself->options | n->options) & OPTION_TCPONLY)) + return; + + /* Otherwise, try to do SPTPS authentication with n if necessary. */ + + try_sptps(n); + + /* Do we need to relay packets? */ node_t *via = (n->via == myself) ? n->nexthop : n->via; - - if((myself->options | via->options) & OPTION_TCPONLY) + + /* If the relay doesn't support SPTPS, everything goes via TCP anyway. */ + + if((via->options >> 24) < 4) return; - if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) { - send_req_key(via); - via->last_req_key = now.tv_sec; - } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4) { - try_udp(via); - try_mtu(via); + /* If we do have a relay, try everything with that one instead. */ + + if(via != n) + return try_tx_sptps(via); + + try_udp(n); + try_mtu(n); +} + +static void try_tx_legacy(node_t *n) { + /* Check if we already have a key, or request one. */ + + if(!n->status.validkey) { + if(n->last_req_key + 10 <= now.tv_sec) { + send_req_key(n); + n->last_req_key = now.tv_sec; + } + return; } - /* If we don't know how to reach "via" yet, then try to reach it through a relay. */ - if(n->status.sptps && !via->status.udp_confirmed && via->nexthop != via && (via->nexthop->options >> 24) >= 4) - try_tx(via->nexthop); + try_udp(n); + try_mtu(n); } -/* - send a packet to the given vpn ip. -*/ void send_packet(node_t *n, vpn_packet_t *packet) { - node_t *via; + // If it's for myself, write it to the tun/tap device. if(n == myself) { if(overwrite_mac) @@ -1008,44 +1137,47 @@ void send_packet(node_t *n, vpn_packet_t *packet) { return; } - logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)", - packet->len, n->name, n->hostname); + logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)", packet->len, n->name, n->hostname); + + // If the node is not reachable, drop it. if(!n->status.reachable) { - logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable", - n->name, n->hostname); + logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable", n->name, n->hostname); return; } + // Keep track of packet statistics. + n->out_packets++; n->out_bytes += packet->len; + // Check if it should be sent as an SPTPS packet. + if(n->status.sptps) { send_sptps_packet(n, packet); - goto end; + try_tx_sptps(n); + return; } - via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via; + // Determine which node to actually send it to. + + node_t *via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via; if(via != n) - logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)", - n->name, via->name, n->via->hostname); + logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)", n->name, via->name, n->via->hostname); + + // Try to send via UDP, unless TCP is forced. if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) { if(!send_tcppacket(via->connection, packet)) terminate_connection(via->connection, true); - } else - send_udppacket(via, packet); + return; + } -end: - /* Try to improve the tunnel. - Note that we do this *after* we send the packet because sending actual packets take priority - with regard to the send buffer space and latency. */ - try_tx(n); + send_udppacket(via, packet); + try_tx_legacy(via); } -/* Broadcast a packet using the minimum spanning tree */ - void broadcast_packet(const node_t *from, vpn_packet_t *packet) { // Always give ourself a copy of the packet. if(from != myself)