X-Git-Url: https://tinc-vpn.org/git/browse?p=tinc;a=blobdiff_plain;f=src%2Fnet_packet.c;h=96f8f10e7b645a5364687671b357146cfc7f6544;hp=2f15bb4ef245acdde94c5cd0c89812b02b3f4e6d;hb=f0afde0467443969eb408090d6b8ee542768ee33;hpb=24d28adf64934c8d726959e25dce8c10dbd10d1f diff --git a/src/net_packet.c b/src/net_packet.c index 2f15bb4e..96f8f10e 100644 --- a/src/net_packet.c +++ b/src/net_packet.c @@ -37,6 +37,8 @@ #include "digest.h" #include "device.h" #include "ethernet.h" +#include "ipv4.h" +#include "ipv6.h" #include "graph.h" #include "logger.h" #include "net.h" @@ -50,6 +52,11 @@ #define MAX(a, b) ((a) > (b) ? (a) : (b)) #endif +/* The minimum size of a probe is 14 bytes, but since we normally use CBC mode + encryption, we can add a few extra random bytes without increasing the + resulting packet size. */ +#define MIN_PROBE_SIZE 18 + int keylifetime = 0; #ifdef HAVE_LZO static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS]; @@ -60,7 +67,8 @@ static void send_udppacket(node_t *, vpn_packet_t *); unsigned replaywin = 16; bool localdiscovery = true; bool udp_discovery = true; -int udp_discovery_interval = 9; +int udp_discovery_keepalive_interval = 9; +int udp_discovery_interval = 2; int udp_discovery_timeout = 30; #define MAX_SEQNO 1073741824 @@ -69,7 +77,7 @@ static void try_fix_mtu(node_t *n) { if(n->mtuprobes < 0) return; - if(n->mtuprobes == 90 || n->minmtu >= n->maxmtu) { + if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) { if(n->minmtu > n->maxmtu) n->minmtu = n->maxmtu; else @@ -87,6 +95,7 @@ static void udp_probe_timeout_handler(void *data) { logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname); n->status.udp_confirmed = false; + n->maxrecentlen = 0; n->mtuprobes = 0; n->minmtu = 0; n->maxmtu = MTU; @@ -94,20 +103,22 @@ static void udp_probe_timeout_handler(void *data) { static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) { if(!DATA(packet)[0]) { - logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname); - /* It's a probe request, send back a reply */ + if(!n->status.sptps && !n->status.validkey) { + // But not if we don't have his key. + logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request from %s (%s) but we don't have his key yet", n->name, n->hostname); + return; + } + + logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname); + /* Type 2 probe replies were introduced in protocol 17.3 */ if ((n->options >> 24) >= 3) { uint8_t *data = DATA(packet); *data++ = 2; uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2; - struct timeval now; - gettimeofday(&now, NULL); - uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4; - uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4; - packet->len -= 10; + packet->len = MIN_PROBE_SIZE; } else { /* Legacy protocol: n won't understand type 2 probe replies. */ DATA(packet)[0] = 1; @@ -141,46 +152,24 @@ static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) { timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0}); } - if(probelen >= n->maxmtu + 8) { + if(probelen > n->maxmtu) { logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname); + n->minmtu = probelen; n->maxmtu = MTU; - n->mtuprobes = 30; + /* Set mtuprobes to 1 so that try_mtu() doesn't reset maxmtu */ + n->mtuprobes = 1; return; + } else if(n->mtuprobes < 0 && probelen == n->maxmtu) { + /* We got a maxmtu sized packet, confirming the PMTU is still valid. */ + n->mtuprobes = -1; } /* If applicable, raise the minimum supported MTU */ - if(probelen > n->maxmtu) - probelen = n->maxmtu; if(n->minmtu < probelen) { n->minmtu = probelen; try_fix_mtu(n); } - - /* Calculate RTT. - The RTT is the time between the MTU probe burst was sent and the first - reply is received. - */ - - struct timeval now, diff; - gettimeofday(&now, NULL); - timersub(&now, &n->probe_time, &diff); - - struct timeval probe_timestamp = now; - if (DATA(packet)[0] == 2 && packet->len >= 11) { - uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4); - uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4); - probe_timestamp.tv_sec = ntohl(sec); - probe_timestamp.tv_usec = ntohl(usec); - } - - n->probe_counter++; - - if(n->probe_counter == 1) { - n->rtt = diff.tv_sec + diff.tv_usec * 1e-6; - n->probe_time = probe_timestamp; - logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2); - } } } @@ -298,7 +287,7 @@ static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) { #ifdef DISABLE_LEGACY return false; #else - if(!n->status.validkey) { + if(!n->status.validkey_in) { logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname); return false; } @@ -399,6 +388,9 @@ static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) { origlen -= MTU/64 + 20; } + if(inpkt->len > n->maxrecentlen) + n->maxrecentlen = inpkt->len; + inpkt->priority = 0; if(!DATA(inpkt)[12] && !DATA(inpkt)[13]) @@ -866,146 +858,279 @@ static void try_udp(node_t* n) { if(!udp_discovery) return; - struct timeval now; - gettimeofday(&now, NULL); struct timeval ping_tx_elapsed; timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed); - if(ping_tx_elapsed.tv_sec >= udp_discovery_interval) { - send_udp_probe_packet(n, MAX(n->minmtu, 16)); + int interval = n->status.udp_confirmed ? udp_discovery_keepalive_interval : udp_discovery_interval; + + if(ping_tx_elapsed.tv_sec >= interval) { + send_udp_probe_packet(n, MIN_PROBE_SIZE); n->udp_ping_sent = now; if(localdiscovery && !n->status.udp_confirmed && n->prevedge) { n->status.send_locally = true; - send_udp_probe_packet(n, 16); + send_udp_probe_packet(n, MIN_PROBE_SIZE); n->status.send_locally = false; } } } -// This function tries to determines the MTU of a node. -// By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu. -// If the MTU is already fixed, this function checks if it can be increased. +static length_t choose_initial_maxmtu(node_t *n) { +#ifdef IP_MTU + + int sock = -1; + + const sockaddr_t *sa = NULL; + int sockindex; + choose_udp_address(n, &sa, &sockindex); + if(!sa) + return MTU; + + sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP); + if(sock < 0) { + logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno)); + return MTU; + } + + if(connect(sock, &sa->sa, SALEN(sa->sa))) { + logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno)); + close(sock); + return MTU; + } + + int ip_mtu; + socklen_t ip_mtu_len = sizeof ip_mtu; + if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) { + logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno)); + close(sock); + return MTU; + } + + close(sock); + + /* getsockopt(IP_MTU) returns the MTU of the physical interface. + We need to remove various overheads to get to the tinc MTU. */ + length_t mtu = ip_mtu; + mtu -= (sa->sa.sa_family == AF_INET6) ? sizeof(struct ip6_hdr) : sizeof(struct ip); + mtu -= 8; /* UDP */ + if(n->status.sptps) { + mtu -= SPTPS_DATAGRAM_OVERHEAD; + if((n->options >> 24) >= 4) + mtu -= sizeof(node_id_t) + sizeof(node_id_t); + } else { + mtu -= digest_length(n->outdigest); + + /* Now it's tricky. We use CBC mode, so the length of the + encrypted payload must be a multiple of the blocksize. The + sequence number is also part of the encrypted payload, so we + must account for it after correcting for the blocksize. + Furthermore, the padding in the last block must be at least + 1 byte. */ + + length_t blocksize = cipher_blocksize(n->outcipher); + + if(blocksize > 1) { + mtu /= blocksize; + mtu *= blocksize; + mtu--; + } + + mtu -= 4; // seqno + } + + if (mtu < 512) { + logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu); + return MTU; + } + if (mtu > MTU) + return MTU; + + logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu); + return mtu; + +#else + + return MTU; + +#endif +} + +/* This function tries to determines the MTU of a node. + By calling this function repeatedly, n->minmtu will be progressively + increased, and at some point, n->mtu will be fixed to n->minmtu. If the MTU + is already fixed, this function checks if it can be increased. +*/ + static void try_mtu(node_t *n) { if(!(n->options & OPTION_PMTU_DISCOVERY)) return; if(udp_discovery && !n->status.udp_confirmed) { + n->maxrecentlen = 0; n->mtuprobes = 0; n->minmtu = 0; n->maxmtu = MTU; return; } - /* mtuprobes == 0..89: initial discovery, send bursts with 1 second interval, mtuprobes++ - mtuprobes == 90: fix MTU, and go to -1 - mtuprobes == -1: send one >maxmtu probe every pingtimeout */ + /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++ + mtuprobes == 20: fix MTU, and go to -1 + mtuprobes == -1: send one maxmtu and one maxmtu+1 probe every pinginterval + mtuprobes ==-2..-3: send one maxmtu probe every second + mtuprobes == -4: maxmtu no longer valid, reset minmtu and maxmtu and go to 0 */ - struct timeval now; - gettimeofday(&now, NULL); struct timeval elapsed; - timersub(&now, &n->probe_sent_time, &elapsed); + timersub(&now, &n->mtu_ping_sent, &elapsed); if(n->mtuprobes >= 0) { if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333) return; } else { - if(elapsed.tv_sec < pingtimeout) - return; + if(n->mtuprobes < -1) { + if(elapsed.tv_sec < 1) + return; + } else { + if(elapsed.tv_sec < pinginterval) + return; + } } + n->mtu_ping_sent = now; + try_fix_mtu(n); - int timeout; + if(n->mtuprobes < -3) { + /* We lost three MTU probes, restart discovery */ + logger(DEBUG_TRAFFIC, LOG_INFO, "Decrease in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname); + n->mtuprobes = 0; + n->minmtu = 0; + } + if(n->mtuprobes < 0) { - /* After the initial discovery, we only send one >maxmtu probe - to detect PMTU increases. */ - if(n->maxmtu + 8 < MTU) - send_udp_probe_packet(n, n->maxmtu + 8); + /* After the initial discovery, we only send one maxmtu and one + maxmtu+1 probe to detect PMTU increases. */ + send_udp_probe_packet(n, n->maxmtu); + if(n->mtuprobes == -1 && n->maxmtu + 1 < MTU) + send_udp_probe_packet(n, n->maxmtu + 1); + n->mtuprobes--; } else { - /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets, - but it will typically increase convergence time in the no-loss case. */ - const length_t probes_per_cycle = 8; - - /* This magic value was determined using math simulations. - It will result in a 1339-byte first probe, followed (if there was a reply) by a 1417-byte probe. - Since 1417 is just below the range of tinc MTUs over typical networks, - this fine-tuning allows tinc to cover a lot of ground very quickly. */ - const float multiplier = 0.982; - - const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1; - const length_t minmtu = MAX(n->minmtu, 64); - const float interval = n->maxmtu - minmtu; - - /* The core of the discovery algorithm is this exponential. - It produces very large probes early in the cycle, and then it very quickly decreases the probe size. - This reflects the fact that in the most difficult cases, we don't get any feedback for probes that - are too large, and therefore we need to concentrate on small offsets so that we can quickly converge - on the precise MTU as we are approaching it. - The last probe of the cycle is always 1 byte in size - this is to make sure we'll get at least one - reply per cycle so that we can make progress. */ - const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1)); - - send_udp_probe_packet(n, minmtu + offset); + /* Before initial discovery begins, set maxmtu to the most likely value. + If it's underestimated, we will correct it after initial discovery. */ + if(n->mtuprobes == 0) + n->maxmtu = choose_initial_maxmtu(n); + + for (;;) { + /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets, + but it will typically increase convergence time in the no-loss case. */ + const length_t probes_per_cycle = 8; + + /* This magic value was determined using math simulations. + It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe. + Since 1407 is just below the range of tinc MTUs over typical networks, + this fine-tuning allows tinc to cover a lot of ground very quickly. + This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller, + then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario + if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */ + const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1; + + const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1; + const length_t minmtu = MAX(n->minmtu, 512); + const float interval = n->maxmtu - minmtu; + + /* The core of the discovery algorithm is this exponential. + It produces very large probes early in the cycle, and then it very quickly decreases the probe size. + This reflects the fact that in the most difficult cases, we don't get any feedback for probes that + are too large, and therefore we need to concentrate on small offsets so that we can quickly converge + on the precise MTU as we are approaching it. + The last probe of the cycle is always 1 byte in size - this is to make sure we'll get at least one + reply per cycle so that we can make progress. */ + const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1)); + + length_t maxmtu = n->maxmtu; + send_udp_probe_packet(n, minmtu + offset); + /* If maxmtu changed, it means the probe was rejected by the system because it was too large. + In that case, we recalculate with the new maxmtu and try again. */ + if(n->mtuprobes < 0 || maxmtu == n->maxmtu) + break; + } + if(n->mtuprobes >= 0) n->mtuprobes++; } +} - n->probe_counter = 0; - n->probe_sent_time = now; - n->probe_time = now; +/* These functions try to establish a tunnel to a node (or its relay) so that + packets can be sent (e.g. exchange keys). + If a tunnel is already established, it tries to improve it (e.g. by trying + to establish a UDP tunnel instead of TCP). This function makes no + guarantees - it is up to the caller to check the node's state to figure out + if TCP and/or UDP is usable. By calling this function repeatedly, the + tunnel is gradually improved until we hit the wall imposed by the underlying + network environment. It is recommended to call this function every time a + packet is sent (or intended to be sent) to a node, so that the tunnel keeps + improving as packets flow, and then gracefully downgrades itself as it goes + idle. +*/ - /* Calculate the packet loss of incoming traffic by comparing the rate of - packets received to the rate with which the sequence number has increased. - TODO: this is unrelated to PMTU discovery - it should be moved elsewhere. - */ +static void try_tx_sptps(node_t *n, bool mtu) { + /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET + messages anyway, so there's no need for SPTPS at all. */ - if(n->received > n->prev_received) - n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno); - else - n->packetloss = n->received_seqno <= n->prev_received_seqno; + if(n->connection && ((myself->options | n->options) & OPTION_TCPONLY)) + return; - n->prev_received_seqno = n->received_seqno; - n->prev_received = n->received; -} + /* Otherwise, try to do SPTPS authentication with n if necessary. */ -// This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys). -// If a tunnel is already established, it tries to improve it (e.g. by trying to establish a UDP tunnel instead of TCP). -// This function makes no guarantees - it is up to the caller to check the node's state to figure out if TCP and/or UDP is usable. -// By calling this function repeatedly, the tunnel is gradually improved until we hit the wall imposed by the underlying network environment. -// It is recommended to call this function every time a packet is sent (or intended to be sent) to a node, -// so that the tunnel keeps improving as packets flow, and then gracefully downgrades itself as it goes idle. -static void try_tx(node_t *n) { - /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET - messages anyway, so there's no need for SPTPS at all. Otherwise, get the keys. */ - if(n->status.sptps && !(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))) { - try_sptps(n); - if (!n->status.validkey) - return; - } + try_sptps(n); + + /* Do we need to relay packets? */ node_t *via = (n->via == myself) ? n->nexthop : n->via; - - if((myself->options | via->options) & OPTION_TCPONLY) + + /* If the relay doesn't support SPTPS, everything goes via TCP anyway. */ + + if((via->options >> 24) < 4) return; - if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) { - send_req_key(via); - via->last_req_key = now.tv_sec; - } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4) { - try_udp(via); - try_mtu(via); + /* If we do have a relay, try everything with that one instead. */ + + if(via != n) + return try_tx_sptps(via, mtu); + + try_udp(n); + if(mtu) + try_mtu(n); +} + +static void try_tx_legacy(node_t *n, bool mtu) { + /* Does he have our key? If not, send one. */ + + if(!n->status.validkey_in) + send_ans_key(n); + + /* Check if we already have a key, or request one. */ + + if(!n->status.validkey) { + if(n->last_req_key + 10 <= now.tv_sec) { + send_req_key(n); + n->last_req_key = now.tv_sec; + } + return; } - /* If we don't know how to reach "via" yet, then try to reach it through a relay. */ - if(n->status.sptps && !via->status.udp_confirmed && via->nexthop != via && (via->nexthop->options >> 24) >= 4) - try_tx(via->nexthop); + try_udp(n); + if(mtu) + try_mtu(n); +} + +void try_tx(node_t *n, bool mtu) { + if(n->status.sptps) + try_tx_sptps(n, mtu); + else + try_tx_legacy(n, mtu); } -/* - send a packet to the given vpn ip. -*/ void send_packet(node_t *n, vpn_packet_t *packet) { - node_t *via; + // If it's for myself, write it to the tun/tap device. if(n == myself) { if(overwrite_mac) @@ -1016,44 +1141,47 @@ void send_packet(node_t *n, vpn_packet_t *packet) { return; } - logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)", - packet->len, n->name, n->hostname); + logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)", packet->len, n->name, n->hostname); + + // If the node is not reachable, drop it. if(!n->status.reachable) { - logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable", - n->name, n->hostname); + logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable", n->name, n->hostname); return; } + // Keep track of packet statistics. + n->out_packets++; n->out_bytes += packet->len; + // Check if it should be sent as an SPTPS packet. + if(n->status.sptps) { send_sptps_packet(n, packet); - goto end; + try_tx_sptps(n, true); + return; } - via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via; + // Determine which node to actually send it to. + + node_t *via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via; if(via != n) - logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)", - n->name, via->name, n->via->hostname); + logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)", n->name, via->name, n->via->hostname); + + // Try to send via UDP, unless TCP is forced. if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) { if(!send_tcppacket(via->connection, packet)) terminate_connection(via->connection, true); - } else - send_udppacket(via, packet); + return; + } -end: - /* Try to improve the tunnel. - Note that we do this *after* we send the packet because sending actual packets take priority - with regard to the send buffer space and latency. */ - try_tx(n); + send_udppacket(via, packet); + try_tx_legacy(via, true); } -/* Broadcast a packet using the minimum spanning tree */ - void broadcast_packet(const node_t *from, vpn_packet_t *packet) { // Always give ourself a copy of the packet. if(from != myself)