2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 1998-2005 Ivo Timmermans,
4 2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5 2010 Timothy Redaelli <timothy@redaelli.eu>
6 2010 Brandon Black <blblack@gmail.com>
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
35 #include "connection.h"
// Larger of a and b. Each argument can be evaluated twice, so callers should pass expressions without side effects.
52 #define MAX(a, b) ((a) > (b) ? (a) : (b))
// Work memory passed to both LZO compressors in compress_packet(); sized for whichever of the two needs more.
57 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
// Forward declaration: udp_probe_h() calls send_udppacket() before it is defined.
60 static void send_udppacket(node_t *, vpn_packet_t *);
// Size in bytes of the per-node late bitmap used for replay detection (8 sequence numbers per byte).
62 unsigned replaywin = 16;
// When true, try_udp() also sends a probe with send_locally set to unconfirmed nodes that have a prevedge.
63 bool localdiscovery = true;
// When true, try_mtu() checks udp_confirmed before probing.
64 bool udp_discovery = true;
// Seconds between probes in try_udp() once the UDP address of a node is confirmed.
65 int udp_discovery_keepalive_interval = 9;
// Seconds between probes in try_udp() while the UDP address is not yet confirmed.
66 int udp_discovery_interval = 2;
// Seconds without a probe reply after which udp_probe_timeout_handler() fires.
67 int udp_discovery_timeout = 30;
// Threshold compared against n->received_seqno in receive_udppacket(); the action taken is on a line missing from this excerpt.
69 #define MAX_SEQNO 1073741824
// Collapses the MTU search range of node n when discovery is finished.
// NOTE(review): this excerpt omits some original lines of the function (see the gaps in the embedded numbering).
71 static void try_fix_mtu(node_t *n) {
// Finished after 20 probes, or as soon as the lower bound has reached the upper bound.
75 if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) {
// Keep minmtu from exceeding maxmtu.
76 if(n->minmtu > n->maxmtu)
77 n->minmtu = n->maxmtu;
// Presumably the else branch of the test above (the else line is not visible): settle maxmtu on minmtu.
79 n->maxmtu = n->minmtu;
81 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
// Timeout callback registered by udp_probe_h(); clears the udp_confirmed flag of a node.
// NOTE(review): the line that derives n from data is missing from this excerpt.
86 static void udp_probe_timeout_handler(void *data) {
// Nothing to revoke when UDP was never confirmed (the statement guarded by this test is not visible).
88 if(!n->status.udp_confirmed)
91 logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
92 n->status.udp_confirmed = false;
// Handles a UDP probe packet from node n.
// A probe request is answered with a reply; a probe reply confirms UDP reachability and feeds MTU and RTT tracking.
//   n      - node the probe is attributed to
//   packet - the probe packet; its first data byte is the probe type
//   len    - probe length; echoed in type 2 replies and used as the reply length for other reply types
// NOTE(review): many original lines (braces, else and return statements) are missing from this excerpt, so the exact control flow is not visible.
98 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
// A zero type byte marks a probe request.
99 if(!DATA(packet)[0]) {
100 /* It's a probe request, send back a reply */
102 if(!n->status.sptps && !n->status.validkey) {
103 // But not if we don't have his key.
104 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request from %s (%s) but we don't have his key yet", n->name, n->hostname);
108 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
110 /* Type 2 probe replies were introduced in protocol 17.3 */
// The top byte of n->options is compared against 3 here, matching the 17.3 mentioned above.
111 if ((n->options >> 24) >= 3) {
112 uint8_t *data = DATA(packet);
// Reply payload: the probe length as a 16-bit value, then the current time as 32-bit seconds and microseconds, all in network byte order.
114 uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
116 gettimeofday(&now, NULL);
117 uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
118 uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
119 packet->len = 14; // Minimum size for any probe packet.
121 /* Legacy protocol: n won't understand type 2 probe replies. */
125 /* Temporarily set udp_confirmed, so that the reply is sent
126 back exactly the way it came in. */
// The previous flag value is saved and restored around the send.
128 bool udp_confirmed = n->status.udp_confirmed;
129 n->status.udp_confirmed = true;
130 send_udppacket(n, packet);
131 n->status.udp_confirmed = udp_confirmed;
// From here on the packet is treated as a probe reply. probelen defaults to len.
133 length_t probelen = len;
// Type 2 replies carry the original probe length in payload bytes 1..2 (network byte order).
134 if (DATA(packet)[0] == 2) {
136 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
138 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
141 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
143 /* It's a valid reply: now we know bidirectional communication
144 is possible using the address and socket that the reply
146 n->status.udp_confirmed = true;
// Re-arm the timer that revokes udp_confirmed after udp_discovery_timeout seconds without a reply.
149 timeout_del(&n->udp_ping_timeout);
150 timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
// A reply to a probe longer than maxmtu shows that the path accepts larger packets than assumed.
153 if(probelen >= n->maxmtu + 1) {
154 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
156 /* Set mtuprobes to 1 so that try_mtu() doesn't reset maxmtu */
161 /* If applicable, raise the minimum supported MTU */
// probelen is capped at maxmtu before it is allowed to raise minmtu.
163 if(probelen > n->maxmtu)
164 probelen = n->maxmtu;
165 if(n->minmtu < probelen) {
166 n->minmtu = probelen;
171 The RTT is the time between the MTU probe burst was sent and the first
// diff is the time elapsed since n->probe_time.
175 struct timeval now, diff;
176 gettimeofday(&now, NULL);
177 timersub(&now, &n->probe_time, &diff);
// Type 2 replies of at least 11 bytes carry a timestamp in payload bytes 3..10; otherwise the local time is used.
179 struct timeval probe_timestamp = now;
180 if (DATA(packet)[0] == 2 && packet->len >= 11) {
181 uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
182 uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
183 probe_timestamp.tv_sec = ntohl(sec);
184 probe_timestamp.tv_usec = ntohl(usec);
// The RTT in seconds is recorded when probe_counter equals 1; the line that updates probe_counter is not visible here.
189 if(n->probe_counter == 1) {
190 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
191 n->probe_time = probe_timestamp;
192 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2);
// Compresses len bytes from source into dest using the method selected by level.
// Level 10 uses lzo1x_1_compress, levels below 10 use zlib compress2, and the remaining branch uses lzo1x_999_compress.
// NOTE(review): the return statements and the first condition (presumably level == 0, a plain copy) are on lines missing from this excerpt.
197 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
// First branch: data is copied unchanged.
199 memcpy(dest, source, len);
201 } else if(level == 10) {
// lzolen is initialised to MAXSIZE before the call; presumably it receives the compressed size - TODO confirm against the LZO API.
203 lzo_uint lzolen = MAXSIZE;
204 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
209 } else if(level < 10) {
// zlib takes the capacity of dest in destlen; level is passed through as the zlib compression level.
211 unsigned long destlen = MAXSIZE;
212 if(compress2(dest, &destlen, source, len, level) == Z_OK)
219 lzo_uint lzolen = MAXSIZE;
220 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
// Decompresses len bytes from source into dest using the method selected by level.
// Levels above 9 use lzo1x_decompress_safe; the last visible branch uses zlib uncompress.
// NOTE(review): the return statements and the first condition (presumably level == 0, a plain copy) are on lines missing from this excerpt.
230 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
// First branch: data is copied unchanged.
232 memcpy(dest, source, len);
234 } else if(level > 9) {
// The output size variable starts at MAXSIZE for both decompressors.
236 lzo_uint lzolen = MAXSIZE;
237 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
245 unsigned long destlen = MAXSIZE;
246 if(uncompress(dest, &destlen, source, len) == Z_OK)
// Accounts for a fully decoded packet received from node n.
// NOTE(review): the remaining statements of this function are missing from this excerpt.
258 static void receive_packet(node_t *n, vpn_packet_t *packet) {
259 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
260 packet->len, n->name, n->hostname);
// Per-node byte counter for incoming traffic.
263 n->in_bytes += packet->len;
// Checks whether inpkt authenticates against the keys of node n, without consuming the packet.
// Returns the result of sptps_verify_datagram() on the first visible path and of digest_verify() on the legacy path.
// NOTE(review): the conditions selecting between the paths, and the return values of the guard branches, are on lines missing from this excerpt.
268 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
270 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
272 #ifdef DISABLE_LEGACY
// The legacy check needs an active digest and a packet long enough to hold a seqno plus the digest.
275 if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
// The digest occupies the last digest_length bytes of the packet and covers everything before it, starting at the seqno.
278 return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
// Decodes a UDP packet attributed to node n and delivers its payload.
// SPTPS nodes: the payload after the two node IDs is handed to sptps_receive_data().
// Legacy nodes: verify the MAC, decrypt, check the sequence number against the replay window, decompress, then dispatch to udp_probe_h() or receive_packet().
// Declared to return bool; the return statements are on lines missing from this excerpt.
// NOTE(review): declarations of nextpkt, outlen and seqno, plus several braces and else lines, are also missing.
282 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
// Two scratch packets; pkt[] lets successive stages alternate between them via nextpkt.
283 vpn_packet_t pkt1, pkt2;
284 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
287 pkt1.offset = DEFAULT_PACKET_OFFSET;
288 pkt2.offset = DEFAULT_PACKET_OFFSET;
290 if(n->status.sptps) {
// No SPTPS state yet: only the log message differs depending on waitingforkey.
291 if(!n->sptps.state) {
292 if(!n->status.waitingforkey) {
293 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
296 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
// Skip the two node IDs that precede the SPTPS payload.
300 inpkt->offset += 2 * sizeof(node_id_t);
301 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
302 logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
308 #ifdef DISABLE_LEGACY
311 if(!n->status.validkey_in) {
312 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
316 /* Check packet length */
318 if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
319 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
320 n->name, n->hostname);
324 /* It's a legacy UDP packet, the data starts after the seqno */
326 inpkt->offset += sizeof(seqno_t);
328 /* Check the message authentication code */
// The digest is stripped from the length first, then verified over the remaining bytes starting at the seqno.
330 if(digest_active(n->indigest)) {
331 inpkt->len -= digest_length(n->indigest);
332 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
333 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
337 /* Decrypt the packet */
// Decryption writes into the next scratch packet, starting at its seqno position.
339 if(cipher_active(n->incipher)) {
340 vpn_packet_t *outpkt = pkt[nextpkt++];
343 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
344 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
348 outpkt->len = outlen;
352 /* Check the sequence number */
// The seqno is read in network byte order and removed from the packet length.
355 memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
356 seqno = ntohl(seqno);
357 inpkt->len -= sizeof seqno;
// Anything other than the next expected seqno goes through the replay window logic.
360 if(seqno != n->received_seqno + 1) {
// At or beyond replaywin * 8 ahead: counted in n->farfuture and reported as dropped while the counter is below replaywin / 4.
361 if(seqno >= n->received_seqno + replaywin * 8) {
362 if(n->farfuture++ < replaywin >> 2) {
363 logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
364 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
367 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
368 seqno - n->received_seqno - 1, n->name, n->hostname);
// The late bitmap is cleared after the loss is logged.
369 memset(n->late, 0, replaywin);
370 } else if (seqno <= n->received_seqno) {
// Rejected when older than the window, or when its bit in the late bitmap is not set.
371 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
372 logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
373 n->name, n->hostname, seqno, n->received_seqno);
// Mark every skipped seqno between the last received one and this one as outstanding in the late bitmap.
377 for(int i = n->received_seqno + 1; i < seqno; i++)
378 n->late[(i / 8) % replaywin] |= 1 << i % 8;
// Clear the bit for this seqno in the late bitmap.
383 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
386 if(seqno > n->received_seqno)
387 n->received_seqno = seqno;
// The statement guarded by this limit check is not visible in this excerpt.
391 if(n->received_seqno > MAX_SEQNO)
394 /* Decompress the packet */
396 length_t origlen = inpkt->len;
398 if(n->incompression) {
399 vpn_packet_t *outpkt = pkt[nextpkt++];
// NOTE(review): the result is stored in outpkt->len before the comparison; if that field is an unsigned type this test can never be true - confirm the type of length_t.
401 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
402 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
403 n->name, n->hostname);
// Presumably an allowance for compression overhead - TODO confirm.
409 origlen -= MTU/64 + 20;
// Bytes 12 and 13 both zero mark a UDP probe; it is handled with the pre-decompression length.
414 if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
415 udp_probe_h(n, inpkt, origlen);
417 receive_packet(n, inpkt);
// Wraps len bytes received over meta connection c into a vpn_packet_t and passes it to receive_packet() for c->node.
// NOTE(review): the declaration of outpkt, the statements guarded by the two if tests, and the assignment of outpkt.len are missing from this excerpt.
422 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
424 outpkt.offset = DEFAULT_PACKET_OFFSET;
// Size check against the room left in the packet buffer after the offset.
426 if(len > sizeof outpkt.data - outpkt.offset)
430 if(c->options & OPTION_TCPONLY)
// A priority of -1 is tested in send_packet(), where it forces the nexthop and the TCP path.
433 outpkt.priority = -1;
434 memcpy(DATA(&outpkt), buffer, len);
436 receive_packet(c->node, &outpkt);
// Sends origpkt to node n as an SPTPS record, optionally compressed, or as a TCP packet when it exceeds the known minimum MTU.
// NOTE(review): declarations of type, offset and outpkt, plus several return and else lines, are missing from this excerpt.
439 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
// Guard for the case of no valid key and no direct connection (the guarded statement is not visible).
440 if(!n->status.validkey && !n->connection)
// Bytes 12 and 13 both zero mark a probe; it is sent as a PKT_PROBE record.
446 if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
447 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
// Router mode gets special treatment here; presumably it sets the offset used below - the guarded statement is not visible.
451 if(routing_mode == RMODE_ROUTER)
456 if(origpkt->len < offset)
461 if(n->outcompression) {
463 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
465 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
// The compressed form is used only when it is actually smaller than the original payload.
466 } else if(len < origpkt->len - offset) {
467 outpkt.len = len + offset;
469 type |= PKT_COMPRESSED;
473 /* If we have a direct metaconnection to n, and we can't use UDP, then
474 don't bother with SPTPS and just use a "plaintext" PACKET message.
475 We don't really care about end-to-end security since we're not
476 sending the message through any intermediate nodes. */
477 if(n->connection && origpkt->len > n->minmtu)
478 send_tcppacket(n->connection, origpkt);
480 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
// Ensures *sock indexes a listening socket of the same address family as sa.
// NOTE(review): the statement that updates *sock when a match is found is missing from this excerpt.
484 static void adapt_socket(const sockaddr_t *sa, int *sock) {
485 /* Make sure we have a suitable socket for the chosen address */
486 if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
// Scan the listening sockets for one with a matching family.
487 for(int i = 0; i < listen_sockets; i++) {
488 if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
// Selects the destination address (*sa) and listening socket index (*sock) for a UDP packet to node n.
// NOTE(review): the default assignments, the return statements and the loop body that uses j are missing from this excerpt.
496 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
501 /* If the UDP address is confirmed, use it. */
502 if(n->status.udp_confirmed)
505 /* Send every third packet to n->address; that could be set
506 to the node's reflexive UDP address discovered during key
515 /* Otherwise, addresses are found in edges to this node.
516 So we pick a random edge and a random socket. */
// NOTE(review): the modulo requires a non-zero edge count; confirm that a guard exists on a line not shown here.
519 int j = rand() % n->edge_tree->count;
520 edge_t *candidate = NULL;
522 for splay_each(edge_t, e, n->edge_tree) {
// The reverse edge is the candidate; its address is used below.
524 candidate = e->reverse;
530 *sa = &candidate->address;
531 *sock = rand() % listen_sockets;
// Replace the socket index when its family does not match the chosen address.
534 adapt_socket(*sa, sock);
// Selects a local address of node n (*sa) and a listening socket index (*sock) for a UDP packet.
// NOTE(review): the loop body that assigns candidate is missing from this excerpt.
537 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
540 /* Pick one of the edges from this node at random, then use its local address. */
// NOTE(review): the modulo requires a non-zero edge count; confirm that a guard exists on a line not shown here.
543 int j = rand() % n->edge_tree->count;
544 edge_t *candidate = NULL;
546 for splay_each(edge_t, e, n->edge_tree) {
// Outputs are written only when a candidate with a non-zero address family was found.
553 if (candidate && candidate->local_address.sa.sa_family) {
554 *sa = &candidate->local_address;
555 *sock = rand() % listen_sockets;
556 adapt_socket(*sa, sock);
// Sends origpkt to node n over UDP.
// SPTPS nodes are delegated to send_sptps_packet(); the legacy path compresses, adds a sequence number, encrypts, appends a MAC and calls sendto().
// Falls back to TCP or to the nexthop when no key is known or the packet exceeds the minimum MTU.
// NOTE(review): declarations of nextpkt, outlen and sock, plus several return, else and goto lines, are missing from this excerpt.
560 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
// Two scratch packets; pkt[] lets successive stages alternate between them via nextpkt.
561 vpn_packet_t pkt1, pkt2;
562 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
563 vpn_packet_t *inpkt = origpkt;
565 vpn_packet_t *outpkt;
// Original length, restored into origpkt->len at the end and used for MTU adjustment on send errors.
566 int origlen = origpkt->len;
568 #if defined(SOL_IP) && defined(IP_TOS)
// Static: remembers the last TOS value applied, so setsockopt is called only when the priority changes.
569 static int priority = 0;
570 int origpriority = origpkt->priority;
573 pkt1.offset = DEFAULT_PACKET_OFFSET;
574 pkt2.offset = DEFAULT_PACKET_OFFSET;
576 if(!n->status.reachable) {
577 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
// SPTPS path (the condition selecting it is not visible).
582 return send_sptps_packet(n, origpkt);
584 #ifdef DISABLE_LEGACY
587 /* Make sure we have a valid key */
589 if(!n->status.validkey) {
590 logger(DEBUG_TRAFFIC, LOG_INFO,
591 "No valid key known yet for %s (%s), forwarding via TCP",
592 n->name, n->hostname);
593 send_tcppacket(n->nexthop->connection, origpkt);
// With PMTU discovery on, a packet larger than minmtu whose bytes 12 and 13 are not both zero (so not a probe) is not sent by UDP.
597 if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
598 logger(DEBUG_TRAFFIC, LOG_INFO,
599 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
600 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
603 send_packet(n->nexthop, origpkt);
605 send_tcppacket(n->nexthop->connection, origpkt);
610 /* Compress the packet */
612 if(n->outcompression) {
613 outpkt = pkt[nextpkt++];
// NOTE(review): the result is stored in outpkt->len before the comparison; if that field is an unsigned type this test can never be true - confirm the type of length_t.
615 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
616 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
617 n->name, n->hostname);
624 /* Add sequence number */
// sent_seqno is incremented before use and stored in network byte order in front of the data.
626 seqno_t seqno = htonl(++(n->sent_seqno));
627 memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
628 inpkt->len += sizeof seqno;
630 /* Encrypt the packet */
632 if(cipher_active(n->outcipher)) {
633 outpkt = pkt[nextpkt++];
636 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
637 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
641 outpkt->len = outlen;
645 /* Add the message authentication code */
// The digest is written directly after the seqno and payload, then counted into the length.
647 if(digest_active(n->outdigest)) {
648 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
649 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
653 inpkt->len += digest_length(n->outdigest);
656 /* Send the packet */
658 const sockaddr_t *sa = NULL;
661 if(n->status.send_locally)
662 choose_local_address(n, &sa, &sock);
664 choose_udp_address(n, &sa, &sock);
666 #if defined(SOL_IP) && defined(IP_TOS)
// NOTE(review): the TOS option is applied to listen_socket[n->sock] while the sendto below uses listen_socket[sock] - confirm this is intended.
667 if(priorityinheritance && origpriority != priority
668 && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
669 priority = origpriority;
670 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
671 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
672 logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
// Would-block errors are ignored. A message-size error lowers maxmtu and mtu to just below the original packet length.
676 if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
677 if(sockmsgsize(sockerrno)) {
678 if(n->maxmtu >= origlen)
679 n->maxmtu = origlen - 1;
680 if(n->mtu >= origlen)
681 n->mtu = origlen - 1;
684 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
// The seqno and digest steps modify inpkt->len, and inpkt starts out as origpkt; restore the length seen by the caller.
688 origpkt->len = origlen;
// Transmits an SPTPS record of len bytes from node from to node to, either via the meta connection (base64 in a request) or via UDP.
//   type - SPTPS record type; SPTPS_HANDSHAKE always goes over TCP, PKT_PROBE is exempt from the MTU checks
// Returns the result of send_request() on the TCP paths; the return values of the UDP path are on lines missing from this excerpt.
// NOTE(review): declarations of overhead and sock, and several braces and else lines, are also missing.
692 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
// Prefer to->via as relay when it is not ourselves and the payload fits its minmtu (or this is a probe); otherwise use the nexthop.
693 node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
694 bool direct = from == myself && to == relay;
// The top byte of the options word is compared against 4 to decide whether the relay understands the ID prefix.
695 bool relay_supported = (relay->options >> 24) >= 4;
696 bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
698 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
699 TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
700 This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
702 if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
// NOTE(review): variable-length stack buffer sized from len; confirm callers bound len.
703 char buf[len * 4 / 3 + 5];
704 b64encode(data, buf, len);
705 /* If no valid key is known yet, send the packets using ANS_KEY requests,
706 to ensure we get to learn the reflexive UDP address. */
707 if(from == myself && !to->status.validkey) {
708 to->incompression = myself->incompression;
709 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
711 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
// UDP path: when the relay supports it, the datagram is prefixed with a destination ID and a source ID.
716 if(relay_supported) overhead += sizeof to->id + sizeof from->id;
// NOTE(review): variable-length stack buffer sized from len plus the ID prefix.
717 char buf[len + overhead]; char* buf_ptr = buf;
718 if(relay_supported) {
720 /* Inform the recipient that this packet was sent directly. */
// An all-zero destination ID is what handle_incoming_vpn_data() compares against on receipt.
721 node_id_t nullid = {};
722 memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
724 memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
726 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
729 /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
730 memcpy(buf_ptr, data, len); buf_ptr += len;
732 const sockaddr_t *sa = NULL;
734 if(relay->status.send_locally)
735 choose_local_address(relay, &sa, &sock);
737 choose_udp_address(relay, &sa, &sock);
738 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
// Would-block errors are ignored. A message-size error lowers the maxmtu and mtu of the relay to just below the payload length.
739 if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
740 if(sockmsgsize(sockerrno)) {
741 // Compensate for SPTPS overhead
742 len -= SPTPS_DATAGRAM_OVERHEAD;
743 if(relay->maxmtu >= len)
744 relay->maxmtu = len - 1;
745 if(relay->mtu >= len)
746 relay->mtu = len - 1;
749 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
// Public wrapper around send_sptps_data_priv() with myself as the sender.
// handle is passed as the destination node; returns whatever send_sptps_data_priv() returns.
757 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
758 return send_sptps_data_priv(handle, myself, type, data, len);
// Receives a decoded SPTPS record from the node passed as handle.
// Handshake records mark the key as valid; probe records go to udp_probe_h(); data records are decompressed if flagged,
// given an Ethernet type when no MAC header was sent, and passed to receive_packet().
// Declared to return bool; the return statements are on lines missing from this excerpt.
// NOTE(review): the declaration of inpkt, the length check condition and the switch case labels are also missing.
761 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
762 node_t *from = handle;
// A handshake record marks the key as valid and clears waitingforkey, if the key was not valid before.
764 if(type == SPTPS_HANDSHAKE) {
765 if(!from->status.validkey) {
766 from->status.validkey = true;
767 from->status.waitingforkey = false;
768 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
774 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
779 inpkt.offset = DEFAULT_PACKET_OFFSET;
// Probe records are copied into inpkt and handled with the record length as the probe length.
781 if(type == PKT_PROBE) {
783 memcpy(DATA(&inpkt), data, len);
784 udp_probe_h(from, &inpkt, len);
// Any type bit other than PKT_COMPRESSED and PKT_MAC is unexpected.
788 if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
789 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
793 /* Check if we have the headers we need */
794 if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
795 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
797 } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
798 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
// Without PKT_MAC the payload is placed 14 bytes into the packet data; with it, at the start.
801 int offset = (type & PKT_MAC) ? 0 : 14;
802 if(type & PKT_COMPRESSED) {
803 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
807 inpkt.len = ulen + offset;
809 if(inpkt.len > MAXSIZE)
812 memcpy(DATA(&inpkt) + offset, data, len);
813 inpkt.len = len + offset;
816 /* Generate the Ethernet packet type if necessary */
// The top four bits of byte 14 are read as the IP version; bytes 12 and 13 are then set to 0x0800 or 0x86DD.
818 switch(DATA(&inpkt)[14] >> 4) {
820 DATA(&inpkt)[12] = 0x08;
821 DATA(&inpkt)[13] = 0x00;
824 DATA(&inpkt)[12] = 0x86;
825 DATA(&inpkt)[13] = 0xDD;
828 logger(DEBUG_TRAFFIC, LOG_ERR,
829 "Unknown IP version %d while reading packet from %s (%s)",
830 DATA(&inpkt)[14] >> 4, from->name, from->hostname);
835 receive_packet(from, &inpkt);
839 // This function tries to get SPTPS keys, if they aren't already known.
840 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
// NOTE(review): the statements guarded by the first and third if tests are missing from this excerpt.
841 static void try_sptps(node_t *n) {
// Checked first: a valid key is already known.
842 if(n->status.validkey)
845 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
847 if(!n->status.waitingforkey)
// More than 10 seconds after last_req_key without a key: stop the SPTPS session and clear waitingforkey.
849 else if(n->last_req_key + 10 < now.tv_sec) {
850 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
851 sptps_stop(&n->sptps);
852 n->status.waitingforkey = false;
// Builds a UDP probe of len bytes and sends it to node n via send_udppacket().
// The first 14 bytes are zero (zero bytes 12 and 13 are what mark a probe elsewhere in this file); the rest is random data.
// NOTE(review): the declaration of packet and the assignments of its remaining fields are missing from this excerpt.
// NOTE(review): len - 14 is negative for len below 14; the visible callers pass at least 16 except try_mtu(), whose smallest probe size is not evident here.
859 static void send_udp_probe_packet(node_t *n, int len) {
861 packet.offset = DEFAULT_PACKET_OFFSET;
862 memset(DATA(&packet), 0, 14);
863 randomize(DATA(&packet) + 14, len - 14);
867 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
869 send_udppacket(n, &packet);
872 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
873 // If a tunnel is already established, it makes sure it stays up.
874 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
// NOTE(review): the first statements of the function are missing from this excerpt.
875 static void try_udp(node_t* n) {
// Time since the last probe was sent to this node.
879 struct timeval ping_tx_elapsed;
880 timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
// Confirmed nodes are probed at the keepalive interval, unconfirmed ones at the discovery interval.
882 int interval = n->status.udp_confirmed ? udp_discovery_keepalive_interval : udp_discovery_interval;
884 if(ping_tx_elapsed.tv_sec >= interval) {
// Probe at the current minimum MTU, but never below 16 bytes.
885 send_udp_probe_packet(n, MAX(n->minmtu, 16));
886 n->udp_ping_sent = now;
// Local discovery: send_locally is set only for the duration of one extra 16-byte probe.
888 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
889 n->status.send_locally = true;
890 send_udp_probe_packet(n, 16);
891 n->status.send_locally = false;
// Estimates the initial maximum tinc MTU for node n from the IP_MTU value of a temporary connected UDP socket.
// Returns a length_t; the return statements, including the fallback values on errors, are on lines missing from this excerpt.
// NOTE(review): declarations of sock, sockindex and ip_mtu, the platform guards and the blocksize arithmetic are also missing.
896 static length_t choose_initial_maxmtu(node_t *n) {
901 const sockaddr_t *sa = NULL;
903 choose_udp_address(n, &sa, &sockindex);
// A temporary UDP socket is created and connected to the chosen address so that getsockopt(IP_MTU) can be queried on it.
907 sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP);
909 logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
913 if(connect(sock, &sa->sa, SALEN(sa->sa))) {
914 logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
920 socklen_t ip_mtu_len = sizeof ip_mtu;
921 if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) {
922 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
929 /* getsockopt(IP_MTU) returns the MTU of the physical interface.
930 We need to remove various overheads to get to the tinc MTU. */
931 length_t mtu = ip_mtu;
// Subtract the IP header size for the address family in use.
932 mtu -= (sa->sa.sa_family == AF_INET6) ? sizeof(struct ip6_hdr) : sizeof(struct ip);
// SPTPS: subtract the datagram overhead, plus two node IDs when the top options byte is at least 4.
934 if(n->status.sptps) {
935 mtu -= SPTPS_DATAGRAM_OVERHEAD;
936 if((n->options >> 24) >= 4)
937 mtu -= sizeof(node_id_t) + sizeof(node_id_t);
// Legacy: subtract the digest length, then account for cipher block padding.
939 mtu -= digest_length(n->outdigest);
941 /* Now it's tricky. We use CBC mode, so the length of the
942 encrypted payload must be a multiple of the blocksize. The
943 sequence number is also part of the encrypted payload, so we
944 must account for it after correcting for the blocksize.
945 Furthermore, the padding in the last block must be at least
948 length_t blocksize = cipher_blocksize(n->outcipher);
960 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu);
966 logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu);
976 /* This function tries to determine the MTU of a node.
977 By calling this function repeatedly, n->minmtu will be progressively
978 increased, and at some point, n->mtu will be fixed to n->minmtu. If the MTU
979 is already fixed, this function checks if it can be increased.
// Drives path MTU discovery for node n by sending probes of varying sizes, and refreshes the packet loss estimate.
// NOTE(review): several return statements, the retry loop header and the mtuprobes updates are on lines missing from this excerpt.
982 static void try_mtu(node_t *n) {
// Checked first: PMTU discovery not enabled for this node.
983 if(!(n->options & OPTION_PMTU_DISCOVERY))
// With UDP discovery on, an unconfirmed node is handled separately (the statements in this block are not visible).
986 if(udp_discovery && !n->status.udp_confirmed) {
993 /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++
994 mtuprobes == 20: fix MTU, and go to -1
995 mtuprobes == -1: send one >maxmtu probe every pingtimeout */
// Time since the previous probe was sent; the tests below appear to pace probing, but the statements they guard are not visible.
997 struct timeval elapsed;
998 timersub(&now, &n->probe_sent_time, &elapsed);
999 if(n->mtuprobes >= 0) {
// During discovery: tests for less than about a third of a second since the last probe.
1000 if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
// Otherwise (per the state table above, after discovery): tests for less than pingtimeout seconds.
1003 if(elapsed.tv_sec < pingtimeout)
1009 if(n->mtuprobes < 0) {
1010 /* After the initial discovery, we only send one >maxmtu probe
1011 to detect PMTU increases. */
1012 if(n->maxmtu + 1 < MTU)
1013 send_udp_probe_packet(n, n->maxmtu + 1);
1015 /* Before initial discovery begins, set maxmtu to the most likely value.
1016 If it's underestimated, we will correct it after initial discovery. */
1017 if(n->mtuprobes == 0)
1018 n->maxmtu = choose_initial_maxmtu(n);
1021 /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
1022 but it will typically increase convergence time in the no-loss case. */
1023 const length_t probes_per_cycle = 8;
1025 /* This magic value was determined using math simulations.
1026 It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
1027 Since 1407 is just below the range of tinc MTUs over typical networks,
1028 this fine-tuning allows tinc to cover a lot of ground very quickly.
1029 This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller,
1030 then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario
1031 if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */
1032 const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1;
// cycle_position counts down from probes_per_cycle - 1 to 0 within each cycle of probes.
1034 const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1;
// The search range starts at no less than 512 bytes.
1035 const length_t minmtu = MAX(n->minmtu, 512);
1036 const float interval = n->maxmtu - minmtu;
1038 /* The core of the discovery algorithm is this exponential.
1039 It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
1040 This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
1041 are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
1042 on the precise MTU as we are approaching it.
1043 The last probe of the cycle is always 1 byte in size - this is to make sure we'll get at least one
1044 reply per cycle so that we can make progress. */
1045 const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
// maxmtu is sampled before sending so that a change made by the send path can be detected afterwards.
1047 length_t maxmtu = n->maxmtu;
1048 send_udp_probe_packet(n, minmtu + offset);
1049 /* If maxmtu changed, it means the probe was rejected by the system because it was too large.
1050 In that case, we recalculate with the new maxmtu and try again. */
1051 if(n->mtuprobes < 0 || maxmtu == n->maxmtu)
1055 if(n->mtuprobes >= 0)
// Reset the probe bookkeeping that udp_probe_h() reads for RTT measurement.
1059 n->probe_counter = 0;
1060 n->probe_sent_time = now;
1061 n->probe_time = now;
1063 /* Calculate the packet loss of incoming traffic by comparing the rate of
1064 packets received to the rate with which the sequence number has increased.
1065 TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
// Loss is one minus packets received divided by the seqno advance since the previous sample.
1068 if(n->received > n->prev_received)
1069 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
// Presumably the else branch (the else line is not visible): loss is 1 when the seqno has not advanced, otherwise 0.
1071 n->packetloss = n->received_seqno <= n->prev_received_seqno;
1073 n->prev_received_seqno = n->received_seqno;
1074 n->prev_received = n->received;
1077 /* These functions try to establish a tunnel to a node (or its relay) so that
1078 packets can be sent (e.g. exchange keys).
1079 If a tunnel is already established, it tries to improve it (e.g. by trying
1080 to establish a UDP tunnel instead of TCP). This function makes no
1081 guarantees - it is up to the caller to check the node's state to figure out
1082 if TCP and/or UDP is usable. By calling this function repeatedly, the
1083 tunnel is gradually improved until we hit the wall imposed by the underlying
1084 network environment. It is recommended to call this function every time a
1085 packet is sent (or intended to be sent) to a node, so that the tunnel keeps
1086 improving as packets flow, and then gracefully downgrades itself as it goes
// Tries to establish or improve the tunnel to SPTPS node n, recursing to its relay when one is used.
// NOTE(review): the return statements, the call that starts SPTPS and the condition guarding the recursion are missing from this excerpt.
1090 static void try_tx_sptps(node_t *n) {
1091 /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
1092 messages anyway, so there's no need for SPTPS at all. */
1094 if(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))
1097 /* Otherwise, try to do SPTPS authentication with n if necessary. */
1101 /* Do we need to relay packets? */
// When n->via is ourselves the nexthop is used; otherwise n->via.
1103 node_t *via = (n->via == myself) ? n->nexthop : n->via;
1105 /* If the relay doesn't support SPTPS, everything goes via TCP anyway. */
// Same version test (top options byte at least 4) as used for relay support in send_sptps_data_priv().
1107 if((via->options >> 24) < 4)
1110 /* If we do have a relay, try everything with that one instead. */
1113 return try_tx_sptps(via);
// Tries to establish the tunnel to legacy node n by making sure keys are exchanged in both directions.
// NOTE(review): the calls that send and request keys are on lines missing from this excerpt.
1119 static void try_tx_legacy(node_t *n) {
1120 /* Does he have our key? If not, send one. */
1122 if(!n->status.validkey_in)
1125 /* Check if we already have a key, or request one. */
1127 if(!n->status.validkey) {
// Limits the requests: at least 10 seconds must have passed since last_req_key before it is refreshed.
1128 if(n->last_req_key + 10 <= now.tv_sec) {
1130 n->last_req_key = now.tv_sec;
// Sends packet to node n: to the local device when n is ourselves, otherwise through SPTPS, TCP or UDP.
// NOTE(review): the conditions, return statements and packet counters on lines missing from this excerpt are not documented here.
1139 void send_packet(node_t *n, vpn_packet_t *packet) {
1140 // If it's for myself, write it to the tun/tap device.
// The first ETH_ALEN bytes of the packet are overwritten with our own MAC address before the write.
1144 memcpy(DATA(packet), mymac.x, ETH_ALEN);
1146 n->out_bytes += packet->len;
1147 devops.write(packet);
// NOTE(review): this routine trace message is logged with priority LOG_ERR - confirm that this is intended.
1151 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)", packet->len, n->name, n->hostname);
1153 // If the node is not reachable, drop it.
1155 if(!n->status.reachable) {
1156 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable", n->name, n->hostname);
1160 // Keep track of packet statistics.
1163 n->out_bytes += packet->len;
1165 // Check if it should be sent as an SPTPS packet.
1167 if(n->status.sptps) {
1168 send_sptps_packet(n, packet);
1173 // Determine which node to actually send it to.
// A priority of -1 (set by receive_tcppacket() in this file) selects the nexthop, as does n->via being ourselves.
1175 node_t *via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
1178 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)", n->name, via->name, n->via->hostname);
1180 // Try to send via UDP, unless TCP is forced.
// A failed TCP send terminates the connection to the chosen node.
1182 if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
1183 if(!send_tcppacket(via->connection, packet))
1184 terminate_connection(via->connection, true);
1188 send_udppacket(via, packet);
// Distributes a broadcast packet that originated at node from, according to broadcast_mode.
// NOTE(review): the case labels, break and return statements and the guard around the local copy are missing from this excerpt.
1192 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1193 // Always give ourself a copy of the packet.
1195 send_packet(myself, packet);
1197 // In TunnelServer mode, do not forward broadcast packets.
1198 // The MST might not be valid and create loops.
1199 if(tunnelserver || broadcast_mode == BMODE_NONE)
1202 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1203 packet->len, from->name, from->hostname);
1205 switch(broadcast_mode) {
1206 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1207 // This guarantees all nodes receive the broadcast packet, and
1208 // usually distributes the sending of broadcast packets over all nodes.
// Forward on every MST connection with an edge, except the one leading back towards the sender.
1210 for list_each(connection_t, c, connection_list)
1211 if(c->edge && c->status.mst && c != from->nexthop->connection)
1212 send_packet(c->node, packet);
1215 // In direct mode, we send copies to each node we know of.
1216 // However, this only reaches nodes that can be reached in a single hop.
1217 // We don't have enough information to forward broadcast packets in this case.
// Only reachable nodes other than ourselves that need no intermediate hop are sent a copy.
1222 for splay_each(node_t, n, node_tree)
1223 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1224 send_packet(n, packet);
// Searches the edges for a node whose keys authenticate pkt, for packets whose source address from matches no known node.
// Returns a node_t pointer; the return statements and the bookkeeping variables are on lines missing from this excerpt.
// NOTE(review): the continue, return and flag statements guarded by the if tests below are not visible.
1232 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
// Static: remembers, across calls, the second in which the last search was made.
1235 static time_t last_hard_try = 0;
1237 for splay_each(edge_t, e, edge_weight_tree) {
// Unreachable destinations and ourselves are tested for first.
1238 if(!e->to->status.reachable || e->to == myself)
// presumably non-zero means the addresses differ (port ignored) - TODO confirm against sockaddrcmp_noport.
1241 if(sockaddrcmp_noport(from, &e->address)) {
// Compares the current second with last_hard_try; presumably limits this branch to one search per second.
1242 if(last_hard_try == now.tv_sec)
1247 if(!try_mac(e->to, pkt))
1255 last_hard_try = now.tv_sec;
1257 last_hard_try = now.tv_sec;
// Event callback for a readable UDP listening socket: reads one datagram, identifies the sending node,
// relays it when it is addressed to another node, and otherwise hands it to receive_udppacket().
//   data  - the listen_socket_t the datagram arrived on
//   flags - not used in the visible lines
// NOTE(review): declarations of pkt, hostname, from and to, the assignment of pkt.len, and many return, else and brace lines are missing from this excerpt.
1261 void handle_incoming_vpn_data(void *data, int flags) {
1262 listen_socket_t *ls = data;
// All-zero ID; a destination ID equal to it is tested for below.
1265 node_id_t nullid = {};
1266 sockaddr_t addr = {};
1267 socklen_t addrlen = sizeof addr;
1269 bool direct = false;
1272 int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
// Would-block conditions are not logged; every other failure is.
1274 if(len <= 0 || len > MAXSIZE) {
1275 if(!sockwouldblock(sockerrno))
1276 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1282 sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1284 // Try to figure out who sent this packet.
1286 node_t *n = lookup_node_udp(&addr);
1289 // It might be from a 1.1 node, which might have a source ID in the packet.
// Look the sender up by the source ID in the packet, then verify the datagram with its SPTPS state.
1290 pkt.offset = 2 * sizeof(node_id_t);
1291 from = lookup_node_id(SRCID(&pkt));
1292 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1293 if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
// Further lookup: search the edges for a node whose key authenticates the packet.
1302 n = try_harder(&addr, &pkt);
1307 if(debug_level >= DEBUG_PROTOCOL) {
1308 hostname = sockaddr2hostname(&addr);
1309 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1315 if(n->status.sptps) {
1316 pkt.offset = 2 * sizeof(node_id_t);
// Tests for the all-zero destination ID that send_sptps_data_priv() writes on packets sent directly.
1318 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1323 from = lookup_node_id(SRCID(&pkt));
1324 to = lookup_node_id(DSTID(&pkt));
1327 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
// Relay case: forward the SPTPS payload towards its destination on behalf of the sender.
1332 send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1341 if(!receive_udppacket(from, &pkt))
// Record which listening socket the packet arrived on.
1344 n->sock = ls - listen_socket;
// For direct packets, update the stored UDP address when sockaddrcmp reports a non-zero result.
1345 if(direct && sockaddrcmp(&addr, &n->address))
1346 update_node_udp(n, &addr);
1349 void handle_device_data(void *data, int flags) {
1350 vpn_packet_t packet;
1351 packet.offset = DEFAULT_PACKET_OFFSET;
1352 packet.priority = 0;
1354 if(devops.read(&packet)) {
1355 myself->in_packets++;
1356 myself->in_bytes += packet.len;
1357 route(myself, &packet);