2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 1998-2005 Ivo Timmermans,
4 2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5 2010 Timothy Redaelli <timothy@redaelli.eu>
6 2010 Brandon Black <blblack@gmail.com>
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
35 #include "connection.h"
// Larger of two values. Function-like macro: each argument can be evaluated
// twice, so callers should pass side-effect-free expressions.
52 #define MAX(a, b) ((a) > (b) ? (a) : (b))
// Work memory passed to both lzo1x_1_compress() and lzo1x_999_compress() in
// compress_packet(); sized for whichever of the two constants is larger.
57 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
// Forward declaration: udp_probe_h() calls send_udppacket() before its definition.
60 static void send_udppacket(node_t *, vpn_packet_t *);
// Size in bytes of the per-node late-packet bitmap; receive_udppacket() treats
// the replay window as replaywin * 8 sequence numbers.
62 unsigned replaywin = 16;
// When true, try_udp() also sends a probe with send_locally set for nodes that
// are not yet UDP-confirmed.
63 bool localdiscovery = true;
// Consulted by try_mtu() together with the udp_confirmed flag of the node.
64 bool udp_discovery = true;
// Seconds between UDP probes once the path is confirmed (see try_udp()).
65 int udp_discovery_keepalive_interval = 9;
// Seconds between UDP probes while the path is not confirmed (see try_udp()).
66 int udp_discovery_interval = 2;
// Seconds used for the udp_ping_timeout timer armed in udp_probe_h().
67 int udp_discovery_timeout = 30;
// Threshold (2^30) that received_seqno is compared against in receive_udppacket().
69 #define MAX_SEQNO 1073741824
// Settles the MTU of n once 20 probes have been sent or the lower bound has
// reached the upper bound. The visible assignments reconcile the two bounds so
// that minmtu and maxmtu end up equal, then the outcome is logged.
71 static void try_fix_mtu(node_t *n) {
75 if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) {
// The lower bound must never exceed the upper bound.
76 if(n->minmtu > n->maxmtu)
77 n->minmtu = n->maxmtu;
79 n->maxmtu = n->minmtu;
// NOTE(review): the message prints n->mtu; the assignment to n->mtu is not visible in this listing.
81 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
// Timer callback registered by udp_probe_h() through timeout_add(). It logs that
// probe replies stopped arriving and clears the udp_confirmed flag of the node.
// data is the callback argument supplied at registration (the node).
86 static void udp_probe_timeout_handler(void *data) {
// NOTE(review): the statement guarded by this check is not visible here; presumably an early return.
88 if(!n->status.udp_confirmed)
91 logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
92 n->status.udp_confirmed = false;
// Handles a UDP probe packet received from n.
// A first payload byte of zero marks a probe request: it is rewritten into a
// reply and sent straight back. Any other value is handled as a probe reply,
// which confirms the UDP path, re-arms the ping timeout, updates the MTU bounds
// and feeds the RTT estimate.
// len is the probe length echoed in type 2 replies; it is also the probe length
// assumed for replies that do not carry one.
98 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
99 if(!DATA(packet)[0]) {
100 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
102 /* It's a probe request, send back a reply */
104 /* Type 2 probe replies were introduced in protocol 17.3 */
// Presumably the top byte of options holds the protocol minor version of the peer.
105 if ((n->options >> 24) >= 3) {
106 uint8_t *data = DATA(packet);
// Echo the received length as a 16-bit value in network byte order.
108 uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
// Append the current time as two 32-bit fields in network byte order
// (seconds, then microseconds).
110 gettimeofday(&now, NULL);
111 uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
112 uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
115 /* Legacy protocol: n won't understand type 2 probe replies. */
119 /* Temporarily set udp_confirmed, so that the reply is sent
120 back exactly the way it came in. */
// The previous flag value is saved and restored around the send.
122 bool udp_confirmed = n->status.udp_confirmed;
123 n->status.udp_confirmed = true;
124 send_udppacket(n, packet);
125 n->status.udp_confirmed = udp_confirmed;
// Reply handling: a type 2 reply carries the original probe length in payload
// bytes 1-2 (network byte order), which replaces the default taken from len.
127 length_t probelen = len;
128 if (DATA(packet)[0] == 2) {
130 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
132 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
135 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
137 /* It's a valid reply: now we know bidirectional communication
138 is possible using the address and socket that the reply
140 n->status.udp_confirmed = true;
// Restart the timer whose handler clears udp_confirmed again.
143 timeout_del(&n->udp_ping_timeout);
144 timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
// A reply to a probe longer than the current upper bound means the path MTU grew.
147 if(probelen >= n->maxmtu + 1) {
148 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
150 /* Set mtuprobes to 1 so that try_mtu() doesn't reset maxmtu */
155 /* If applicable, raise the minimum supported MTU */
// The confirmed length is capped at maxmtu before it may raise minmtu.
157 if(probelen > n->maxmtu)
158 probelen = n->maxmtu;
159 if(n->minmtu < probelen) {
160 n->minmtu = probelen;
165 The RTT is the time between the MTU probe burst was sent and the first
// diff becomes the time elapsed since n->probe_time.
169 struct timeval now, diff;
170 gettimeofday(&now, NULL);
171 timersub(&now, &n->probe_time, &diff);
// For type 2 replies of at least 11 bytes, take the timestamp embedded by the
// peer (payload bytes 3-6 seconds, 7-10 microseconds) instead of the local clock.
173 struct timeval probe_timestamp = now;
174 if (DATA(packet)[0] == 2 && packet->len >= 11) {
175 uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
176 uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
177 probe_timestamp.tv_sec = ntohl(sec);
178 probe_timestamp.tv_usec = ntohl(usec);
// rtt is stored in seconds as a floating point value; it is only sampled when
// probe_counter equals 1. The update of probe_counter is not visible here.
183 if(n->probe_counter == 1) {
184 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
185 n->probe_time = probe_timestamp;
186 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2);
// Compresses len bytes from source into dest using the algorithm selected by
// level, and returns a length_t.
// Visible dispatch: level 10 calls lzo1x_1_compress(), levels below 10 call zlib
// compress2() with level as the compression level, and the final
// lzo1x_999_compress() call presumably serves the remaining levels. The first
// branch, whose condition is not visible here, copies the data unchanged.
// The return statements are not visible in this listing.
191 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
193 memcpy(dest, source, len);
195 } else if(level == 10) {
// lzolen is initialised to MAXSIZE, presumably the capacity of dest.
197 lzo_uint lzolen = MAXSIZE;
// Both LZO calls share the file-level lzo_wrkmem scratch buffer.
198 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
203 } else if(level < 10) {
// destlen is initialised to MAXSIZE, presumably the capacity of dest.
205 unsigned long destlen = MAXSIZE;
206 if(compress2(dest, &destlen, source, len, level) == Z_OK)
213 lzo_uint lzolen = MAXSIZE;
214 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
// Counterpart of compress_packet(): expands len bytes from source into dest
// according to level and returns a length_t.
// Levels above 9 go through lzo1x_decompress_safe(); the zlib uncompress() call
// presumably serves the remaining non-zero levels. The first branch, whose
// condition is not visible here, copies the data unchanged.
// The return statements are not visible in this listing.
224 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
226 memcpy(dest, source, len);
228 } else if(level > 9) {
// The bounds-checked LZO decoder is used; lzolen is initialised to MAXSIZE.
230 lzo_uint lzolen = MAXSIZE;
231 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
239 unsigned long destlen = MAXSIZE;
240 if(uncompress(dest, &destlen, source, len) == Z_OK)
// Common entry point for a decoded packet that arrived from node n: logs it and
// adds its length to the inbound byte counter of n. Any further processing is
// not visible in this listing.
252 static void receive_packet(node_t *n, vpn_packet_t *packet) {
253 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
254 packet->len, n->name, n->hostname);
257 n->in_bytes += packet->len;
// Reports whether inpkt authenticates against the keys held for n. One path
// delegates to sptps_verify_datagram(); the other checks the trailing digest of
// a legacy packet with digest_verify(). Used by try_harder() to identify senders.
262 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
264 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
// NOTE(review): the lines selected by this preprocessor conditional are not visible here.
266 #ifdef DISABLE_LEGACY
// Reject when no digest is active or the packet cannot hold a sequence number
// plus a digest; the guarded statement is not visible here.
269 if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
// The digest occupies the last digest_length() bytes and covers everything before it.
272 return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
// Processes one UDP datagram attributed to node n and returns a bool.
// SPTPS nodes: the two leading node ids are skipped and the rest is handed to
// sptps_receive_data().
// Legacy nodes: the packet is length-checked, authenticated, decrypted, run
// through the replay window, decompressed, and finally dispatched either to
// udp_probe_h() or to receive_packet().
// The return statements are not visible in this listing.
276 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
// Two scratch packets handed out in alternation through pkt[nextpkt++], so each
// transformation stage writes to a buffer other than the one it reads.
277 vpn_packet_t pkt1, pkt2;
278 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
281 pkt1.offset = DEFAULT_PACKET_OFFSET;
282 pkt2.offset = DEFAULT_PACKET_OFFSET;
284 if(n->status.sptps) {
// No SPTPS session state yet: only log, with the message chosen by waitingforkey.
285 if(!n->sptps.state) {
286 if(!n->status.waitingforkey) {
287 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
290 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
// Skip the destination and source node ids that prefix the SPTPS payload.
294 inpkt->offset += 2 * sizeof(node_id_t);
295 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
296 logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
// NOTE(review): the lines selected by this preprocessor conditional are not visible here.
302 #ifdef DISABLE_LEGACY
305 if(!n->status.validkey_in) {
306 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
310 /* Check packet length */
312 if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
313 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
314 n->name, n->hostname);
318 /* It's a legacy UDP packet, the data starts after the seqno */
320 inpkt->offset += sizeof(seqno_t);
322 /* Check the message authentication code */
324 if(digest_active(n->indigest)) {
// Strip the digest from the length first; it then sits right after the covered bytes.
325 inpkt->len -= digest_length(n->indigest);
326 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
327 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
331 /* Decrypt the packet */
333 if(cipher_active(n->incipher)) {
334 vpn_packet_t *outpkt = pkt[nextpkt++];
337 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
338 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
342 outpkt->len = outlen;
346 /* Check the sequence number */
// The sequence number travels in network byte order ahead of the payload.
349 memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
350 seqno = ntohl(seqno);
351 inpkt->len -= sizeof seqno;
// Anything other than the next expected number goes through the replay window.
354 if(seqno != n->received_seqno + 1) {
// Far in the future: beyond replaywin * 8 numbers ahead of the last one seen.
355 if(seqno >= n->received_seqno + replaywin * 8) {
// The first replaywin / 4 such packets are logged as dropped.
356 if(n->farfuture++ < replaywin >> 2) {
357 logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
358 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
// Past that point the jump is reported as loss and the late bitmap is cleared.
361 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
362 seqno - n->received_seqno - 1, n->name, n->hostname);
363 memset(n->late, 0, replaywin);
364 } else if (seqno <= n->received_seqno) {
// An old number is reported if it falls outside the window or its bit in the
// late bitmap is not set (it was not marked as still outstanding).
365 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
366 logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
367 n->name, n->hostname, seqno, n->received_seqno);
// Mark every skipped sequence number as outstanding in the late bitmap.
// NOTE(review): i is a signed int compared against seqno; if seqno is an
// unsigned 32-bit type the comparison mixes signedness - confirm.
371 for(int i = n->received_seqno + 1; i < seqno; i++)
372 n->late[(i / 8) % replaywin] |= 1 << i % 8;
// This sequence number has now been seen: clear its bit.
377 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
380 if(seqno > n->received_seqno)
381 n->received_seqno = seqno;
// The action taken above MAX_SEQNO is not visible in this listing.
385 if(n->received_seqno > MAX_SEQNO)
388 /* Decompress the packet */
390 length_t origlen = inpkt->len;
392 if(n->incompression) {
393 vpn_packet_t *outpkt = pkt[nextpkt++];
// NOTE(review): if length_t is an unsigned type, the assigned value can never
// compare below zero, so a decompression failure would go unnoticed - confirm.
395 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
396 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
397 n->name, n->hostname);
// Presumably compensates for worst-case compression overhead - TODO confirm.
403 origlen -= MTU/64 + 20;
// Payload bytes 12 and 13 both zero identify a UDP probe; it gets the
// pre-decompression length. Everything else is regular traffic.
408 if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
409 udp_probe_h(n, inpkt, origlen);
411 receive_packet(n, inpkt);
// Accepts a packet that arrived over the meta connection c: copies len bytes
// from buffer into a local vpn_packet_t and passes it to receive_packet() on
// behalf of c->node.
416 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
418 outpkt.offset = DEFAULT_PACKET_OFFSET;
// Bounds check against the space left in the packet buffer after the offset;
// the guarded statement is not visible here.
420 if(len > sizeof outpkt.data - outpkt.offset)
// The statement selected for TCP-only connections is not visible here.
424 if(c->options & OPTION_TCPONLY)
// send_packet() treats priority -1 as a request to stay on TCP.
427 outpkt.priority = -1;
428 memcpy(DATA(&outpkt), buffer, len);
430 receive_packet(c->node, &outpkt);
// Sends origpkt to the SPTPS node n. Probes go out as PKT_PROBE records; other
// packets are optionally compressed and sent either as an SPTPS record or, for
// oversized packets to a directly connected node, through send_tcppacket().
433 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
// Neither a valid key nor a meta connection; the guarded statement is not visible here.
434 if(!n->status.validkey && !n->connection)
// Payload bytes 12 and 13 both zero identify a UDP probe.
440 if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
441 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
// Presumably router mode strips the Ethernet header by setting offset; the
// assignment is not visible here.
445 if(routing_mode == RMODE_ROUTER)
450 if(origpkt->len < offset)
455 if(n->outcompression) {
457 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
459 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
// The compressed form is adopted only when it is smaller than the original.
460 } else if(len < origpkt->len - offset) {
461 outpkt.len = len + offset;
463 type |= PKT_COMPRESSED;
467 /* If we have a direct metaconnection to n, and we can't use UDP, then
468 don't bother with SPTPS and just use a "plaintext" PACKET message.
469 We don't really care about end-to-end security since we're not
470 sending the message through any intermediate nodes. */
471 if(n->connection && origpkt->len > n->minmtu)
472 send_tcppacket(n->connection, origpkt);
474 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
// Checks that the listening socket indexed by *sock has the same address family
// as sa. On a mismatch the listen sockets are scanned for one whose family
// matches; the action taken on a match is not visible here (presumably *sock is
// updated).
478 static void adapt_socket(const sockaddr_t *sa, int *sock) {
479 /* Make sure we have a suitable socket for the chosen address */
480 if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
481 for(int i = 0; i < listen_sockets; i++) {
482 if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
// Selects the destination address (*sa) and listening socket index (*sock) for
// a UDP packet to n. The existing comments describe the order: a confirmed
// address first, then periodically n->address, otherwise the address of a
// randomly chosen edge. The socket is finally matched to the address family.
490 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
495 /* If the UDP address is confirmed, use it. */
496 if(n->status.udp_confirmed)
499 /* Send every third packet to n->address; that could be set
500 to the node's reflexive UDP address discovered during key
509 /* Otherwise, addresses are found in edges to this node.
510 So we pick a random edge and a random socket. */
// j is a random index below the number of edges of n.
// NOTE(review): the modulo assumes edge_tree->count is non-zero; a guard is not visible here.
513 int j = rand() % n->edge_tree->count;
514 edge_t *candidate = NULL;
516 for splay_each(edge_t, e, n->edge_tree) {
// The reverse edge is taken as the candidate.
518 candidate = e->reverse;
524 *sa = &candidate->address;
525 *sock = rand() % listen_sockets;
528 adapt_socket(*sa, sock);
// Variant of choose_udp_address() that targets the local address recorded on a
// randomly chosen edge of n. *sa and *sock are only set when a candidate edge
// with a non-zero local address family was found.
531 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
534 /* Pick one of the edges from this node at random, then use its local address. */
// NOTE(review): the modulo assumes edge_tree->count is non-zero; a guard is not visible here.
537 int j = rand() % n->edge_tree->count;
538 edge_t *candidate = NULL;
540 for splay_each(edge_t, e, n->edge_tree) {
// A zero sa_family marks an unset local address.
547 if (candidate && candidate->local_address.sa.sa_family) {
548 *sa = &candidate->local_address;
549 *sock = rand() % listen_sockets;
550 adapt_socket(*sa, sock);
// Sends origpkt to node n over UDP using the legacy packet format, or hands it
// to send_sptps_packet() for SPTPS nodes.
// Legacy pipeline in order: key and size checks with TCP fallback, optional
// compression, sequence number, optional encryption, optional digest, address
// selection, optional IP_TOS update, sendto(). A message-too-large error from
// sendto() lowers the MTU bounds of n. origpkt->len is restored before returning.
554 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
// Two scratch packets handed out in alternation through pkt[nextpkt++].
555 vpn_packet_t pkt1, pkt2;
556 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
557 vpn_packet_t *inpkt = origpkt;
559 vpn_packet_t *outpkt;
// Remembered so the MTU bounds can be adjusted and the length restored later.
560 int origlen = origpkt->len;
562 #if defined(SOL_IP) && defined(IP_TOS)
// Static: remembers the priority most recently applied, across calls.
563 static int priority = 0;
564 int origpriority = origpkt->priority;
567 pkt1.offset = DEFAULT_PACKET_OFFSET;
568 pkt2.offset = DEFAULT_PACKET_OFFSET;
570 if(!n->status.reachable) {
571 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
// The condition guarding this delegation is not visible here (presumably n->status.sptps).
576 return send_sptps_packet(n, origpkt);
// NOTE(review): the lines selected by this preprocessor conditional are not visible here.
578 #ifdef DISABLE_LEGACY
581 /* Make sure we have a valid key */
583 if(!n->status.validkey) {
584 logger(DEBUG_TRAFFIC, LOG_INFO,
585 "No valid key known yet for %s (%s), forwarding via TCP",
586 n->name, n->hostname);
587 send_tcppacket(n->nexthop->connection, origpkt);
// Non-probe packets above the confirmed minimum MTU are not sent directly over UDP.
591 if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
592 logger(DEBUG_TRAFFIC, LOG_INFO,
593 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
594 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
597 send_packet(n->nexthop, origpkt);
599 send_tcppacket(n->nexthop->connection, origpkt);
604 /* Compress the packet */
606 if(n->outcompression) {
607 outpkt = pkt[nextpkt++];
// NOTE(review): if length_t is an unsigned type, the assigned value can never
// compare below zero, so a compression failure would go unnoticed - confirm.
609 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
610 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
611 n->name, n->hostname);
618 /* Add sequence number */
// Pre-incremented per packet and stored in network byte order ahead of the payload.
620 seqno_t seqno = htonl(++(n->sent_seqno));
621 memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
622 inpkt->len += sizeof seqno;
624 /* Encrypt the packet */
626 if(cipher_active(n->outcipher)) {
627 outpkt = pkt[nextpkt++];
630 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
631 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
635 outpkt->len = outlen;
639 /* Add the message authentication code */
641 if(digest_active(n->outdigest)) {
// The digest is written directly after the bytes it covers.
642 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
// NOTE(review): this is the digest failure path, yet the message says encrypting.
643 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
647 inpkt->len += digest_length(n->outdigest);
650 /* Send the packet */
652 const sockaddr_t *sa = NULL;
655 if(n->status.send_locally)
656 choose_local_address(n, &sa, &sock);
658 choose_udp_address(n, &sa, &sock);
660 #if defined(SOL_IP) && defined(IP_TOS)
// NOTE(review): the TOS option is applied to listen_socket[n->sock] while the
// sendto() below uses listen_socket[sock]; these can differ - confirm intended.
661 if(priorityinheritance && origpriority != priority
662 && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
663 priority = origpriority;
664 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
665 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
666 logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
// NOTE(review): sa starts as NULL and is dereferenced here; choose_local_address()
// does not always set it.
// Errors that merely mean the call would block are ignored.
670 if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
// Message too large for the system: pull maxmtu and mtu below the size that failed.
671 if(sockmsgsize(sockerrno)) {
672 if(n->maxmtu >= origlen)
673 n->maxmtu = origlen - 1;
674 if(n->mtu >= origlen)
675 n->mtu = origlen - 1;
678 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
// Restore the caller-visible length of the original packet.
682 origpkt->len = origlen;
// Transmits len bytes of SPTPS data originating at node from and destined for
// node to. The data is either base64-encoded into an ANS_KEY or REQ_KEY request
// on the meta connection towards to->nexthop, or sent as a UDP datagram to the
// chosen relay, prefixed with destination and source node ids when the relay
// supports them.
// Returns the result of send_request() on the TCP path; the UDP-path return
// statements are not visible in this listing.
686 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
// Prefer to->via as the relay when it is another node and the payload is a
// probe or fits within its minmtu; otherwise fall back to the next hop.
// NOTE(review): len is a size_t, so len - SPTPS_DATAGRAM_OVERHEAD wraps around
// if len is smaller than the overhead - confirm callers rule that out.
687 node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
// direct: we are the origin and the relay is the destination itself.
688 bool direct = from == myself && to == relay;
// Presumably protocol minor version 4 or later understands the id prefix.
689 bool relay_supported = (relay->options >> 24) >= 4;
690 bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
692 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
693 TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
694 This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
696 if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
// Variable-length stack buffer sized for the base64 expansion of len bytes.
697 char buf[len * 4 / 3 + 5];
698 b64encode(data, buf, len);
699 /* If no valid key is known yet, send the packets using ANS_KEY requests,
700 to ensure we get to learn the reflexive UDP address. */
701 if(from == myself && !to->status.validkey) {
// Our own incompression setting is recorded on the peer and sent in the request.
702 to->incompression = myself->incompression;
703 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
705 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
// UDP path: reserve room for the two node ids when the relay supports them.
710 if(relay_supported) overhead += sizeof to->id + sizeof from->id;
711 char buf[len + overhead]; char* buf_ptr = buf;
712 if(relay_supported) {
714 /* Inform the recipient that this packet was sent directly. */
// An all-zero destination id is what the receive path compares against nullid.
715 node_id_t nullid = {};
716 memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
// Alternative to the null id; the branch structure is not visible here.
718 memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
// The source id follows the destination id.
720 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
723 /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
724 memcpy(buf_ptr, data, len); buf_ptr += len;
726 const sockaddr_t *sa = NULL;
728 if(relay->status.send_locally)
729 choose_local_address(relay, &sa, &sock);
731 choose_udp_address(relay, &sa, &sock);
732 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
// NOTE(review): sa starts as NULL and is dereferenced here; choose_local_address()
// does not always set it.
// buf_ptr - buf is the number of bytes assembled above.
733 if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
// Message too large for the system: pull the MTU bounds of the relay below the
// payload size that failed.
734 if(sockmsgsize(sockerrno)) {
735 // Compensate for SPTPS overhead
736 len -= SPTPS_DATAGRAM_OVERHEAD;
737 if(relay->maxmtu >= len)
738 relay->maxmtu = len - 1;
739 if(relay->mtu >= len)
740 relay->mtu = len - 1;
743 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
// Public wrapper around send_sptps_data_priv() with myself as the originating
// node. handle is passed through as the destination node pointer.
751 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
752 return send_sptps_data_priv(handle, myself, type, data, len);
// Handles one record of len bytes delivered for the node passed as handle.
// Handshake records mark the key as valid, probe records go to udp_probe_h(),
// and data records are validated, optionally decompressed, given an Ethernet
// type when the MAC header is absent, and passed to receive_packet().
// The return statements are not visible in this listing.
755 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
756 node_t *from = handle;
758 if(type == SPTPS_HANDSHAKE) {
// First handshake record for this node: mark the key valid and stop waiting.
759 if(!from->status.validkey) {
760 from->status.validkey = true;
761 from->status.waitingforkey = false;
762 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
// The size check that triggers this message is not visible here.
768 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
773 inpkt.offset = DEFAULT_PACKET_OFFSET;
775 if(type == PKT_PROBE) {
777 memcpy(DATA(&inpkt), data, len);
778 udp_probe_h(from, &inpkt, len);
// Any bit other than PKT_COMPRESSED and PKT_MAC is unexpected.
782 if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
783 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
787 /* Check if we have the headers we need */
788 if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
789 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
791 } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
792 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
// Without PKT_MAC the payload is placed 14 bytes in, leaving room for a header.
795 int offset = (type & PKT_MAC) ? 0 : 14;
796 if(type & PKT_COMPRESSED) {
797 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
801 inpkt.len = ulen + offset;
803 if(inpkt.len > MAXSIZE)
806 memcpy(DATA(&inpkt) + offset, data, len);
807 inpkt.len = len + offset;
810 /* Generate the Ethernet packet type if necessary */
// The high nibble of payload byte 14 is treated as the IP version; the case
// labels are not visible here. 0x0800 and 0x86DD are written to bytes 12-13.
812 switch(DATA(&inpkt)[14] >> 4) {
814 DATA(&inpkt)[12] = 0x08;
815 DATA(&inpkt)[13] = 0x00;
818 DATA(&inpkt)[12] = 0x86;
819 DATA(&inpkt)[13] = 0xDD;
822 logger(DEBUG_TRAFFIC, LOG_ERR,
823 "Unknown IP version %d while reading packet from %s (%s)",
824 DATA(&inpkt)[14] >> 4, from->name, from->hostname);
829 receive_packet(from, &inpkt);
833 // This function tries to get SPTPS keys, if they aren't already known.
834 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
835 static void try_sptps(node_t *n) {
// Nothing to do once a valid key exists; the guarded statement is not visible here.
836 if(n->status.validkey)
839 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
// Not waiting yet: the statement that starts the key request is not visible here.
841 if(!n->status.waitingforkey)
// More than 10 seconds since last_req_key without a key: stop SPTPS and clear
// the waiting flag.
843 else if(n->last_req_key + 10 < now.tv_sec) {
844 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
845 sptps_stop(&n->sptps);
846 n->status.waitingforkey = false;
// Builds a UDP probe of len bytes for node n and sends it with send_udppacket().
// The first 14 payload bytes are zeroed, which makes bytes 12 and 13 zero (the
// probe marker tested by the receive paths) and byte 0 zero (a probe request in
// udp_probe_h()). The rest is filled by randomize().
853 static void send_udp_probe_packet(node_t *n, int len) {
855 packet.offset = DEFAULT_PACKET_OFFSET;
856 memset(DATA(&packet), 0, 14);
// NOTE(review): len - 14 is negative for len below 14; the callers visible in
// this file pass at least 16 from try_udp() - confirm for try_mtu().
857 randomize(DATA(&packet) + 14, len - 14);
861 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
863 send_udppacket(n, &packet);
866 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
867 // If a tunnel is already established, it makes sure it stays up.
868 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
869 static void try_udp(node_t* n) {
// Time elapsed since the last probe was sent to this node.
873 struct timeval ping_tx_elapsed;
874 timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
// Confirmed paths are probed at the keepalive interval, unconfirmed ones at the
// discovery interval.
876 int interval = n->status.udp_confirmed ? udp_discovery_keepalive_interval : udp_discovery_interval;
878 if(ping_tx_elapsed.tv_sec >= interval) {
// Probe at the confirmed minimum MTU, but never with fewer than 16 bytes.
879 send_udp_probe_packet(n, MAX(n->minmtu, 16));
880 n->udp_ping_sent = now;
// Local discovery: send one extra 16-byte probe with send_locally set, so that
// send_udppacket() picks the address through choose_local_address(). The flag
// is cleared again right after.
882 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
883 n->status.send_locally = true;
884 send_udp_probe_packet(n, 16);
885 n->status.send_locally = false;
// Estimates a starting maxmtu for node n by asking the operating system: a
// temporary UDP socket is connected to the address chosen for n and queried
// with getsockopt(IP_MTU). Protocol overheads are then subtracted from the
// reported value. The fallbacks on failure and the return statements are not
// visible in this listing.
890 static length_t choose_initial_maxmtu(node_t *n) {
895 const sockaddr_t *sa = NULL;
897 choose_udp_address(n, &sa, &sockindex);
// A throwaway socket of the same address family as the destination.
901 sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP);
903 logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
// Connecting the datagram socket fixes the destination that IP_MTU is queried for.
907 if(connect(sock, &sa->sa, SALEN(sa->sa))) {
908 logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
914 socklen_t ip_mtu_len = sizeof ip_mtu;
915 if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) {
916 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
923 /* getsockopt(IP_MTU) returns the MTU of the physical interface.
924 We need to remove various overheads to get to the tinc MTU. */
925 length_t mtu = ip_mtu;
// Subtract the IP header size that matches the address family.
// NOTE(review): no further header subtraction (for example UDP) is visible in this listing.
926 mtu -= (sa->sa.sa_family == AF_INET6) ? sizeof(struct ip6_hdr) : sizeof(struct ip);
// SPTPS adds its datagram overhead, plus two node ids for peers at minor version 4 or later.
928 if(n->status.sptps) {
929 mtu -= SPTPS_DATAGRAM_OVERHEAD;
930 if((n->options >> 24) >= 4)
931 mtu -= sizeof(node_id_t) + sizeof(node_id_t);
// The sanity check that triggers this message is not visible here.
935 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu);
// NOTE(review): mtu is a length_t printed with %hd; confirm the type is a signed short or adjust the specifier.
941 logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu);
951 // This function tries to determine the MTU of a node.
952 // By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu.
953 // If the MTU is already fixed, this function checks if it can be increased.
954 static void try_mtu(node_t *n) {
// PMTU discovery disabled for this node; the guarded statement is not visible here.
955 if(!(n->options & OPTION_PMTU_DISCOVERY))
// Handling for a path that is not UDP-confirmed while udp_discovery is on; the
// body is not visible here.
958 if(udp_discovery && !n->status.udp_confirmed) {
965 /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++
966 mtuprobes == 20: fix MTU, and go to -1
967 mtuprobes == -1: send one >maxmtu probe every pingtimeout */
// Rate limiting based on the time since the last probe burst.
// NOTE(review): the comment above says 1 second interval, while the check below
// uses 333333 microseconds - confirm which is intended.
969 struct timeval elapsed;
970 timersub(&now, &n->probe_sent_time, &elapsed);
971 if(n->mtuprobes >= 0) {
972 if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
// Presumably the branch for a fixed MTU (mtuprobes below zero): wait pingtimeout seconds.
975 if(elapsed.tv_sec < pingtimeout)
981 if(n->mtuprobes < 0) {
982 /* After the initial discovery, we only send one >maxmtu probe
983 to detect PMTU increases. */
984 if(n->maxmtu + 1 < MTU)
985 send_udp_probe_packet(n, n->maxmtu + 1);
987 /* Before initial discovery begins, set maxmtu to the most likely value.
988 If it's underestimated, we will correct it after initial discovery. */
989 if(n->mtuprobes == 0)
990 n->maxmtu = choose_initial_maxmtu(n);
993 /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
994 but it will typically increase convergence time in the no-loss case. */
995 const length_t probes_per_cycle = 8;
997 /* This magic value was determined using math simulations.
998 It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
999 Since 1407 is just below the range of tinc MTUs over typical networks,
1000 this fine-tuning allows tinc to cover a lot of ground very quickly.
1001 This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller,
1002 then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario
1003 if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */
1004 const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1;
// cycle_position counts down from probes_per_cycle - 1 to 0 within each cycle.
1006 const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1;
// Probing never starts below 512 bytes.
1007 const length_t minmtu = MAX(n->minmtu, 512);
1008 const float interval = n->maxmtu - minmtu;
1010 /* The core of the discovery algorithm is this exponential.
1011 It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
1012 This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
1013 are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
1014 on the precise MTU as we are approaching it.
1015 The last probe of the cycle is always 1 byte in size - this is to make sure we'll get at least one
1016 reply per cycle so that we can make progress. */
// With cycle_position 0 the exponent is 0, so offset is 1 and the probe sent
// below is minmtu + 1 bytes long.
1017 const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
// Snapshot used to detect whether sending the probe lowered n->maxmtu.
1019 length_t maxmtu = n->maxmtu;
1020 send_udp_probe_packet(n, minmtu + offset);
1021 /* If maxmtu changed, it means the probe was rejected by the system because it was too large.
1022 In that case, we recalculate with the new maxmtu and try again. */
1023 if(n->mtuprobes < 0 || maxmtu == n->maxmtu)
// The statement guarded here is not visible; presumably it advances mtuprobes.
1027 if(n->mtuprobes >= 0)
// Reset the bookkeeping that udp_probe_h() uses for the RTT measurement.
1031 n->probe_counter = 0;
1032 n->probe_sent_time = now;
1033 n->probe_time = now;
1035 /* Calculate the packet loss of incoming traffic by comparing the rate of
1036 packets received to the rate with which the sequence number has increased.
1037 TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
// Loss is one minus the ratio of packets received to sequence numbers advanced
// since the previous call.
// NOTE(review): the divisor is zero if received_seqno did not advance while
// received did; the result is then not a finite loss ratio - confirm this cannot happen.
1040 if(n->received > n->prev_received)
1041 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
// Nothing new received: the comparison yields 1 if the sequence number did not
// advance and 0 otherwise.
1043 n->packetloss = n->received_seqno <= n->prev_received_seqno;
1045 n->prev_received_seqno = n->received_seqno;
1046 n->prev_received = n->received;
1049 // This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys).
1050 // If a tunnel is already established, it tries to improve it (e.g. by trying to establish a UDP tunnel instead of TCP).
1051 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if TCP and/or UDP is usable.
1052 // By calling this function repeatedly, the tunnel is gradually improved until we hit the wall imposed by the underlying network environment.
1053 // It is recommended to call this function every time a packet is sent (or intended to be sent) to a node,
1054 // so that the tunnel keeps improving as packets flow, and then gracefully downgrades itself as it goes idle.
1055 static void try_tx(node_t *n) {
1056 /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
1057 messages anyway, so there's no need for SPTPS at all. Otherwise, get the keys. */
1058 if(n->status.sptps && !(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))) {
// The statements between these checks are not visible; presumably try_sptps(n)
// runs first and the function stops while the key is still missing.
1060 if (!n->status.validkey)
// The node UDP traffic is aimed at: the next hop when n->via is myself,
// otherwise the via node.
1064 node_t *via = (n->via == myself) ? n->nexthop : n->via;
// TCP-only on either side; the guarded statement is not visible here.
1066 if((myself->options | via->options) & OPTION_TCPONLY)
// Legacy node without a valid key: a key request is limited to once per 10
// seconds through last_req_key. The request call itself is not visible here.
1069 if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) {
1071 via->last_req_key = now.tv_sec;
// The body of this branch is not visible; presumably it runs the UDP and MTU probing.
1072 } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4) {
1077 /* If we don't know how to reach "via" yet, then try to reach it through a relay. */
// Recurses on the next hop of via, provided that hop is at minor version 4 or later.
1078 if(n->status.sptps && !via->status.udp_confirmed && via->nexthop != via && (via->nexthop->options >> 24) >= 4)
1079 try_tx(via->nexthop);
1083 send a packet to the given vpn ip.
// Entry point for sending packet to node n. Among the visible paths: one writes
// the packet to the local device through devops.write(), SPTPS nodes go through
// send_sptps_packet(), and other nodes are reached over TCP or UDP through the
// selected via node.
1085 void send_packet(node_t *n, vpn_packet_t *packet) {
// Presumably the local-delivery path (n is myself); its condition is not visible here.
1090 memcpy(DATA(packet), mymac.x, ETH_ALEN);
1092 n->out_bytes += packet->len;
1093 devops.write(packet);
// NOTE(review): this routine traffic message is logged at LOG_ERR priority,
// unlike the neighbouring LOG_INFO and LOG_DEBUG messages - confirm intended.
1097 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
1098 packet->len, n->name, n->hostname);
1100 if(!n->status.reachable) {
1101 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
1102 n->name, n->hostname);
1107 n->out_bytes += packet->len;
1109 if(n->status.sptps) {
1110 send_sptps_packet(n, packet);
// Priority -1 (set by receive_tcppacket()) or a via of myself selects the next
// hop; otherwise the via node is used.
1114 via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
// NOTE(review): the message pairs via->name with n->via->hostname; these refer
// to different nodes when via is n->nexthop - confirm intended.
1117 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
1118 n->name, via->name, n->via->hostname);
1120 if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
// A failed TCP send terminates the meta connection.
1121 if(!send_tcppacket(via->connection, packet))
1122 terminate_connection(via->connection, true);
1124 send_udppacket(via, packet);
1127 /* Try to improve the tunnel.
1128 Note that we do this *after* we send the packet because sending actual packets take priority
1129 with regard to the send buffer space and latency. */
1133 /* Broadcast a packet using the minimum spanning tree */
// from is the node the broadcast originated from; packet is forwarded according
// to broadcast_mode. The case labels of the switch are not visible in this listing.
1135 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1136 // Always give ourself a copy of the packet.
// NOTE(review): any condition guarding this call is not visible here.
1138 send_packet(myself, packet);
1140 // In TunnelServer mode, do not forward broadcast packets.
1141 // The MST might not be valid and create loops.
1142 if(tunnelserver || broadcast_mode == BMODE_NONE)
1145 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1146 packet->len, from->name, from->hostname);
1148 switch(broadcast_mode) {
1149 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1150 // This guarantees all nodes receive the broadcast packet, and
1151 // usually distributes the sending of broadcast packets over all nodes.
// Forward on every connection that has an edge and is flagged mst, except the
// connection towards the next hop of the sender.
1153 for list_each(connection_t, c, connection_list)
1154 if(c->edge && c->status.mst && c != from->nexthop->connection)
1155 send_packet(c->node, packet);
1158 // In direct mode, we send copies to each node we know of.
1159 // However, this only reaches nodes that can be reached in a single hop.
1160 // We don't have enough information to forward broadcast packets in this case.
// Selects reachable nodes other than myself for which either via is myself and
// the next hop is the node itself, or via is the node itself.
1165 for splay_each(node_t, n, node_tree)
1166 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1167 send_packet(n, packet);
// Tries to work out which node sent pkt from the address from, when the address
// lookup in handle_incoming_vpn_data() found nothing. Edges from
// edge_weight_tree are walked and try_mac() is used to test candidate nodes.
// last_hard_try records the second in which a hard attempt was last made and is
// compared with now.tv_sec to limit such attempts.
// The return statements are not visible in this listing.
1175 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
// Static: persists across calls.
1178 static time_t last_hard_try = 0;
1180 for splay_each(edge_t, e, edge_weight_tree) {
// Skip edges to unreachable nodes and to ourselves; the guarded statement is not visible here.
1181 if(!e->to->status.reachable || e->to == myself)
// Presumably sockaddrcmp_noport() is non-zero when the addresses differ
// (ignoring the port), so this branch handles edges that do not match from.
1184 if(sockaddrcmp_noport(from, &e->address)) {
1185 if(last_hard_try == now.tv_sec)
// A failed MAC check rules this node out; the guarded statement is not visible here.
1190 if(!try_mac(e->to, pkt))
1198 last_hard_try = now.tv_sec;
1200 last_hard_try = now.tv_sec;
// I/O callback for a UDP listening socket (data is the listen_socket_t). It
// reads one datagram, identifies the sending node by address, by the source id
// embedded in the packet, or through try_harder(), and then either relays the
// packet to another node or processes it locally with receive_udppacket().
// flags is not used in the visible lines.
1204 void handle_incoming_vpn_data(void *data, int flags) {
1205 listen_socket_t *ls = data;
// An all-zero node id; a destination id equal to it marks a directly sent packet.
1208 node_id_t nullid = {};
1209 sockaddr_t addr = {};
1210 socklen_t addrlen = sizeof addr;
1212 bool direct = false;
1215 int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
// Errors are logged unless they only mean that no data was available.
1217 if(len <= 0 || len > MAXSIZE) {
1218 if(!sockwouldblock(sockerrno))
1219 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1225 sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1227 // Try to figure out who sent this packet.
1229 node_t *n = lookup_node_udp(&addr);
1232 // It might be from a 1.1 node, which might have a source ID in the packet.
// Setting the offset past the two node ids lets SRCID() and DSTID() address them.
1233 pkt.offset = 2 * sizeof(node_id_t);
1234 from = lookup_node_id(SRCID(&pkt));
// Accept the claimed source only for a direct packet from an SPTPS node whose
// datagram verifies.
1235 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1236 if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
// Fall back to testing candidate nodes by MAC.
1245 n = try_harder(&addr, &pkt);
// Sender still unknown: the hostname is only resolved when the message would be logged.
1250 if(debug_level >= DEBUG_PROTOCOL) {
1251 hostname = sockaddr2hostname(&addr);
1252 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1258 if(n->status.sptps) {
1259 pkt.offset = 2 * sizeof(node_id_t);
// Null destination id: the packet was sent to us directly.
1261 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
// Otherwise resolve both ids for relaying.
1266 from = lookup_node_id(SRCID(&pkt));
1267 to = lookup_node_id(DSTID(&pkt));
1270 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
// Relay the payload towards to.
// NOTE(review): the second argument is n (the node the datagram arrived from),
// not the from node looked up above - confirm intended.
1275 send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1284 if(!receive_udppacket(from, &pkt))
// Remember which listening socket the node was heard on, as an index into listen_socket.
1287 n->sock = ls - listen_socket;
// For direct packets, record a changed source address as the UDP address of the node.
1288 if(direct && sockaddrcmp(&addr, &n->address))
1289 update_node_udp(n, &addr);
// I/O callback for the local device: reads one packet through devops.read(),
// counts it against the inbound statistics of myself and passes it to route().
// Neither data nor flags is used in the visible lines.
1292 void handle_device_data(void *data, int flags) {
1293 vpn_packet_t packet;
1294 packet.offset = DEFAULT_PACKET_OFFSET;
// Packets read from the device start with priority 0.
1295 packet.priority = 0;
1297 if(devops.read(&packet)) {
1298 myself->in_packets++;
1299 myself->in_bytes += packet.len;
1300 route(myself, &packet);