2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 1998-2005 Ivo Timmermans,
4 2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5 2010 Timothy Redaelli <timothy@redaelli.eu>
6 2010 Brandon Black <blblack@gmail.com>
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
35 #include "connection.h"
// MAX yields the larger of its two arguments; each argument may be evaluated twice.
52 #define MAX(a, b) ((a) > (b) ? (a) : (b))
// Work memory handed to the LZO compressors in compress_packet(), sized for the larger of the two variants.
57 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
// Forward declaration: send_udppacket() is called before its definition.
60 static void send_udppacket(node_t *, vpn_packet_t *);
// Size in bytes of the n->late bitmap used by the sequence-number checks (8 sequence numbers per byte).
62 unsigned replaywin = 16;
// When true, try_udp() also sends a probe with status.send_locally set.
63 bool localdiscovery = true;
// Consulted by try_mtu() together with status.udp_confirmed.
64 bool udp_discovery = true;
// Seconds between UDP probes sent by try_udp() once the address is confirmed.
65 int udp_discovery_keepalive_interval = 9;
// Seconds between UDP probes sent by try_udp() while the address is unconfirmed.
66 int udp_discovery_interval = 2;
// Seconds after a probe reply until udp_probe_timeout_handler() is scheduled to run.
67 int udp_discovery_timeout = 30;
// Threshold (2^30) compared against n->received_seqno in receive_udppacket().
69 #define MAX_SEQNO 1073741824
// Settles the path MTU of node n once 20 probes have been sent or the lower
// bound has reached the upper bound, then logs the resulting n->mtu.
71 static void try_fix_mtu(node_t *n) {
75 if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) {
// If minmtu overshot maxmtu, pull it back down to maxmtu.
76 if(n->minmtu > n->maxmtu)
77 n->minmtu = n->maxmtu;
79 n->maxmtu = n->minmtu;
81 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
// Timeout callback registered by udp_probe_h() through timeout_add().
// Logs that UDP ping replies from the node have stopped and clears status.udp_confirmed.
// NOTE(review): n is presumably the node_t passed as data at registration - confirm.
86 static void udp_probe_timeout_handler(void *data) {
88 if(!n->status.udp_confirmed)
91 logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
92 n->status.udp_confirmed = false;
// Handles a UDP probe packet received from node n.
// A packet whose first data byte is zero is a probe request; a reply is built in place and sent back.
// Any other packet is treated as a probe reply: it marks UDP as confirmed, re-arms the UDP ping
// timeout, and feeds the probe length into the MTU bounds and the RTT estimate.
// len is the probe length; for type 2 replies the length stored in the packet is used instead.
98 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
99 if(!DATA(packet)[0]) {
100 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
102 /* It's a probe request, send back a reply */
104 /* Type 2 probe replies were introduced in protocol 17.3 */
// The top byte of n->options is compared against 3 to decide whether the peer understands type 2 replies.
105 if ((n->options >> 24) >= 3) {
106 uint8_t *data = DATA(packet);
// Fields written here: 2-byte length, then 4-byte seconds and 4-byte microseconds, all in network byte order.
108 uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
110 gettimeofday(&now, NULL);
111 uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
112 uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
115 /* Legacy protocol: n won't understand type 2 probe replies. */
119 /* Temporarily set udp_confirmed, so that the reply is sent
120 back exactly the way it came in. */
122 bool udp_confirmed = n->status.udp_confirmed;
123 n->status.udp_confirmed = true;
124 send_udppacket(n, packet);
// Restore the saved flag once the reply has been handed to send_udppacket().
125 n->status.udp_confirmed = udp_confirmed;
127 length_t probelen = len;
128 if (DATA(packet)[0] == 2) {
130 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
// Type 2 replies carry the probe length at offset 1, in network byte order.
132 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
135 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
137 /* It's a valid reply: now we know bidirectional communication
138 is possible using the address and socket that the reply
140 n->status.udp_confirmed = true;
// Re-arm the timeout that clears udp_confirmed after udp_discovery_timeout seconds.
143 timeout_del(&n->udp_ping_timeout);
144 timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
// A reply longer than the current maxmtu means the path MTU has grown.
147 if(probelen >= n->maxmtu + 1) {
148 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
150 /* Set mtuprobes to 1 so that try_mtu() doesn't reset maxmtu */
155 /* If applicable, raise the minimum supported MTU */
// Clamp the probe length so minmtu is never raised above maxmtu.
157 if(probelen > n->maxmtu)
158 probelen = n->maxmtu;
159 if(n->minmtu < probelen) {
160 n->minmtu = probelen;
165 The RTT is the time between the MTU probe burst was sent and the first
169 struct timeval now, diff;
170 gettimeofday(&now, NULL);
// diff is the time elapsed since n->probe_time.
171 timersub(&now, &n->probe_time, &diff);
173 struct timeval probe_timestamp = now;
// Type 2 replies of at least 11 bytes carry a timestamp: seconds at offset 3, microseconds at offset 7.
174 if (DATA(packet)[0] == 2 && packet->len >= 11) {
175 uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
176 uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
177 probe_timestamp.tv_sec = ntohl(sec);
178 probe_timestamp.tv_usec = ntohl(usec);
// Only when probe_counter is 1 does diff become the stored RTT (in seconds).
183 if(n->probe_counter == 1) {
184 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
185 n->probe_time = probe_timestamp;
186 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2);
// Compresses len bytes from source into dest using the algorithm selected by level.
// Level 10 uses lzo1x_1_compress, levels below 10 use zlib compress2 with that level,
// and the remaining branch uses lzo1x_999_compress. One branch is a plain memcpy.
// Every compressor is given MAXSIZE as the output capacity.
191 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
193 memcpy(dest, source, len);
195 } else if(level == 10) {
197 lzo_uint lzolen = MAXSIZE;
198 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
203 } else if(level < 10) {
205 unsigned long destlen = MAXSIZE;
206 if(compress2(dest, &destlen, source, len, level) == Z_OK)
213 lzo_uint lzolen = MAXSIZE;
214 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
// Inverse of compress_packet(): expands len bytes from source into dest.
// Levels above 9 use lzo1x_decompress_safe; the final branch uses zlib uncompress.
// One branch is a plain memcpy. The output capacity passed to both decompressors is MAXSIZE.
224 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
226 memcpy(dest, source, len);
228 } else if(level > 9) {
230 lzo_uint lzolen = MAXSIZE;
231 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
239 unsigned long destlen = MAXSIZE;
240 if(uncompress(dest, &destlen, source, len) == Z_OK)
// Logs a decoded VPN packet received from node n and adds its length to the node's receive byte counter.
252 static void receive_packet(node_t *n, vpn_packet_t *packet) {
253 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
254 packet->len, n->name, n->hostname);
257 n->in_bytes += packet->len;
// Tests whether inpkt authenticates as coming from node n.
// One path asks sptps_verify_datagram() using the node's SPTPS state; the legacy path
// verifies the digest stored in the last digest_length() bytes of the packet.
262 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
264 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
266 #ifdef DISABLE_LEGACY
// The packet must have room for a sequence number plus the digest.
269 if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
272 return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
// Processes one UDP packet that has been attributed to node n.
// SPTPS nodes: the two node IDs are skipped and the rest goes to sptps_receive_data().
// Legacy nodes: the packet is length-checked, authenticated, decrypted, checked against the
// replay window, decompressed, and then passed to udp_probe_h() or receive_packet().
276 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
// Scratch packets taken in turn (via nextpkt) by the decrypt and decompress stages.
277 vpn_packet_t pkt1, pkt2;
278 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
281 pkt1.offset = DEFAULT_PACKET_OFFSET;
282 pkt2.offset = DEFAULT_PACKET_OFFSET;
284 if(n->status.sptps) {
285 if(!n->sptps.state) {
286 if(!n->status.waitingforkey) {
287 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
290 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
// Skip the source and destination node IDs that precede the SPTPS datagram.
294 inpkt->offset += 2 * sizeof(node_id_t);
295 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
296 logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
302 #ifdef DISABLE_LEGACY
305 if(!n->status.validkey) {
306 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
310 /* Check packet length */
312 if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
313 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
314 n->name, n->hostname);
318 /* It's a legacy UDP packet, the data starts after the seqno */
320 inpkt->offset += sizeof(seqno_t);
322 /* Check the message authentication code */
324 if(digest_active(n->indigest)) {
// The digest sits at the end of the packet; drop it from len, then verify it over what remains.
325 inpkt->len -= digest_length(n->indigest);
326 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
327 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
331 /* Decrypt the packet */
333 if(cipher_active(n->incipher)) {
334 vpn_packet_t *outpkt = pkt[nextpkt++];
337 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
338 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
342 outpkt->len = outlen;
346 /* Check the sequence number */
// The sequence number is stored in network byte order in front of the data.
349 memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
350 seqno = ntohl(seqno);
351 inpkt->len -= sizeof seqno;
// Anything other than the next expected sequence number goes through the replay-window logic.
354 if(seqno != n->received_seqno + 1) {
// seqno is at least a whole window (replaywin * 8) ahead of the last one received.
355 if(seqno >= n->received_seqno + replaywin * 8) {
356 if(n->farfuture++ < replaywin >> 2) {
357 logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
358 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
361 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
362 seqno - n->received_seqno - 1, n->name, n->hostname);
// Reset the whole late bitmap.
363 memset(n->late, 0, replaywin);
364 } else if (seqno <= n->received_seqno) {
// Reported as late or replayed if it is older than the window or its bit in the late bitmap is not set.
365 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
366 logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
367 n->name, n->hostname, seqno, n->received_seqno);
// Set the late bit for every sequence number that was skipped.
371 for(int i = n->received_seqno + 1; i < seqno; i++)
372 n->late[(i / 8) % replaywin] |= 1 << i % 8;
// Clear this packet's bit so that another copy of it fails the bitmap test above.
377 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
380 if(seqno > n->received_seqno)
381 n->received_seqno = seqno;
385 if(n->received_seqno > MAX_SEQNO)
388 /* Decompress the packet */
390 length_t origlen = inpkt->len;
392 if(n->incompression) {
393 vpn_packet_t *outpkt = pkt[nextpkt++];
395 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
396 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
397 n->name, n->hostname);
// NOTE(review): presumably compensates for compression overhead in the probe length - confirm.
403 origlen -= MTU/64 + 20;
// Bytes 12 and 13 both being zero identifies a UDP probe; it is handled with the pre-decompression length.
408 if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
409 udp_probe_h(n, inpkt, origlen);
411 receive_packet(n, inpkt);
// Wraps len bytes received on meta connection c in a local vpn_packet_t and
// passes it to receive_packet() on behalf of c->node.
416 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
418 outpkt.offset = DEFAULT_PACKET_OFFSET;
// Guard against a payload that does not fit behind the packet offset.
420 if(len > sizeof outpkt.data - outpkt.offset)
424 if(c->options & OPTION_TCPONLY)
// Priority -1 is what send_packet() tests to route a packet over TCP.
427 outpkt.priority = -1;
428 memcpy(DATA(&outpkt), buffer, len);
430 receive_packet(c->node, &outpkt);
// Sends origpkt to node n through its SPTPS session.
// Probe packets (bytes 12 and 13 both zero) are sent as PKT_PROBE records.
// Other packets are optionally compressed, and are sent either as an SPTPS record
// or via send_tcppacket() on a direct meta connection.
433 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
434 if(!n->status.validkey && !n->connection)
440 if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
441 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
445 if(routing_mode == RMODE_ROUTER)
// A packet shorter than the header being skipped cannot be sent.
450 if(origpkt->len < offset)
455 if(n->outcompression) {
457 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
459 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
// The compressed form is used only when it is actually smaller than the original payload.
460 } else if(len < origpkt->len - offset) {
461 outpkt.len = len + offset;
463 type |= PKT_COMPRESSED;
467 /* If we have a direct metaconnection to n, and we can't use UDP, then
468 don't bother with SPTPS and just use a "plaintext" PACKET message.
469 We don't really care about end-to-end security since we're not
470 sending the message through any intermediate nodes. */
471 if(n->connection && origpkt->len > n->minmtu)
472 send_tcppacket(n->connection, origpkt);
474 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
// Checks that listen socket index *sock has the same address family as sa;
// if not, scans the listen sockets for one whose family matches.
478 static void adapt_socket(const sockaddr_t *sa, int *sock) {
479 /* Make sure we have a suitable socket for the chosen address */
480 if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
481 for(int i = 0; i < listen_sockets; i++) {
482 if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
// Selects the destination address (*sa) and listen socket index (*sock) for a UDP packet to node n.
// The random fallback takes the address of the reverse of a randomly chosen edge and a random
// listen socket, then calls adapt_socket().
490 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
495 /* If the UDP address is confirmed, use it. */
496 if(n->status.udp_confirmed)
499 /* Send every third packet to n->address; that could be set
500 to the node's reflexive UDP address discovered during key
509 /* Otherwise, addresses are found in edges to this node.
510 So we pick a random edge and a random socket. */
// j is a random index into n's edge tree.
513 int j = rand() % n->edge_tree->count;
514 edge_t *candidate = NULL;
516 for splay_each(edge_t, e, n->edge_tree) {
// The reverse edge is the one whose address is used.
518 candidate = e->reverse;
524 *sa = &candidate->address;
525 *sock = rand() % listen_sockets;
528 adapt_socket(*sa, sock);
// Selects a local address of node n: when the chosen edge has a local_address with a
// non-zero address family, *sa is set to it and *sock to a random listen socket
// adjusted by adapt_socket().
531 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
534 /* Pick one of the edges from this node at random, then use its local address. */
537 int j = rand() % n->edge_tree->count;
538 edge_t *candidate = NULL;
540 for splay_each(edge_t, e, n->edge_tree) {
// A zero sa_family means the edge has no local address to use.
547 if (candidate && candidate->local_address.sa.sa_family) {
548 *sa = &candidate->local_address;
549 *sock = rand() % listen_sockets;
550 adapt_socket(*sa, sock);
// Sends origpkt to node n over UDP.
// One path hands the packet to send_sptps_packet(). The legacy path falls back to TCP when no
// valid key is known or the packet exceeds minmtu; otherwise it compresses, adds a sequence
// number, encrypts, appends the MAC and calls sendto().
// origpkt->len is restored to its original value afterwards.
554 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
// Scratch packets taken in turn (via nextpkt) by the compress and encrypt stages.
555 vpn_packet_t pkt1, pkt2;
556 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
557 vpn_packet_t *inpkt = origpkt;
559 vpn_packet_t *outpkt;
560 int origlen = origpkt->len;
562 #if defined(SOL_IP) && defined(IP_TOS)
// Static: holds the last IP_TOS value applied, so setsockopt() runs only when the priority changes.
563 static int priority = 0;
564 int origpriority = origpkt->priority;
567 pkt1.offset = DEFAULT_PACKET_OFFSET;
568 pkt2.offset = DEFAULT_PACKET_OFFSET;
570 if(!n->status.reachable) {
571 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
576 return send_sptps_packet(n, origpkt);
578 #ifdef DISABLE_LEGACY
581 /* Make sure we have a valid key */
583 if(!n->status.validkey) {
584 logger(DEBUG_TRAFFIC, LOG_INFO,
585 "No valid key known yet for %s (%s), forwarding via TCP",
586 n->name, n->hostname);
587 send_tcppacket(n->nexthop->connection, origpkt);
// Non-probe packets larger than minmtu are not sent over UDP while PMTU discovery is enabled.
591 if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
592 logger(DEBUG_TRAFFIC, LOG_INFO,
593 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
594 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
597 send_packet(n->nexthop, origpkt);
599 send_tcppacket(n->nexthop->connection, origpkt);
604 /* Compress the packet */
606 if(n->outcompression) {
607 outpkt = pkt[nextpkt++];
609 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
610 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
611 n->name, n->hostname);
618 /* Add sequence number */
// The per-node counter is pre-incremented and stored in network byte order.
620 seqno_t seqno = htonl(++(n->sent_seqno));
621 memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
622 inpkt->len += sizeof seqno;
624 /* Encrypt the packet */
626 if(cipher_active(n->outcipher)) {
627 outpkt = pkt[nextpkt++];
630 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
631 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
635 outpkt->len = outlen;
639 /* Add the message authentication code */
641 if(digest_active(n->outdigest)) {
// The digest is written directly after the current end of the packet.
642 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
643 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
647 inpkt->len += digest_length(n->outdigest);
650 /* Send the packet */
652 const sockaddr_t *sa = NULL;
655 if(n->status.send_locally)
656 choose_local_address(n, &sa, &sock);
658 choose_udp_address(n, &sa, &sock);
660 #if defined(SOL_IP) && defined(IP_TOS)
// NOTE(review): this block indexes listen_socket with n->sock, while sendto() below uses the
// freshly chosen sock - confirm which socket is meant to receive the IP_TOS setting.
661 if(priorityinheritance && origpriority != priority
662 && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
663 priority = origpriority;
664 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
665 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
666 logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
670 if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
// Message too large: lower maxmtu and mtu to just below the size that failed.
671 if(sockmsgsize(sockerrno)) {
672 if(n->maxmtu >= origlen)
673 n->maxmtu = origlen - 1;
674 if(n->mtu >= origlen)
675 n->mtu = origlen - 1;
678 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
// Undo the in-place length changes made to the caller's packet.
682 origpkt->len = origlen;
// Transmits len bytes of SPTPS data from node `from` towards node `to`.
// The data travels either base64-encoded inside an ANS_KEY or REQ_KEY request on the meta
// connection of to->nexthop, or as a UDP datagram to the chosen relay, prefixed with
// destination and source node IDs when the relay supports them.
686 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
// Relay via to->via when it is not ourselves and the packet is a probe or fits its minmtu; otherwise via to->nexthop.
687 node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
688 bool direct = from == myself && to == relay;
// The top byte of relay->options is compared against 4 to decide whether node-ID prefixed packets are understood.
689 bool relay_supported = (relay->options >> 24) >= 4;
690 bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
692 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
693 TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
694 This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but to fall back to TCP. */
696 if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
// Buffer sized for the base64 expansion of len bytes plus a few spare bytes.
697 char buf[len * 4 / 3 + 5];
698 b64encode(data, buf, len);
699 /* If no valid key is known yet, send the packets using ANS_KEY requests,
700 to ensure we get to learn the reflexive UDP address. */
701 if(from == myself && !to->status.validkey) {
702 to->incompression = myself->incompression;
703 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
705 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
710 if(relay_supported) overhead += sizeof to->id + sizeof from->id;
711 char buf[len + overhead]; char* buf_ptr = buf;
712 if(relay_supported) {
714 /* Inform the recipient that this packet was sent directly. */
715 node_id_t nullid = {};
716 memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
718 memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
720 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
723 /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
724 memcpy(buf_ptr, data, len); buf_ptr += len;
726 const sockaddr_t *sa = NULL;
728 if(relay->status.send_locally)
729 choose_local_address(relay, &sa, &sock);
731 choose_udp_address(relay, &sa, &sock);
732 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
733 if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
// Message too large: lower the relay's maxmtu and mtu to just below the payload size that failed.
734 if(sockmsgsize(sockerrno)) {
735 // Compensate for SPTPS overhead
736 len -= SPTPS_DATAGRAM_OVERHEAD;
737 if(relay->maxmtu >= len)
738 relay->maxmtu = len - 1;
739 if(relay->mtu >= len)
740 relay->mtu = len - 1;
743 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
// Forwards to send_sptps_data_priv() with handle as the destination node and
// myself as the originating node, and returns its result.
751 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
752 return send_sptps_data_priv(handle, myself, type, data, len);
// Handles one SPTPS record received for the node passed as handle.
// Handshake records mark the key as valid. PKT_PROBE records go to udp_probe_h().
// Data records are validated, optionally decompressed, given an EtherType when the MAC header
// is absent, and passed to receive_packet().
755 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
756 node_t *from = handle;
758 if(type == SPTPS_HANDSHAKE) {
// First completed handshake: record that the key is valid and that we are no longer waiting for it.
759 if(!from->status.validkey) {
760 from->status.validkey = true;
761 from->status.waitingforkey = false;
762 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
768 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
773 inpkt.offset = DEFAULT_PACKET_OFFSET;
775 if(type == PKT_PROBE) {
777 memcpy(DATA(&inpkt), data, len);
778 udp_probe_h(from, &inpkt, len);
// Any flag other than PKT_COMPRESSED and PKT_MAC is unexpected.
782 if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
783 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
787 /* Check if we have the headers we need */
788 if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
789 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
791 } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
792 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
// Without PKT_MAC the payload is placed 14 bytes into the packet buffer.
795 int offset = (type & PKT_MAC) ? 0 : 14;
796 if(type & PKT_COMPRESSED) {
797 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
801 inpkt.len = ulen + offset;
803 if(inpkt.len > MAXSIZE)
806 memcpy(DATA(&inpkt) + offset, data, len);
807 inpkt.len = len + offset;
810 /* Generate the Ethernet packet type if necessary */
// The high nibble of byte 14 selects the EtherType written to bytes 12-13 (0x0800 or 0x86DD).
812 switch(DATA(&inpkt)[14] >> 4) {
814 DATA(&inpkt)[12] = 0x08;
815 DATA(&inpkt)[13] = 0x00;
818 DATA(&inpkt)[12] = 0x86;
819 DATA(&inpkt)[13] = 0xDD;
822 logger(DEBUG_TRAFFIC, LOG_ERR,
823 "Unknown IP version %d while reading packet from %s (%s)",
824 DATA(&inpkt)[14] >> 4, from->name, from->hostname);
829 receive_packet(from, &inpkt);
833 // This function tries to get SPTPS keys, if they aren't already known.
834 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
// If a key request has gone unanswered for more than 10 seconds, the SPTPS session is stopped
// and waitingforkey is cleared.
835 static void try_sptps(node_t *n) {
836 if(n->status.validkey)
839 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
841 if(!n->status.waitingforkey)
// last_req_key is compared with now.tv_sec, so it is a timestamp in seconds.
843 else if(n->last_req_key + 10 < now.tv_sec) {
844 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
845 sptps_stop(&n->sptps);
846 n->status.waitingforkey = false;
// Builds a UDP probe for node n and sends it with send_udppacket().
// The first 14 bytes are zero, which is how receivers recognise a probe (bytes 12 and 13 are
// zero); the remaining len - 14 bytes are random.
// NOTE(review): len is assumed to be at least 14; visible callers pass 16 or more - confirm.
853 static void send_udp_probe_packet(node_t *n, int len) {
855 packet.offset = DEFAULT_PACKET_OFFSET;
856 memset(DATA(&packet), 0, 14);
857 randomize(DATA(&packet) + 14, len - 14);
861 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
863 send_udppacket(n, &packet);
866 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
867 // If a tunnel is already established, it makes sure it stays up.
868 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
// A probe is sent whenever at least `interval` seconds have passed since n->udp_ping_sent.
869 static void try_udp(node_t* n) {
874 gettimeofday(&now, NULL);
875 struct timeval ping_tx_elapsed;
876 timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
// Confirmed tunnels use the keepalive interval; unconfirmed ones use the discovery interval.
878 int interval = n->status.udp_confirmed ? udp_discovery_keepalive_interval : udp_discovery_interval;
880 if(ping_tx_elapsed.tv_sec >= interval) {
// The probe is at least 16 bytes long, or minmtu bytes if that is larger.
881 send_udp_probe_packet(n, MAX(n->minmtu, 16));
882 n->udp_ping_sent = now;
// send_locally is set only for the duration of this one extra probe.
884 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
885 n->status.send_locally = true;
886 send_udp_probe_packet(n, 16);
887 n->status.send_locally = false;
// Estimates a starting maxmtu for node n from the operating system.
// A UDP socket is connected to the address picked by choose_udp_address(), IP_MTU is read
// with getsockopt(), and the IP header and (for SPTPS nodes) tinc overheads are subtracted.
892 static length_t choose_initial_maxmtu(node_t *n) {
897 const sockaddr_t *sa = NULL;
899 choose_udp_address(n, &sa, &sockindex);
903 sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP);
905 logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
909 if(connect(sock, &sa->sa, SALEN(sa->sa))) {
910 logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
916 socklen_t ip_mtu_len = sizeof ip_mtu;
917 if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) {
918 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
925 /* getsockopt(IP_MTU) returns the MTU of the physical interface.
926 We need to remove various overheads to get to the tinc MTU. */
927 length_t mtu = ip_mtu;
// Subtract the IPv6 or IPv4 header size depending on the address family.
928 mtu -= (sa->sa.sa_family == AF_INET6) ? sizeof(struct ip6_hdr) : sizeof(struct ip);
930 if(n->status.sptps) {
931 mtu -= SPTPS_DATAGRAM_OVERHEAD;
// Peers whose options top byte is at least 4 also carry two node IDs per packet.
932 if((n->options >> 24) >= 4)
933 mtu -= sizeof(node_id_t) + sizeof(node_id_t);
937 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu);
943 logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu);
953 // This function tries to determine the MTU of a node.
954 // By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu.
955 // If the MTU is already fixed, this function checks if it can be increased.
// It also refreshes the packet-loss estimate and the probe timestamps stored on the node.
956 static void try_mtu(node_t *n) {
957 if(!(n->options & OPTION_PMTU_DISCOVERY))
960 if(udp_discovery && !n->status.udp_confirmed) {
967 /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++
968 mtuprobes == 20: fix MTU, and go to -1
969 mtuprobes == -1: send one >maxmtu probe every pingtimeout */
972 gettimeofday(&now, NULL);
973 struct timeval elapsed;
// elapsed is the time since the previous probe was sent.
974 timersub(&now, &n->probe_sent_time, &elapsed);
975 if(n->mtuprobes >= 0) {
// During discovery, checks whether less than 333333 microseconds have passed since the last probe.
976 if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
979 if(elapsed.tv_sec < pingtimeout)
986 if(n->mtuprobes < 0) {
987 /* After the initial discovery, we only send one >maxmtu probe
988 to detect PMTU increases. */
989 if(n->maxmtu + 1 < MTU)
990 send_udp_probe_packet(n, n->maxmtu + 1);
992 /* Before initial discovery begins, set maxmtu to the most likely value.
993 If it's underestimated, we will correct it after initial discovery. */
994 if(n->mtuprobes == 0)
995 n->maxmtu = choose_initial_maxmtu(n);
998 /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
999 but it will typically increase convergence time in the no-loss case. */
1000 const length_t probes_per_cycle = 8;
1002 /* This magic value was determined using math simulations.
1003 It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
1004 Since 1407 is just below the range of tinc MTUs over typical networks,
1005 this fine-tuning allows tinc to cover a lot of ground very quickly.
1006 This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller,
1007 then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario
1008 if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */
1009 const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1;
// cycle_position counts down from probes_per_cycle - 1 to 0 within each cycle.
1011 const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1;
// Probing never starts below 512 bytes.
1012 const length_t minmtu = MAX(n->minmtu, 512);
1013 const float interval = n->maxmtu - minmtu;
1015 /* The core of the discovery algorithm is this exponential.
1016 It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
1017 This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
1018 are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
1019 on the precise MTU as we are approaching it.
1020 The last probe of the cycle is always 1 byte in size - this is to make sure we'll get at least one
1021 reply per cycle so that we can make progress. */
1022 const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
// Remember maxmtu so that a change made during send_udp_probe_packet() can be detected.
1024 length_t maxmtu = n->maxmtu;
1025 send_udp_probe_packet(n, minmtu + offset);
1026 /* If maxmtu changed, it means the probe was rejected by the system because it was too large.
1027 In that case, we recalculate with the new maxmtu and try again. */
1028 if(n->mtuprobes < 0 || maxmtu == n->maxmtu)
1032 if(n->mtuprobes >= 0)
// Reset the per-burst state that udp_probe_h() reads for its RTT calculation.
1036 n->probe_counter = 0;
1037 n->probe_sent_time = now;
1038 n->probe_time = now;
1040 /* Calculate the packet loss of incoming traffic by comparing the rate of
1041 packets received to the rate with which the sequence number has increased.
1042 TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
// Loss = 1 - (packets received since the last call) / (sequence numbers advanced since the last call).
1045 if(n->received > n->prev_received)
1046 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
// Otherwise the loss is 1 if the sequence number did not advance, else 0.
1048 n->packetloss = n->received_seqno <= n->prev_received_seqno;
1050 n->prev_received_seqno = n->received_seqno;
1051 n->prev_received = n->received;
1054 // This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys).
1055 // If a tunnel is already established, it tries to improve it (e.g. by trying to establish a UDP tunnel instead of TCP).
1056 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if TCP and/or UDP is usable.
1057 // By calling this function repeatedly, the tunnel is gradually improved until we hit the wall imposed by the underlying network environment.
1058 // It is recommended to call this function every time a packet is sent (or intended to be sent) to a node,
1059 // so that the tunnel keeps improving as packets flow, and then gracefully downgrades itself as it goes idle.
// The function calls itself on via->nexthop when "via" has no confirmed UDP address.
1060 static void try_tx(node_t *n) {
1061 /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
1062 messages anyway, so there's no need for SPTPS at all. Otherwise, get the keys. */
1063 if(n->status.sptps && !(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))) {
1065 if (!n->status.validkey)
// "via" is the node packets are actually sent to: n->nexthop when n->via is ourselves, else n->via.
1069 node_t *via = (n->via == myself) ? n->nexthop : n->via;
1071 if((myself->options | via->options) & OPTION_TCPONLY)
// Legacy nodes without a valid key: last_req_key limits this branch to once every 10 seconds.
1074 if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) {
1076 via->last_req_key = now.tv_sec;
1077 } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4) {
1082 /* If we don't know how to reach "via" yet, then try to reach it through a relay. */
1083 if(n->status.sptps && !via->status.udp_confirmed && via->nexthop != via && (via->nexthop->options >> 24) >= 4)
1084 try_tx(via->nexthop);
1088 send a packet to the given vpn ip.
// Sends packet to node n.
// One path writes the packet to the local device via devops.write(). Remote nodes are served
// through send_sptps_packet(), send_tcppacket() or send_udppacket(), depending on node state
// and packet priority.
1090 void send_packet(node_t *n, vpn_packet_t *packet) {
// Overwrite the first ETH_ALEN bytes of the frame with our own MAC before writing it to the device.
1095 memcpy(DATA(packet), mymac.x, ETH_ALEN);
1097 n->out_bytes += packet->len;
1098 devops.write(packet);
// NOTE(review): this trace message is logged at LOG_ERR priority - confirm that is intended.
1102 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
1103 packet->len, n->name, n->hostname);
1105 if(!n->status.reachable) {
1106 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
1107 n->name, n->hostname);
1112 n->out_bytes += packet->len;
1114 if(n->status.sptps) {
1115 send_sptps_packet(n, packet);
// Packets with priority -1, or with ourselves as n->via, go to n->nexthop; all others go to n->via.
1119 via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
// NOTE(review): prints n->via->hostname beside via->name; they differ when via is n->nexthop - confirm.
1122 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
1123 n->name, via->name, n->via->hostname);
1125 if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
// A failed TCP send terminates the meta connection.
1126 if(!send_tcppacket(via->connection, packet))
1127 terminate_connection(via->connection, true);
1129 send_udppacket(via, packet);
1132 /* Try to improve the tunnel.
1133 Note that we do this *after* we send the packet because sending actual packets takes priority
1134 with regard to the send buffer space and latency. */
1138 /* Broadcast a packet using the minimum spanning tree */
// `from` is the node the broadcast originated from. The packet is delivered to ourselves via
// send_packet(myself, ...) and then forwarded according to broadcast_mode.
1140 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1141 // Always give ourself a copy of the packet.
1143 send_packet(myself, packet);
1145 // In TunnelServer mode, do not forward broadcast packets.
1146 // The MST might not be valid and create loops.
1147 if(tunnelserver || broadcast_mode == BMODE_NONE)
1150 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1151 packet->len, from->name, from->hostname);
1153 switch(broadcast_mode) {
1154 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1155 // This guarantees all nodes receive the broadcast packet, and
1156 // usually distributes the sending of broadcast packets over all nodes.
// The connection the packet arrived on (from->nexthop->connection) is skipped.
1158 for list_each(connection_t, c, connection_list)
1159 if(c->edge && c->status.mst && c != from->nexthop->connection)
1160 send_packet(c->node, packet);
1163 // In direct mode, we send copies to each node we know of.
1164 // However, this only reaches nodes that can be reached in a single hop.
1165 // We don't have enough information to forward broadcast packets in this case.
// Recipients must be reachable, not ourselves, and satisfy the via/nexthop test below.
1170 for splay_each(node_t, n, node_tree)
1171 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1172 send_packet(n, packet);
// Fallback lookup of the sending node for a UDP packet received from address `from`.
// Walks edge_weight_tree, skipping unreachable nodes and ourselves, and uses try_mac() to test
// whether the packet authenticates for a candidate node.
// NOTE(review): last_hard_try looks like a once-per-second limit on checking edges whose address
// does not match - confirm.
1180 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
// Static, so the timestamp persists between calls.
1183 static time_t last_hard_try = 0;
1185 for splay_each(edge_t, e, edge_weight_tree) {
1186 if(!e->to->status.reachable || e->to == myself)
1189 if(sockaddrcmp_noport(from, &e->address)) {
1190 if(last_hard_try == now.tv_sec)
1195 if(!try_mac(e->to, pkt))
1203 last_hard_try = now.tv_sec;
1205 last_hard_try = now.tv_sec;
// I/O callback for a UDP listen socket (data is the listen_socket_t).
// Reads one datagram, identifies the sending node by address, by the source ID embedded in the
// packet, or by try_harder(). SPTPS packets addressed to another node are relayed with
// send_sptps_data_priv(); the rest go to receive_udppacket().
1209 void handle_incoming_vpn_data(void *data, int flags) {
1210 listen_socket_t *ls = data;
// An all-zero destination ID marks a packet that was sent directly to us.
1213 node_id_t nullid = {};
1214 sockaddr_t addr = {};
1215 socklen_t addrlen = sizeof addr;
1217 bool direct = false;
1220 int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1222 if(len <= 0 || len > MAXSIZE) {
// Would-block conditions are not logged as errors.
1223 if(!sockwouldblock(sockerrno))
1224 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1230 sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1232 // Try to figure out who sent this packet.
1234 node_t *n = lookup_node_udp(&addr);
1237 // It might be from a 1.1 node, which might have a source ID in the packet.
1238 pkt.offset = 2 * sizeof(node_id_t);
1239 from = lookup_node_id(SRCID(&pkt));
// The ID lookup is trusted only if the destination ID is null and the datagram verifies under that node's SPTPS state.
1240 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1241 if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1250 n = try_harder(&addr, &pkt);
// The hostname string is built only when the debug level would actually print it.
1255 if(debug_level >= DEBUG_PROTOCOL) {
1256 hostname = sockaddr2hostname(&addr);
1257 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1263 if(n->status.sptps) {
1264 pkt.offset = 2 * sizeof(node_id_t);
1266 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1271 from = lookup_node_id(SRCID(&pkt));
1272 to = lookup_node_id(DSTID(&pkt));
1275 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
// Relay the payload (without the two node IDs) towards its destination.
1280 send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1289 if(!receive_udppacket(from, &pkt))
// Remember which listen socket the packet arrived on, as an index into listen_socket.
1292 n->sock = ls - listen_socket;
// For direct packets whose source address differs from the stored one, update the node's UDP address.
1293 if(direct && sockaddrcmp(&addr, &n->address))
1294 update_node_udp(n, &addr);
// I/O callback for the local device: reads one packet with devops.read(), adds it to the
// traffic counters of myself, and hands it to route().
1297 void handle_device_data(void *data, int flags) {
1298 vpn_packet_t packet;
1299 packet.offset = DEFAULT_PACKET_OFFSET;
1300 packet.priority = 0;
1302 if(devops.read(&packet)) {
1303 myself->in_packets++;
1304 myself->in_bytes += packet.len;
1305 route(myself, &packet);