2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 1998-2005 Ivo Timmermans,
4 2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5 2010 Timothy Redaelli <timothy@redaelli.eu>
6 2010 Brandon Black <blblack@gmail.com>
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
35 #include "connection.h"
/* Yields the larger of a and b. Each argument appears twice in the expansion,
   so avoid passing expressions with side effects. */
52 #define MAX(a, b) ((a) > (b) ? (a) : (b))
55 /* The minimum size of a probe is 14 bytes, but since we normally use CBC mode
56 encryption, we can add a few extra random bytes without increasing the
57 resulting packet size. */
58 #define MIN_PROBE_SIZE 18
/* Work memory handed to the LZO compressors; sized for whichever of the two
   compressors used in compress_packet() needs more. */
62 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
/* Forward declaration: the probe code calls this before its definition. */
65 static void send_udppacket(node_t *, vpn_packet_t *);
/* Size in bytes of the per-node late-packet bitmap used by the legacy replay
   check; the window therefore covers replaywin * 8 sequence numbers. */
67 unsigned replaywin = 32;
/* When true, try_udp() also probes a node's local address while UDP is unconfirmed. */
68 bool localdiscovery = true;
/* Consulted by try_mtu() together with the node's udp_confirmed status. */
69 bool udp_discovery = true;
/* UDP discovery timing, all in seconds: the probe interval once UDP is
   confirmed, the probe interval while it is not, and how long to wait for a
   ping response before the confirmation is dropped. */
70 int udp_discovery_keepalive_interval = 10;
71 int udp_discovery_interval = 2;
72 int udp_discovery_timeout = 30;
/* 2^30; receive_udppacket() compares the last received sequence number
   against this bound. */
74 #define MAX_SEQNO 1073741824
/* Fixes the MTU of node n once discovery has converged, i.e. after 20 probes
   or as soon as minmtu has reached maxmtu. The two bounds are reconciled and
   the outcome is logged. */
76 static void try_fix_mtu(node_t *n) {
80 if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) {
/* Never let the lower bound exceed the upper bound. */
81 if(n->minmtu > n->maxmtu)
82 n->minmtu = n->maxmtu;
84 n->maxmtu = n->minmtu;
86 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
/* Timeout callback registered by udp_probe_h(): when no UDP ping response has
   arrived in time, the node's UDP confirmation is withdrawn.
   data is the callback argument given to timeout_add(), i.e. the node. */
91 static void udp_probe_timeout_handler(void *data) {
/* Nothing to withdraw if UDP was not confirmed in the first place. */
93 if(!n->status.udp_confirmed)
96 logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
97 n->status.udp_confirmed = false;
/* Sends a probe reply to node n, reusing the given packet buffer.
   len is the probe length being acknowledged. Peers with
   (n->options >> 24) >= 3 get a type 2 reply of MIN_PROBE_SIZE bytes that
   carries len inside the packet; older peers get a type 1 reply. */
104 static void send_udp_probe_reply(node_t *n, vpn_packet_t *packet, length_t len) {
105 if(!n->status.sptps && !n->status.validkey) {
106 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP probe reply to %s (%s) but we don't have his key yet", n->name, n->hostname);
110 /* Type 2 probe replies were introduced in protocol 17.3 */
111 if ((n->options >> 24) >= 3) {
/* Store the acknowledged length at byte offset 1, in network byte order. */
113 uint16_t len16 = htons(len);
114 memcpy(DATA(packet) + 1, &len16, 2);
115 packet->len = MIN_PROBE_SIZE;
116 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending type 2 probe reply length %u to %s (%s)", len, n->name, n->hostname);
119 /* Legacy protocol: n won't understand type 2 probe replies. */
121 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending type 1 probe reply length %u to %s (%s)", len, n->name, n->hostname);
124 /* Temporarily set udp_confirmed, so that the reply is sent
125 back exactly the way it came in. */
127 bool udp_confirmed = n->status.udp_confirmed;
128 n->status.udp_confirmed = true;
129 send_udppacket(n, packet);
130 n->status.udp_confirmed = udp_confirmed;
/* Handles a UDP probe packet received from node n.
   A first data byte of 0 marks a probe request, which is answered through
   send_udp_probe_reply(). Anything else is treated as a reply: a type 2 reply
   carries the acknowledged length inside the packet, otherwise len is used.
   A reply confirms UDP connectivity, re-arms the UDP ping timeout and feeds
   the path MTU discovery state (maxmtu, minmtu, mtuprobes). */
133 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
134 if(!DATA(packet)[0]) {
135 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
136 return send_udp_probe_reply(n, packet, len);
139 if (DATA(packet)[0] == 2) {
140 // It's a type 2 probe reply, use the length field inside the packet
142 memcpy(&len16, DATA(packet) + 1, 2);
146 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], len, n->name, n->hostname);
148 /* It's a valid reply: now we know bidirectional communication
149 is possible using the address and socket that the reply
151 n->status.udp_confirmed = true;
153 // Reset the UDP ping timer.
154 n->udp_ping_sent = now;
157 timeout_del(&n->udp_ping_timeout);
158 timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
161 if(len > n->maxmtu) {
162 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
165 /* Set mtuprobes to 1 so that try_mtu() doesn't reset maxmtu */
168 } else if(n->mtuprobes < 0 && len == n->maxmtu) {
169 /* We got a maxmtu sized packet, confirming the PMTU is still valid. */
171 n->mtu_ping_sent = now;
174 /* If applicable, raise the minimum supported MTU */
176 if(n->minmtu < len) {
/* Compresses len bytes from source into dest using the method selected by
   level: lzo1x_1_compress for level 10, zlib's compress2() for levels below
   10, and lzo1x_999_compress in the remaining branch. One branch copies the
   data unchanged (presumably level 0 -- confirm against the condition).
   The compressors are given MAXSIZE as the capacity of dest. */
182 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
184 memcpy(dest, source, len);
186 } else if(level == 10) {
188 lzo_uint lzolen = MAXSIZE;
189 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
194 } else if(level < 10) {
196 unsigned long destlen = MAXSIZE;
197 if(compress2(dest, &destlen, source, len, level) == Z_OK)
204 lzo_uint lzolen = MAXSIZE;
205 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
/* Reverses compress_packet(): expands len bytes from source into dest.
   Levels above 9 use lzo1x_decompress_safe(), the last branch uses zlib's
   uncompress(), and one branch copies the data unchanged (presumably level 0
   -- confirm against the condition). MAXSIZE is the capacity given for dest. */
215 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
217 memcpy(dest, source, len);
219 } else if(level > 9) {
221 lzo_uint lzolen = MAXSIZE;
222 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
230 unsigned long destlen = MAXSIZE;
231 if(uncompress(dest, &destlen, source, len) == Z_OK)
/* Common entry point for a decoded VPN packet received from node n:
   logs the packet and adds its length to the node's inbound byte counter. */
243 static void receive_packet(node_t *n, vpn_packet_t *packet) {
244 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
245 packet->len, n->name, n->hostname);
248 n->in_bytes += packet->len;
/* Tests whether inpkt authenticates as having been sent by node n.
   SPTPS peers are checked with sptps_verify_datagram(); for legacy peers the
   digest stored at the end of the packet is verified with n->indigest. */
253 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
255 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
257 #ifdef DISABLE_LEGACY
/* Without a valid incoming key, an active digest, or enough bytes for a
   sequence number plus digest, there is nothing to verify. */
260 if(!n->status.validkey_in || !digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
263 return digest_verify(n->indigest, inpkt->data, inpkt->len - digest_length(n->indigest), inpkt->data + inpkt->len - digest_length(n->indigest));
/* Processes a UDP packet received from node n.
   For SPTPS peers the payload after the two node IDs is passed to
   sptps_receive_data(). For legacy peers the packet is length-checked,
   authenticated, decrypted, checked against the replay window by sequence
   number and decompressed, then dispatched to udp_probe_h() (zero type
   bytes 12 and 13) or receive_packet(). */
267 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
/* Scratch packets used alternately as the output of each processing stage. */
268 vpn_packet_t pkt1, pkt2;
269 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
272 pkt1.offset = DEFAULT_PACKET_OFFSET;
273 pkt2.offset = DEFAULT_PACKET_OFFSET;
275 if(n->status.sptps) {
276 if(!n->sptps.state) {
277 if(!n->status.waitingforkey) {
278 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
281 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
/* Skip the two node IDs that precede the SPTPS payload. */
285 inpkt->offset += 2 * sizeof(node_id_t);
/* Flag that the record is arriving over UDP while it is being decoded;
   receive_sptps_record() reads this flag. */
286 n->status.udppacket = true;
287 bool result = sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t));
288 n->status.udppacket = false;
291 /* Uh-oh. It might be that the tunnel is stuck in some corrupted state,
292 so let's restart SPTPS in case that helps. But don't do that too often
293 to prevent storms, and because that would make life a little too easy
294 for external attackers trying to DoS us. */
295 if(n->last_req_key < now.tv_sec - 10) {
/* NOTE(review): this message says "TCP" although this is the UDP receive
   path -- confirm whether the wording is intended. */
296 logger(DEBUG_PROTOCOL, LOG_ERR, "Failed to decode raw TCP packet from %s (%s), restarting SPTPS", n->name, n->hostname);
304 #ifdef DISABLE_LEGACY
307 if(!n->status.validkey_in) {
308 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
312 /* Check packet length */
314 if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
315 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
316 n->name, n->hostname);
320 /* It's a legacy UDP packet, the data starts after the seqno */
322 inpkt->offset += sizeof(seqno_t);
324 /* Check the message authentication code */
326 if(digest_active(n->indigest)) {
327 inpkt->len -= digest_length(n->indigest);
328 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
329 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
333 /* Decrypt the packet */
335 if(cipher_active(n->incipher)) {
336 vpn_packet_t *outpkt = pkt[nextpkt++];
339 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
340 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
344 outpkt->len = outlen;
348 /* Check the sequence number */
351 memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
352 seqno = ntohl(seqno);
353 inpkt->len -= sizeof seqno;
356 if(seqno != n->received_seqno + 1) {
357 if(seqno >= n->received_seqno + replaywin * 8) {
358 if(n->farfuture++ < replaywin >> 2) {
359 logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
360 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
363 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
364 seqno - n->received_seqno - 1, n->name, n->hostname);
365 memset(n->late, 0, replaywin);
366 } else if (seqno <= n->received_seqno) {
/* An old sequence number is only acceptable when it lies inside the
   window and its bit in n->late is still set, i.e. it was skipped earlier. */
367 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
368 logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
369 n->name, n->hostname, seqno, n->received_seqno);
/* Flag every sequence number skipped between the last received one and
   this one, so that it is still accepted if it arrives late. */
373 for(int i = n->received_seqno + 1; i < seqno; i++)
374 n->late[(i / 8) % replaywin] |= 1 << i % 8;
/* This sequence number has now been seen; clear its bit so that a replay
   of it is rejected. */
379 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
382 if(seqno > n->received_seqno)
383 n->received_seqno = seqno;
387 if(n->received_seqno > MAX_SEQNO)
390 /* Decompress the packet */
392 length_t origlen = inpkt->len;
394 if(n->incompression) {
395 vpn_packet_t *outpkt = pkt[nextpkt++];
397 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
398 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
399 n->name, n->hostname);
405 origlen -= MTU/64 + 20;
/* Track the largest packet length seen from this node. */
408 if(inpkt->len > n->maxrecentlen)
409 n->maxrecentlen = inpkt->len;
/* Zero type bytes (12 and 13) identify a probe packet. */
413 if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
414 udp_probe_h(n, inpkt, origlen);
416 receive_packet(n, inpkt);
/* Handles a VPN packet that arrived over the TCP connection c: len bytes
   from buffer are copied into a local packet, which is then passed to
   receive_packet() on behalf of c->node. */
421 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
423 outpkt.offset = DEFAULT_PACKET_OFFSET;
/* The data must fit in the packet buffer behind the default offset. */
425 if(len > sizeof outpkt.data - outpkt.offset)
429 if(c->options & OPTION_TCPONLY)
432 outpkt.priority = -1;
433 memcpy(DATA(&outpkt), buffer, len);
435 receive_packet(c->node, &outpkt);
/* Handles an SPTPS packet received over TCP on connection c.
   data begins with the destination node ID followed by the source node ID;
   both are looked up and stripped. Packets addressed to another node are
   relayed with send_sptps_data(); packets for us are fed into the source
   node's SPTPS state. */
438 bool receive_tcppacket_sptps(connection_t *c, const char *data, int len) {
439 if (len < sizeof(node_id_t) + sizeof(node_id_t)) {
440 logger(DEBUG_ALWAYS, LOG_ERR, "Got too short TCP SPTPS packet from %s (%s)", c->name, c->hostname);
/* First ID: the destination node. */
444 node_t *to = lookup_node_id((node_id_t *)data);
445 data += sizeof(node_id_t); len -= sizeof(node_id_t);
447 logger(DEBUG_PROTOCOL, LOG_ERR, "Got TCP SPTPS packet from %s (%s) with unknown destination ID", c->name, c->hostname);
/* Second ID: the source node. */
451 node_t *from = lookup_node_id((node_id_t *)data);
452 data += sizeof(node_id_t); len -= sizeof(node_id_t);
454 logger(DEBUG_PROTOCOL, LOG_ERR, "Got TCP SPTPS packet from %s (%s) with unknown source ID", c->name, c->hostname);
458 /* Help the sender reach us over UDP.
459 Note that we only do this if we're the destination or the static relay;
460 otherwise every hop would initiate its own UDP info message, resulting in elevated chatter. */
461 if(to->via == myself)
462 send_udp_info(myself, from);
464 /* If we're not the final recipient, relay the packet. */
467 send_sptps_data(to, from, 0, data, len);
472 /* The packet is for us */
474 if(!sptps_receive_data(&from->sptps, data, len)) {
475 /* Uh-oh. It might be that the tunnel is stuck in some corrupted state,
476 so let's restart SPTPS in case that helps. But don't do that too often
477 to prevent storms. */
478 if(from->last_req_key < now.tv_sec - 10) {
479 logger(DEBUG_PROTOCOL, LOG_ERR, "Failed to decode raw TCP packet from %s (%s), restarting SPTPS", from->name, from->hostname);
485 send_mtu_info(myself, from, MTU);
/* Sends the VPN packet origpkt to node n using SPTPS.
   Packets whose type bytes (12 and 13) are both zero are probes and go out
   as PKT_PROBE records. Other packets are compressed when n->outcompression
   is set, then sent either as a plain TCP packet over a direct connection or
   as an SPTPS record. */
489 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
490 if(!n->status.validkey && !n->connection)
496 if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
497 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
501 if(routing_mode == RMODE_ROUTER)
506 if(origpkt->len < offset)
511 if(n->outcompression) {
513 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
515 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
/* Only use the compressed form when it is actually smaller. */
516 } else if(len < origpkt->len - offset) {
517 outpkt.len = len + offset;
519 type |= PKT_COMPRESSED;
523 /* If we have a direct metaconnection to n, and we can't use UDP, then
524 don't bother with SPTPS and just use a "plaintext" PACKET message.
525 We don't really care about end-to-end security since we're not
526 sending the message through any intermediate nodes. */
527 if(n->connection && origpkt->len > n->minmtu)
528 send_tcppacket(n->connection, origpkt);
530 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
/* Checks that the listening socket indexed by *sock has the same address
   family as sa; if not, listen_socket[] is searched for one that does.
   sock is both input (the current choice) and output. */
534 static void adapt_socket(const sockaddr_t *sa, int *sock) {
535 /* Make sure we have a suitable socket for the chosen address */
536 if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
537 for(int i = 0; i < listen_sockets; i++) {
538 if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
/* Chooses where to send a UDP packet for node n.
   On return *sa points at the chosen address and *sock holds the index of
   the listening socket to send from. A confirmed UDP address takes
   precedence; otherwise an address is taken from one of the node's edges. */
546 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
551 /* If the UDP address is confirmed, use it. */
552 if(n->status.udp_confirmed)
555 /* Send every third packet to n->address; that could be set
556 to the node's reflexive UDP address discovered during key
565 /* Otherwise, addresses are found in edges to this node.
566 So we pick a random edge and a random socket. */
/* NOTE(review): the modulo assumes n->edge_tree->count is non-zero --
   confirm callers guarantee the node has at least one edge. */
569 int j = rand() % n->edge_tree->count;
570 edge_t *candidate = NULL;
572 for splay_each(edge_t, e, n->edge_tree) {
574 candidate = e->reverse;
580 *sa = &candidate->address;
581 *sock = rand() % listen_sockets;
/* Make sure the socket matches the address family of the chosen address. */
584 adapt_socket(*sa, sock);
/* Chooses a local address of node n to send a UDP packet to.
   A random edge of the node is picked; if it has a local address (non-zero
   address family), *sa is pointed at it and *sock is set to a matching
   listening socket index. */
587 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
590 /* Pick one of the edges from this node at random, then use its local address. */
/* NOTE(review): the modulo assumes n->edge_tree->count is non-zero --
   confirm callers guarantee the node has at least one edge. */
593 int j = rand() % n->edge_tree->count;
594 edge_t *candidate = NULL;
596 for splay_each(edge_t, e, n->edge_tree) {
603 if (candidate && candidate->local_address.sa.sa_family) {
604 *sa = &candidate->local_address;
605 *sock = rand() % listen_sockets;
606 adapt_socket(*sa, sock);
/* Sends the VPN packet origpkt to node n over UDP.
   SPTPS peers are handed to send_sptps_packet(). For legacy peers the packet
   falls back to TCP when no valid key is known or when it exceeds the
   discovered minimum MTU; otherwise it is compressed, prefixed with a
   sequence number, encrypted and authenticated as configured for the node,
   and transmitted with sendto(). origpkt->len is restored before returning. */
610 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
/* Scratch packets used alternately as the output of each processing stage. */
611 vpn_packet_t pkt1, pkt2;
612 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
613 vpn_packet_t *inpkt = origpkt;
615 vpn_packet_t *outpkt;
616 int origlen = origpkt->len;
618 #if defined(SOL_IP) && defined(IP_TOS)
/* Remembers the TOS value most recently applied, so setsockopt() is only
   called when the priority changes. */
619 static int priority = 0;
620 int origpriority = origpkt->priority;
623 pkt1.offset = DEFAULT_PACKET_OFFSET;
624 pkt2.offset = DEFAULT_PACKET_OFFSET;
626 if(!n->status.reachable) {
627 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
632 return send_sptps_packet(n, origpkt);
634 #ifdef DISABLE_LEGACY
637 /* Make sure we have a valid key */
639 if(!n->status.validkey) {
640 logger(DEBUG_TRAFFIC, LOG_INFO,
641 "No valid key known yet for %s (%s), forwarding via TCP",
642 n->name, n->hostname);
643 send_tcppacket(n->nexthop->connection, origpkt);
/* Non-probe packets larger than the known-good MTU are not sent over UDP. */
647 if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
648 logger(DEBUG_TRAFFIC, LOG_INFO,
649 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
650 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
653 send_packet(n->nexthop, origpkt);
655 send_tcppacket(n->nexthop->connection, origpkt);
660 /* Compress the packet */
662 if(n->outcompression) {
663 outpkt = pkt[nextpkt++];
665 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
666 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
667 n->name, n->hostname);
674 /* Add sequence number */
676 seqno_t seqno = htonl(++(n->sent_seqno));
677 memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
678 inpkt->len += sizeof seqno;
680 /* Encrypt the packet */
682 if(cipher_active(n->outcipher)) {
683 outpkt = pkt[nextpkt++];
686 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
687 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
691 outpkt->len = outlen;
695 /* Add the message authentication code */
697 if(digest_active(n->outdigest)) {
698 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
699 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
703 inpkt->len += digest_length(n->outdigest);
706 /* Send the packet */
708 const sockaddr_t *sa = NULL;
711 if(n->status.send_locally)
712 choose_local_address(n, &sa, &sock);
714 choose_udp_address(n, &sa, &sock);
716 #if defined(SOL_IP) && defined(IP_TOS)
/* NOTE(review): the TOS is applied to listen_socket[n->sock], while the
   packet below is sent on listen_socket[sock] -- confirm these are meant
   to refer to the same socket. */
717 if(priorityinheritance && origpriority != priority
718 && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
719 priority = origpriority;
720 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
721 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
722 logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
726 if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
/* On a message-size error (as reported by sockmsgsize()), the original
   length is known to be too large: pull maxmtu and mtu below it. */
727 if(sockmsgsize(sockerrno)) {
728 if(n->maxmtu >= origlen)
729 n->maxmtu = origlen - 1;
730 if(n->mtu >= origlen)
731 n->mtu = origlen - 1;
734 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
738 origpkt->len = origlen;
/* Transmits len bytes of SPTPS data of the given type from node `from`
   towards node `to`.
   TCP is used for handshake records, when TCPOnly applies, when a relay
   cannot handle relayed packets, or when the data exceeds the relay's
   minmtu: either as a binary SPTPS packet prefixed with both node IDs, or
   base64-encoded inside an ANS_KEY/REQ_KEY request. Otherwise the data is
   sent over UDP to the relay, prefixed with node IDs when the relay
   supports that. */
742 bool send_sptps_data(node_t *to, node_t *from, int type, const void *data, size_t len) {
/* Prefer the static relay (to->via) when the data fits its minmtu or is a
   probe; otherwise go through the next hop. */
743 node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
744 bool direct = from == myself && to == relay;
745 bool relay_supported = (relay->options >> 24) >= 4;
746 bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
748 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU. */
750 if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
751 if(type != SPTPS_HANDSHAKE && (to->nexthop->connection->options >> 24) >= 7) {
/* Binary form: destination ID, source ID, then the data. */
752 char buf[len + sizeof to->id + sizeof from->id]; char* buf_ptr = buf;
753 memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
754 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
755 memcpy(buf_ptr, data, len); buf_ptr += len;
756 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s) (TCP)", from->name, from->hostname, to->name, to->hostname, to->nexthop->name, to->nexthop->hostname);
757 return send_sptps_tcppacket(to->nexthop->connection, buf, sizeof buf);
/* Text form: the data is base64-encoded into a request line. */
760 char buf[len * 4 / 3 + 5];
761 b64encode(data, buf, len);
762 /* If this is a handshake packet, use ANS_KEY instead of REQ_KEY, for two reasons:
763 - We don't want intermediate nodes to switch to UDP to relay these packets;
764 - ANS_KEY allows us to learn the reflexive UDP address. */
765 if(type == SPTPS_HANDSHAKE) {
766 to->incompression = myself->incompression;
767 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
769 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, SPTPS_PACKET, buf);
/* UDP path: reserve room for the two node IDs when the relay supports them. */
774 if(relay_supported) overhead += sizeof to->id + sizeof from->id;
775 char buf[len + overhead]; char* buf_ptr = buf;
776 if(relay_supported) {
778 /* Inform the recipient that this packet was sent directly. */
779 node_id_t nullid = {};
780 memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
782 memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
784 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
787 /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
788 memcpy(buf_ptr, data, len); buf_ptr += len;
790 const sockaddr_t *sa = NULL;
792 if(relay->status.send_locally)
793 choose_local_address(relay, &sa, &sock);
795 choose_udp_address(relay, &sa, &sock);
796 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s) (UDP)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
797 if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
/* On a message-size error (as reported by sockmsgsize()), lower the
   relay's maxmtu and mtu below the payload length that failed. */
798 if(sockmsgsize(sockerrno)) {
799 // Compensate for SPTPS overhead
800 len -= SPTPS_DATAGRAM_OVERHEAD;
801 if(relay->maxmtu >= len)
802 relay->maxmtu = len - 1;
803 if(relay->mtu >= len)
804 relay->mtu = len - 1;
807 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
/* Handles one decoded SPTPS record of len bytes from the node passed as
   handle.
   A handshake record marks the node's key as valid. PKT_PROBE records are
   forwarded to udp_probe_h() and are only accepted over UDP. Data records
   are checked for a known type and for MAC-header consistency with the
   routing mode, decompressed when PKT_COMPRESSED is set, given an Ethernet
   type derived from the IP version where needed, and then passed to
   receive_packet(). */
815 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
816 node_t *from = handle;
818 if(type == SPTPS_HANDSHAKE) {
819 if(!from->status.validkey) {
820 from->status.validkey = true;
821 from->status.waitingforkey = false;
822 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
828 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
833 inpkt.offset = DEFAULT_PACKET_OFFSET;
835 if(type == PKT_PROBE) {
/* status.udppacket is set by receive_udppacket() while it decodes a
   UDP datagram; a probe seen without it came in over TCP. */
836 if(!from->status.udppacket) {
837 logger(DEBUG_ALWAYS, LOG_ERR, "Got SPTPS PROBE packet from %s (%s) via TCP", from->name, from->hostname);
841 memcpy(DATA(&inpkt), data, len);
842 if(inpkt.len > from->maxrecentlen)
843 from->maxrecentlen = inpkt.len;
844 udp_probe_h(from, &inpkt, len);
/* Any bit other than PKT_COMPRESSED and PKT_MAC is an unknown record type. */
848 if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
849 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
853 /* Check if we have the headers we need */
854 if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
855 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
857 } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
858 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
/* Without a MAC header in the record, leave room for the 14-byte header. */
861 int offset = (type & PKT_MAC) ? 0 : 14;
862 if(type & PKT_COMPRESSED) {
863 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
867 inpkt.len = ulen + offset;
869 if(inpkt.len > MAXSIZE)
872 memcpy(DATA(&inpkt) + offset, data, len);
873 inpkt.len = len + offset;
876 /* Generate the Ethernet packet type if necessary */
/* The high nibble of byte 14 is the IP version; the type bytes become
   0x0800 or 0x86DD accordingly. */
878 switch(DATA(&inpkt)[14] >> 4) {
880 DATA(&inpkt)[12] = 0x08;
881 DATA(&inpkt)[13] = 0x00;
884 DATA(&inpkt)[12] = 0x86;
885 DATA(&inpkt)[13] = 0xDD;
888 logger(DEBUG_TRAFFIC, LOG_ERR,
889 "Unknown IP version %d while reading packet from %s (%s)",
890 DATA(&inpkt)[14] >> 4, from->name, from->hostname);
/* Track the largest packet length seen over UDP from this node. */
895 if(from->status.udppacket && inpkt.len > from->maxrecentlen)
896 from->maxrecentlen = inpkt.len;
898 receive_packet(from, &inpkt);
902 // This function tries to get SPTPS keys, if they aren't already known.
903 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
904 static void try_sptps(node_t *n) {
// Nothing to do when the key is already valid.
905 if(n->status.validkey)
908 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
910 if(!n->status.waitingforkey)
// A key request has been outstanding for more than 10 seconds: stop the
// SPTPS session and clear the waiting flag.
912 else if(n->last_req_key + 10 < now.tv_sec) {
913 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
914 sptps_stop(&n->sptps);
915 n->status.waitingforkey = false;
/* Builds a UDP probe for node n and sends it with send_udppacket().
   The probe consists of 14 zero bytes (so the type bytes 12 and 13 mark it
   as a probe request) followed by len - 14 random bytes. */
922 static void send_udp_probe_packet(node_t *n, int len) {
924 packet.offset = DEFAULT_PACKET_OFFSET;
925 memset(DATA(&packet), 0, 14);
926 randomize(DATA(&packet) + 14, len - 14);
930 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
932 send_udppacket(n, &packet);
935 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
936 // If a tunnel is already established, it makes sure it stays up.
937 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
938 static void try_udp(node_t* n) {
942 /* Send gratuitous probe replies to 1.1 nodes. */
944 if((n->options >> 24) >= 3 && n->status.udp_confirmed) {
945 struct timeval ping_tx_elapsed;
946 timersub(&now, &n->udp_reply_sent, &ping_tx_elapsed);
948 if(ping_tx_elapsed.tv_sec >= udp_discovery_keepalive_interval - 1) {
949 n->udp_reply_sent = now;
/* Only reply once a packet length has been recorded; the reply
   acknowledges the largest length seen recently from this node. */
950 if(n->maxrecentlen) {
952 pkt.len = n->maxrecentlen;
953 pkt.offset = DEFAULT_PACKET_OFFSET;
954 memset(DATA(&pkt), 0, 14);
955 randomize(DATA(&pkt) + 14, MIN_PROBE_SIZE - 14);
956 send_udp_probe_reply(n, &pkt, pkt.len);
964 struct timeval ping_tx_elapsed;
965 timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
/* Use the keepalive interval once UDP is confirmed, the discovery
   interval otherwise. */
967 int interval = n->status.udp_confirmed ? udp_discovery_keepalive_interval : udp_discovery_interval;
969 if(ping_tx_elapsed.tv_sec >= interval) {
970 send_udp_probe_packet(n, MIN_PROBE_SIZE);
971 n->udp_ping_sent = now;
/* Additionally probe the node's local address; send_locally is set only
   for the duration of this one probe. */
973 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
974 n->status.send_locally = true;
975 send_udp_probe_packet(n, MIN_PROBE_SIZE);
976 n->status.send_locally = false;
/* Estimates a starting maxmtu for node n from the operating system.
   A temporary UDP socket is connected to the address chosen by
   choose_udp_address() and queried with getsockopt(IP_MTU); the IP header
   size and tinc's own per-packet overheads (SPTPS, or legacy digest and
   cipher block padding) are then subtracted from the result. */
981 static length_t choose_initial_maxmtu(node_t *n) {
986 const sockaddr_t *sa = NULL;
988 choose_udp_address(n, &sa, &sockindex);
992 sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP);
994 logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
998 if(connect(sock, &sa->sa, SALEN(sa->sa))) {
999 logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
1005 socklen_t ip_mtu_len = sizeof ip_mtu;
1006 if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) {
1007 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
1014 /* getsockopt(IP_MTU) returns the MTU of the physical interface.
1015 We need to remove various overheads to get to the tinc MTU. */
1016 length_t mtu = ip_mtu;
1017 mtu -= (sa->sa.sa_family == AF_INET6) ? sizeof(struct ip6_hdr) : sizeof(struct ip);
1019 if(n->status.sptps) {
1020 mtu -= SPTPS_DATAGRAM_OVERHEAD;
1021 if((n->options >> 24) >= 4)
1022 mtu -= sizeof(node_id_t) + sizeof(node_id_t);
1023 #ifndef DISABLE_LEGACY
1025 mtu -= digest_length(n->outdigest);
1027 /* Now it's tricky. We use CBC mode, so the length of the
1028 encrypted payload must be a multiple of the blocksize. The
1029 sequence number is also part of the encrypted payload, so we
1030 must account for it after correcting for the blocksize.
1031 Furthermore, the padding in the last block must be at least
1034 length_t blocksize = cipher_blocksize(n->outcipher);
1047 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu);
1053 logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu);
1063 /* This function tries to determine the MTU of a node.
1064 By calling this function repeatedly, n->minmtu will be progressively
1065 increased, and at some point, n->mtu will be fixed to n->minmtu. If the MTU
1066 is already fixed, this function checks if it can be increased.
1069 static void try_mtu(node_t *n) {
1070 if(!(n->options & OPTION_PMTU_DISCOVERY))
1073 if(udp_discovery && !n->status.udp_confirmed) {
1074 n->maxrecentlen = 0;
1081 /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++
1082 mtuprobes == 20: fix MTU, and go to -1
1083 mtuprobes == -1: send one maxmtu and one maxmtu+1 probe every pinginterval
1084 mtuprobes ==-2..-3: send one maxmtu probe every second
1085 mtuprobes == -4: maxmtu no longer valid, reset minmtu and maxmtu and go to 0 */
/* Rate limiting: compare the time since the last MTU probe against the
   minimum spacing that applies to the current mtuprobes state. */
1087 struct timeval elapsed;
1088 timersub(&now, &n->mtu_ping_sent, &elapsed);
1089 if(n->mtuprobes >= 0) {
1090 if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
1093 if(n->mtuprobes < -1) {
1094 if(elapsed.tv_sec < 1)
1097 if(elapsed.tv_sec < pinginterval)
1102 n->mtu_ping_sent = now;
1106 if(n->mtuprobes < -3) {
1107 /* We lost three MTU probes, restart discovery */
1108 logger(DEBUG_TRAFFIC, LOG_INFO, "Decrease in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
1113 if(n->mtuprobes < 0) {
1114 /* After the initial discovery, we only send one maxmtu and one
1115 maxmtu+1 probe to detect PMTU increases. */
1116 send_udp_probe_packet(n, n->maxmtu);
1117 if(n->mtuprobes == -1 && n->maxmtu + 1 < MTU)
1118 send_udp_probe_packet(n, n->maxmtu + 1);
1121 /* Before initial discovery begins, set maxmtu to the most likely value.
1122 If it's underestimated, we will correct it after initial discovery. */
1123 if(n->mtuprobes == 0)
1124 n->maxmtu = choose_initial_maxmtu(n);
1127 /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
1128 but it will typically increase convergence time in the no-loss case. */
1129 const length_t probes_per_cycle = 8;
1131 /* This magic value was determined using math simulations.
1132 It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
1133 Since 1407 is just below the range of tinc MTUs over typical networks,
1134 this fine-tuning allows tinc to cover a lot of ground very quickly.
1135 This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller,
1136 then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario
1137 if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */
1138 const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1;
/* cycle_position counts down from probes_per_cycle - 1 to 0 as mtuprobes
   advances through a cycle. */
1140 const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1;
/* Probing never starts below 512 bytes. */
1141 const length_t minmtu = MAX(n->minmtu, 512);
1142 const float interval = n->maxmtu - minmtu;
1144 /* The core of the discovery algorithm is this exponential.
1145 It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
1146 This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
1147 are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
1148 on the precise MTU as we are approaching it.
1149 The last probe of the cycle always uses an offset of 1 byte (i.e. it is minmtu + 1 bytes in size) - this
1150 is to make sure we'll get at least one reply per cycle so that we can make progress. */
1151 const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
1153 length_t maxmtu = n->maxmtu;
1154 send_udp_probe_packet(n, minmtu + offset);
1155 /* If maxmtu changed, it means the probe was rejected by the system because it was too large.
1156 In that case, we recalculate with the new maxmtu and try again. */
1157 if(n->mtuprobes < 0 || maxmtu == n->maxmtu)
1161 if(n->mtuprobes >= 0)
1166 /* These functions try to establish a tunnel to a node (or its relay) so that
1167 packets can be sent (e.g. exchange keys).
1168 If a tunnel is already established, it tries to improve it (e.g. by trying
1169 to establish a UDP tunnel instead of TCP). This function makes no
1170 guarantees - it is up to the caller to check the node's state to figure out
1171 if TCP and/or UDP is usable. By calling this function repeatedly, the
1172 tunnel is gradually improved until we hit the wall imposed by the underlying
1173 network environment. It is recommended to call this function every time a
1174 packet is sent (or intended to be sent) to a node, so that the tunnel keeps
1175 improving as packets flow, and then gracefully downgrades itself as it goes
/* Try to set up or improve an SPTPS tunnel towards n.
   n   - the node we want to reach.
   mtu - passed through unchanged to the recursive calls below; any other use
         is not visible in this excerpt.
   The function may recurse onto the static relay (via) or onto the dynamic
   relay (n->nexthop) instead of working on n directly. */
1179 static void try_tx_sptps(node_t *n, bool mtu) {
1180 /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
1181 messages anyway, so there's no need for SPTPS at all. */
/* "TCP-only neighbor" here means: we have a direct connection to n and
   OPTION_TCPONLY is set on either side (myself or n). */
1183 if(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))
1186 /* Otherwise, try to do SPTPS authentication with n if necessary. */
1190 /* Do we need to statically relay packets? */
/* When n->via is myself, the next hop is used in its place. */
1192 node_t *via = (n->via == myself) ? n->nexthop : n->via;
1194 /* If the static relay doesn't support SPTPS, everything goes via TCP anyway. */
/* NOTE(review): the top byte of options is compared against 4; presumably it
   carries the peer's protocol minor version and 4 is the first one that
   supports SPTPS relaying - confirm against the options layout. */
1196 if((via->options >> 24) < 4)
1199 /* If we do have a static relay, try everything with that one instead. */
/* NOTE(review): "return <expr>;" in a function returning void is not valid
   ISO C (compilers accept it as an extension); a plain call followed by
   "return;" would be the portable spelling. */
1202 return try_tx_sptps(via, mtu);
1204 /* Otherwise, try to establish UDP connectivity. */
1210 /* If we don't have UDP connectivity (yet), we need to use a dynamic relay (nexthop)
1211 while we try to establish direct connectivity. */
/* Only recurse when the next hop is a different node and passes the same
   ">= 4" options check used for the static relay above. */
1213 if(!n->status.udp_confirmed && n != n->nexthop && (n->nexthop->options >> 24) >= 4)
1214 try_tx_sptps(n->nexthop, mtu);
/* Legacy (non-SPTPS) counterpart of the tunnel setup: makes sure keys have
   been exchanged with n in both directions.
   n   - the node we want to reach.
   mtu - not referenced in the lines visible in this excerpt. */
1217 static void try_tx_legacy(node_t *n, bool mtu) {
1218 /* Does the peer have our key? If not, send one. */
1220 if(!n->status.validkey_in)
1223 /* Check if we already have a key, or request one. */
1225 if(!n->status.validkey) {
/* Rate limit: only act again once at least 10 seconds have passed since
   the time recorded in last_req_key. */
1226 if(n->last_req_key + 10 <= now.tv_sec) {
1228 n->last_req_key = now.tv_sec;
/* Externally visible entry point for tunnel setup/improvement towards n.
   Forwards n and mtu unchanged to try_tx_sptps() / try_tx_legacy().
   NOTE(review): the condition that selects between the two calls is not
   visible in this excerpt - confirm before relying on which one runs. */
1238 void try_tx(node_t *n, bool mtu) {
1240 try_tx_sptps(n, mtu);
1242 try_tx_legacy(n, mtu);
/* Send a VPN packet to node n.
   n      - destination node; packets for myself are written to the local device.
   packet - the packet to deliver; a priority of -1 forces delivery over TCP
            via the next hop (see the via selection and the TCP branch below).
   Updates n->out_bytes and hands the packet to the SPTPS, TCP or UDP send
   path depending on the node's status and options. */
1245 void send_packet(node_t *n, vpn_packet_t *packet) {
1246 // If it's for myself, write it to the tun/tap device.
// Overwrite the first ETH_ALEN bytes of the packet data with our own MAC
// (presumably the Ethernet destination address field - confirm).
1250 memcpy(DATA(packet), mymac.x, ETH_ALEN);
1252 n->out_bytes += packet->len;
1253 devops.write(packet);
// NOTE(review): this routine traffic trace is logged with LOG_ERR while the
// neighbouring DEBUG_TRAFFIC messages use LOG_INFO - confirm the priority is intended.
1257 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)", packet->len, n->name, n->hostname);
1259 // If the node is not reachable, drop it.
1261 if(!n->status.reachable) {
1262 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable", n->name, n->hostname);
1266 // Keep track of packet statistics.
1269 n->out_bytes += packet->len;
1271 // Check if it should be sent as an SPTPS packet.
1273 if(n->status.sptps) {
1274 send_sptps_packet(n, packet);
// Sending data is also used as an opportunity to improve the tunnel.
1275 try_tx_sptps(n, true);
1279 // Determine which node to actually send it to.
// The next hop is chosen when TCP is forced (priority -1) or when n->via is myself.
1281 node_t *via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
// NOTE(review): the message prints via->name together with n->via->hostname;
// when via is n->nexthop these belong to different nodes - via->hostname
// may have been intended.
1284 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)", n->name, via->name, n->via->hostname);
1286 // Try to send via UDP, unless TCP is forced.
// TCP is forced by priority -1 or by OPTION_TCPONLY on either myself or via.
1288 if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
// A failed TCP send tears down the connection to via.
1289 if(!send_tcppacket(via->connection, packet))
1290 terminate_connection(via->connection, true);
1294 send_udppacket(via, packet);
1295 try_tx_legacy(via, true);
/* Distribute a broadcast packet according to broadcast_mode.
   from   - the node the packet originated from; used for logging and to
            avoid sending the packet back over from->nexthop->connection.
   packet - the packet to broadcast; every copy is sent through send_packet(). */
1298 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1299 // Always give ourself a copy of the packet.
1301 send_packet(myself, packet);
1303 // In TunnelServer mode, do not forward broadcast packets.
1304 // The MST might not be valid and create loops.
// The same early exit applies when broadcasting is disabled (BMODE_NONE).
1305 if(tunnelserver || broadcast_mode == BMODE_NONE)
1308 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1309 packet->len, from->name, from->hostname);
1311 switch(broadcast_mode) {
1312 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1313 // This guarantees all nodes receive the broadcast packet, and
1314 // usually distributes the sending of broadcast packets over all nodes.
1316 for list_each(connection_t, c, connection_list)
// Only connections that have an edge and are flagged as part of the MST,
// excluding the connection towards the sender's next hop.
1317 if(c->edge && c->status.mst && c != from->nexthop->connection)
1318 send_packet(c->node, packet);
1321 // In direct mode, we send copies to each node we know of.
1322 // However, this only reaches nodes that can be reached in a single hop.
1323 // We don't have enough information to forward broadcast packets in this case.
1328 for splay_each(node_t, n, node_tree)
// Selects reachable nodes other than myself for which either
// (via is myself and the next hop is the node itself) or via is the node itself.
1329 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1330 send_packet(n, packet);
1338 /* We got a packet from some IP address, but we don't know who sent it. Try to
1339 verify the message authentication code against all active session keys.
1340 Since this is actually an expensive operation, the full check is throttled
1341 using last_hard_try; the rest of the time we only check against nodes for which
1342 we know an IP address that matches the one from the packet.
     NOTE(review): this comment used to say "once a minute", but the throttle below
     compares last_hard_try with now.tv_sec, i.e. it has one-second granularity.
     from - source address of the received datagram.
     pkt  - the received packet, passed to try_mac() for verification.
     match starts out as NULL; the return statement is not visible in this excerpt. */
1344 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1345 node_t *match = NULL;
/* Persists across calls: the second (now.tv_sec) of the last full check. */
1347 static time_t last_hard_try = 0;
1349 for splay_each(node_t, n, node_tree) {
/* Candidates must be reachable and must not be ourselves. */
1350 if(!n->status.reachable || n == myself)
/* Candidates also need receive-side key material: SPTPS nodes need
   sptps.instate, and validkey_in must be set in either case. */
1353 if((n->status.sptps && !n->sptps.instate) || !n->status.validkey_in)
1358 for splay_each(edge_t, e, n->edge_tree) {
/* Compare the source address, ignoring the port, with the address recorded
   on the reverse edge. */
1361 if(!sockaddrcmp_noport(from, &e->reverse->address)) {
/* A full check already happened during this second. */
1368 if(last_hard_try == now.tv_sec)
1373 if(!try_mac(n, pkt))
1381 last_hard_try = now.tv_sec;
/* Handler for readable UDP listening sockets: receives one datagram,
   works out which node sent it and either relays it or processes it locally.
   data  - pointer to the listen_socket_t the datagram arrived on.
   flags - not referenced in the lines visible in this excerpt.
   The declarations of pkt, from, to and hostname are not visible here. */
1386 void handle_incoming_vpn_data(void *data, int flags) {
1387 listen_socket_t *ls = data;
// NOTE(review): "= {}" (empty initializer) is a GNU extension before C23;
// "= {0}" is the portable spelling.
1390 node_id_t nullid = {};
1391 sockaddr_t addr = {};
1392 socklen_t addrlen = sizeof addr;
1394 bool direct = false;
1397 int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
// Errors are logged unless the socket merely had nothing to read.
1399 if(len <= 0 || len > MAXSIZE) {
1400 if(!sockwouldblock(sockerrno))
1401 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1407 sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1409 // Try to figure out who sent this packet.
1411 node_t *n = lookup_node_udp(&addr);
1413 if(n && !n->status.udp_confirmed)
1414 n = NULL; // Don't believe it if we don't have confirmation yet.
1417 // It might be from a 1.1 node, which might have a source ID in the packet.
// The payload is assumed to start after two node IDs (source and destination).
1418 pkt.offset = 2 * sizeof(node_id_t);
1419 from = lookup_node_id(SRCID(&pkt));
// An all-zero destination ID together with a known SPTPS source lets us
// verify the datagram against that node's SPTPS session.
1420 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
// NOTE(review): no check that pkt.len >= 2 * sizeof(node_id_t) is visible in
// this excerpt; for a shorter datagram this unsigned subtraction would wrap - confirm.
1421 if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
// Last resort: try to identify the sender by verifying the packet against known nodes.
1430 n = try_harder(&addr, &pkt);
// The hostname string is only built when the message will actually be logged.
1435 if(debug_level >= DEBUG_PROTOCOL) {
1436 hostname = sockaddr2hostname(&addr);
1437 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1443 if(n->status.sptps) {
1444 pkt.offset = 2 * sizeof(node_id_t);
// An all-zero destination ID is handled separately from packets that carry
// explicit source and destination IDs.
1446 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1451 from = lookup_node_id(SRCID(&pkt));
1452 to = lookup_node_id(DSTID(&pkt));
1455 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1459 /* The packet is supposed to come from the originator or its static relay
1460 (i.e. with no dynamic relays in between).
1461 If it did not, "help" the static relay by sending it UDP info.
1462 Note that we only do this if we're the destination or the static relay;
1463 otherwise every hop would initiate its own UDP info message, resulting in elevated chatter. */
1465 if(n != from->via && to->via == myself)
1466 send_udp_info(myself, from);
1468 /* If we're not the final recipient, relay the packet. */
// The relayed payload excludes the two leading node IDs.
1471 send_sptps_data(to, from, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1472 try_tx_sptps(to, true);
1481 if(!receive_udppacket(from, &pkt))
// Record which listening socket the packet arrived on, as an index
// relative to listen_socket (presumably the array of listening sockets).
1484 n->sock = ls - listen_socket;
// For direct packets, update the stored UDP address if the sender's address changed.
1485 if(direct && sockaddrcmp(&addr, &n->address))
1486 update_node_udp(n, &addr);
1488 /* If the packet went through a relay, help the sender find the appropriate MTU
1489 through the relay path. */
1492 send_mtu_info(myself, n, MTU);
1495 void handle_device_data(void *data, int flags) {
1496 vpn_packet_t packet;
1497 packet.offset = DEFAULT_PACKET_OFFSET;
1498 packet.priority = 0;
1500 if(devops.read(&packet)) {
1501 myself->in_packets++;
1502 myself->in_bytes += packet.len;
1503 route(myself, &packet);