+/* Transmit one SPTPS record originating at `from` and destined for `to`.
+
+   to    - final recipient of the record
+   from  - node the record originates from (myself, or another node when relaying)
+   type  - SPTPS record type (SPTPS_HANDSHAKE, PKT_PROBE, or a data packet type)
+   data  - the encrypted SPTPS record
+   len   - size of data in bytes, including SPTPS_DATAGRAM_OVERHEAD
+
+   The record is either base64-encoded and passed to send_request() on
+   to->nexthop->connection, or prefixed with node IDs (when the relay
+   supports it) and sent with sendto() to the chosen relay.
+
+   Returns the result of send_request() on the first path. On the sendto()
+   path it returns false only for a send error that is neither "would block"
+   nor "message too long". */
+static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
+	/* Pick the relay: the indirect "via" node if it is not ourselves and the
+	   packet is a probe or fits its known minimum MTU; otherwise the next hop. */
+	node_t *relay = to->nexthop;
+	if(to->via != myself) {
+		if(type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)
+			relay = to->via;
+	}
+
+	const bool direct = (from == myself) && (to == relay);
+	const bool relay_supported = (relay->options >> 24) >= 4;
+	const bool tcponly = ((myself->options | relay->options) & OPTION_TCPONLY) != 0;
+	const bool relay_unusable = !direct && !relay_supported;
+	const bool too_big = (type != PKT_PROBE) && ((len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu);
+
+	/* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU. */
+
+	if(type == SPTPS_HANDSHAKE || tcponly || relay_unusable || too_big) {
+		char b64[len * 4 / 3 + 5];
+		b64encode(data, b64, len);
+
+		/* If no valid key is known yet, send the packets using ANS_KEY requests,
+		   to ensure we get to learn the reflexive UDP address. */
+		if(from == myself && !to->status.validkey) {
+			to->incompression = myself->incompression;
+			return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, b64, to->incompression);
+		}
+
+		return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, b64);
+	}
+
+	/* UDP path: optionally prepend destination and source node IDs. */
+	const size_t overhead = relay_supported ? (sizeof to->id + sizeof from->id) : 0;
+	char buf[len + overhead];
+	char *cursor = buf;
+
+	if(relay_supported) {
+		if(direct) {
+			/* Inform the recipient that this packet was sent directly. */
+			node_id_t nullid = {0};
+			memcpy(cursor, &nullid, sizeof nullid);
+			cursor += sizeof nullid;
+		} else {
+			memcpy(cursor, &to->id, sizeof to->id);
+			cursor += sizeof to->id;
+		}
+
+		memcpy(cursor, &from->id, sizeof from->id);
+		cursor += sizeof from->id;
+	}
+
+	/* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
+	memcpy(cursor, data, len);
+	cursor += len;
+
+	/* Prefer a local address when asked to; fall back to the regular UDP address. */
+	const sockaddr_t *sa = NULL;
+	int sock;
+	if(relay->status.send_locally)
+		choose_local_address(relay, &sa, &sock);
+	if(!sa)
+		choose_udp_address(relay, &sa, &sock);
+
+	logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
+
+	if(sendto(listen_socket[sock].udp.fd, buf, cursor - buf, 0, &sa->sa, SALEN(sa->sa)) >= 0 || sockwouldblock(sockerrno))
+		return true;
+
+	if(!sockmsgsize(sockerrno)) {
+		logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
+		return false;
+	}
+
+	/* The system rejected the datagram as too large: lower the relay's MTU
+	   bounds below this payload size. This is still reported as success. */
+	// Compensate for SPTPS overhead
+	len -= SPTPS_DATAGRAM_OVERHEAD;
+	if(relay->maxmtu >= len)
+		relay->maxmtu = len - 1;
+	if(relay->mtu >= len)
+		relay->mtu = len - 1;
+	try_fix_mtu(relay);
+
+	return true;
+}
+
+/* SPTPS send callback: `handle` is the destination node. The record is sent
+   with ourselves (myself) as the originating node. Returns whatever
+   send_sptps_data_priv() returns. */
+bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
+	node_t *to = handle;
+
+	return send_sptps_data_priv(to, myself, type, data, len);
+}
+
+/* SPTPS receive callback: handle one decrypted record from a node.
+
+   handle - the node_t the record came from
+   type   - SPTPS_HANDSHAKE, PKT_PROBE, or a combination of PKT_COMPRESSED / PKT_MAC
+   data   - record payload
+   len    - payload length in bytes
+
+   Handshake records mark the key as valid. Probe records are passed to
+   udp_probe_h(). Data records are rebuilt into a vpn_packet_t (decompressing
+   and synthesizing the Ethernet header where needed) and handed to
+   receive_packet().
+
+   Returns false when the record is rejected (too large, probe received over
+   TCP, unexpected type, missing MAC header, decompression failure, empty
+   payload, or unknown IP version), true otherwise. */
+bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
+	node_t *from = handle;
+
+	if(type == SPTPS_HANDSHAKE) {
+		if(!from->status.validkey) {
+			from->status.validkey = true;
+			from->status.waitingforkey = false;
+			logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
+		}
+		return true;
+	}
+
+	if(len > MTU) {
+		logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
+		return false;
+	}
+
+	vpn_packet_t inpkt;
+	inpkt.offset = DEFAULT_PACKET_OFFSET;
+	/* The packet is passed on to receive_packet(); do not leave the priority
+	   field indeterminate. */
+	inpkt.priority = 0;
+
+	if(type == PKT_PROBE) {
+		if(!from->status.udppacket) {
+			logger(DEBUG_ALWAYS, LOG_ERR, "Got SPTPS PROBE packet from %s (%s) via TCP", from->name, from->hostname);
+			return false;
+		}
+		inpkt.len = len;
+		memcpy(DATA(&inpkt), data, len);
+		if(inpkt.len > from->maxrecentlen)
+			from->maxrecentlen = inpkt.len;
+		udp_probe_h(from, &inpkt, len);
+		return true;
+	}
+
+	if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
+		logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
+		return false;
+	}
+
+	/* Check if we have the headers we need */
+	if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
+		logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
+		return false;
+	} else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
+		logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
+	}
+
+	/* Without a MAC header the payload starts after a 14-byte Ethernet
+	   header that is reconstructed below. */
+	int offset = (type & PKT_MAC) ? 0 : 14;
+	if(type & PKT_COMPRESSED) {
+		length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
+		if(ulen < 0) {
+			return false;
+		} else {
+			inpkt.len = ulen + offset;
+		}
+		if(inpkt.len > MAXSIZE)
+			abort();
+	} else {
+		memcpy(DATA(&inpkt) + offset, data, len);
+		inpkt.len = len + offset;
+	}
+
+	/* Generate the Ethernet packet type if necessary */
+	if(offset) {
+		/* The IP version is read from the first payload byte; with an empty
+		   payload that byte was never written, so reject the packet instead
+		   of inspecting uninitialized memory. */
+		if(inpkt.len <= offset) {
+			logger(DEBUG_TRAFFIC, LOG_ERR, "Received empty packet from %s (%s)", from->name, from->hostname);
+			return false;
+		}
+
+		switch(DATA(&inpkt)[14] >> 4) {
+			case 4:
+				DATA(&inpkt)[12] = 0x08;
+				DATA(&inpkt)[13] = 0x00;
+				break;
+			case 6:
+				DATA(&inpkt)[12] = 0x86;
+				DATA(&inpkt)[13] = 0xDD;
+				break;
+			default:
+				logger(DEBUG_TRAFFIC, LOG_ERR,
+					   "Unknown IP version %d while reading packet from %s (%s)",
+					   DATA(&inpkt)[14] >> 4, from->name, from->hostname);
+				return false;
+		}
+	}
+
+	/* Track the largest packet recently received over UDP. */
+	if(from->status.udppacket && inpkt.len > from->maxrecentlen)
+		from->maxrecentlen = inpkt.len;
+
+	receive_packet(from, &inpkt);
+	return true;
+}
+
+// Kick off (or restart) the SPTPS key exchange with node n when no valid key is known.
+// Nothing is guaranteed on return - callers must inspect the node's state to see whether keys are available.
+static void try_sptps(node_t *n) {
+	if(n->status.validkey)
+		return;
+
+	logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
+
+	if(n->status.waitingforkey) {
+		/* A request is outstanding: keep waiting until it is more than 10 seconds old. */
+		if(n->last_req_key + 10 >= now.tv_sec)
+			return;
+
+		logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
+		sptps_stop(&n->sptps);
+		n->status.waitingforkey = false;
+	}
+
+	send_req_key(n);
+}
+
+/* Send a UDP probe of `len` bytes to node n: the first 14 bytes are zero,
+   the remaining len - 14 bytes are random. The packet is handed to
+   send_udppacket() with priority 0. */
+static void send_udp_probe_packet(node_t *n, int len) {
+	vpn_packet_t probe;
+
+	probe.offset = DEFAULT_PACKET_OFFSET;
+	probe.len = len;
+	probe.priority = 0;
+
+	memset(DATA(&probe), 0, 14);
+	randomize(DATA(&probe) + 14, len - 14);
+
+	logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
+
+	send_udppacket(n, &probe);
+}
+
+// This function tries to establish a UDP tunnel to a node so that packets can be sent.
+// If a tunnel is already established, it makes sure it stays up.
+// This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
+//
+// It does nothing when udp_discovery is disabled. Otherwise it may
+//  - send a gratuitous probe reply reporting n->maxrecentlen (then reset it to 0), and
+//  - send a probe request of MIN_PROBE_SIZE bytes, plus a second one via a local
+//    address when localdiscovery is on and UDP is not yet confirmed.
+// Both are rate-limited using n->udp_reply_sent and n->udp_ping_sent respectively.
+static void try_udp(node_t* n) {
+	if(!udp_discovery)
+		return;
+
+	/* Send gratuitous probe replies to 1.1 nodes. */
+
+	if((n->options >> 24) >= 3 && n->status.udp_confirmed) {
+		struct timeval ping_tx_elapsed;
+		timersub(&now, &n->udp_reply_sent, &ping_tx_elapsed);
+
+		if(ping_tx_elapsed.tv_sec >= udp_discovery_keepalive_interval - 1) {
+			n->udp_reply_sent = now;
+			if(n->maxrecentlen) {
+				vpn_packet_t pkt;
+				pkt.len = n->maxrecentlen;
+				pkt.offset = DEFAULT_PACKET_OFFSET;
+				/* Initialize the priority like send_udp_probe_packet() does,
+				   so the packet is not sent with an indeterminate value. */
+				pkt.priority = 0;
+				memset(DATA(&pkt), 0, 14);
+				/* Only MIN_PROBE_SIZE bytes are filled in even though pkt.len is
+				   maxrecentlen; presumably send_udp_probe_reply() sends a
+				   MIN_PROBE_SIZE packet that merely reports the length - TODO confirm. */
+				randomize(DATA(&pkt) + 14, MIN_PROBE_SIZE - 14);
+				send_udp_probe_reply(n, &pkt, pkt.len);
+				n->maxrecentlen = 0;
+			}
+		}
+	}
+
+	/* Probe request */
+
+	struct timeval ping_tx_elapsed;
+	timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
+
+	/* Probe less aggressively once UDP connectivity has been confirmed. */
+	int interval = n->status.udp_confirmed ? udp_discovery_keepalive_interval : udp_discovery_interval;
+
+	if(ping_tx_elapsed.tv_sec >= interval) {
+		send_udp_probe_packet(n, MIN_PROBE_SIZE);
+		n->udp_ping_sent = now;
+
+		if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
+			/* Temporarily request a local address for this one extra probe. */
+			n->status.send_locally = true;
+			send_udp_probe_packet(n, MIN_PROBE_SIZE);
+			n->status.send_locally = false;
+		}
+	}
+}
+
+/* Guess an initial upper bound for the tinc MTU towards node n.
+
+   When IP_MTU is available, a temporary UDP socket is connected to the
+   node's UDP address and the MTU reported by getsockopt(IP_MTU) is reduced
+   by the IP, UDP and tinc (SPTPS or legacy) overheads.
+
+   Returns MTU when IP_MTU is not available, when no address is known, when
+   any socket call fails, or when the computed value is below 512 or above
+   MTU; otherwise returns the computed value. */
+static length_t choose_initial_maxmtu(node_t *n) {
+#ifdef IP_MTU
+
+	const sockaddr_t *sa = NULL;
+	int sockindex;
+	choose_udp_address(n, &sa, &sockindex);
+	if(!sa)
+		return MTU;
+
+	int sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP);
+	if(sock < 0) {
+		logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
+		return MTU;
+	}
+
+	if(connect(sock, &sa->sa, SALEN(sa->sa))) {
+		logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
+		close(sock);
+		return MTU;
+	}
+
+	int ip_mtu;
+	socklen_t ip_mtu_len = sizeof ip_mtu;
+	if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) {
+		logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
+		close(sock);
+		return MTU;
+	}
+
+	close(sock);
+
+	/* getsockopt(IP_MTU) returns the MTU of the physical interface.
+	   We need to remove various overheads to get to the tinc MTU.
+	   The arithmetic is done in int: ip_mtu may exceed the range of
+	   length_t (e.g. 65536 on loopback), and narrowing it before the range
+	   checks below would make it wrap around. */
+	int mtu = ip_mtu;
+	mtu -= (sa->sa.sa_family == AF_INET6) ? (int)sizeof(struct ip6_hdr) : (int)sizeof(struct ip);
+	mtu -= 8; /* UDP */
+	if(n->status.sptps) {
+		mtu -= (int)(SPTPS_DATAGRAM_OVERHEAD);
+		/* Nodes with protocol minor >= 4 also get two node IDs prepended. */
+		if((n->options >> 24) >= 4)
+			mtu -= (int)(sizeof(node_id_t) + sizeof(node_id_t));
+#ifndef DISABLE_LEGACY
+	} else {
+		mtu -= (int)digest_length(n->outdigest);
+
+		/* Now it's tricky. We use CBC mode, so the length of the
+		   encrypted payload must be a multiple of the blocksize. The
+		   sequence number is also part of the encrypted payload, so we
+		   must account for it after correcting for the blocksize.
+		   Furthermore, the padding in the last block must be at least
+		   1 byte. */
+
+		int blocksize = (int)cipher_blocksize(n->outcipher);
+
+		if(blocksize > 1) {
+			mtu /= blocksize;
+			mtu *= blocksize;
+			mtu--;
+		}
+
+		mtu -= 4; // seqno
+#endif
+	}
+
+	if (mtu < 512) {
+		logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu);
+		return MTU;
+	}
+	if (mtu > MTU)
+		return MTU;
+
+	/* 512 <= mtu <= MTU here, so narrowing to length_t is safe. */
+	length_t result = (length_t)mtu;
+	logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, result);
+	return result;
+
+#else
+
+	return MTU;
+
+#endif
+}
+
+/* This function tries to determine the MTU of a node.
+ By calling this function repeatedly, n->minmtu will be progressively
+ increased, and at some point, n->mtu will be fixed to n->minmtu. If the MTU
+ is already fixed, this function checks if it can be increased.
+
+ n is the node whose path MTU is being probed. Nothing is done unless
+ OPTION_PMTU_DISCOVERY is set in n->options. While UDP discovery is enabled
+ and UDP connectivity to n is not confirmed, the discovery state
+ (maxrecentlen, mtuprobes, minmtu, maxmtu) is reset and no probe is sent.
+ Calls are rate-limited using n->mtu_ping_sent; a call that arrives too
+ early returns without side effects. Probes are sent with
+ send_udp_probe_packet(); the function itself returns nothing.
+*/
+
+static void try_mtu(node_t *n) {
+ if(!(n->options & OPTION_PMTU_DISCOVERY))
+ return;
+
+ if(udp_discovery && !n->status.udp_confirmed) {
+ n->maxrecentlen = 0;
+ n->mtuprobes = 0;
+ n->minmtu = 0;
+ n->maxmtu = MTU;
+ return;
+ }
+
+ /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++
+ mtuprobes == 20: fix MTU, and go to -1
+ mtuprobes == -1: send one maxmtu and one maxmtu+1 probe every pinginterval
+ mtuprobes ==-2..-3: send one maxmtu probe every second
+ mtuprobes == -4: maxmtu no longer valid, reset minmtu and maxmtu and go to 0
+
+ NOTE(review): the rate limit below for mtuprobes > 0 is 333333 microseconds,
+ not 1 second as stated above. The transitions at 20 and to -1 are not
+ visible in this function; presumably try_fix_mtu() performs them - confirm. */
+
+ struct timeval elapsed;
+ timersub(&now, &n->mtu_ping_sent, &elapsed);
+ if(n->mtuprobes >= 0) {
+ if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333) /* discovery under way: less than ~1/3 s since the last probe */
+ return;
+ } else {
+ if(n->mtuprobes < -1) {
+ if(elapsed.tv_sec < 1) /* a maxmtu probe is unanswered: retry once per second */
+ return;
+ } else {
+ if(elapsed.tv_sec < pinginterval) /* steady state: re-check once per pinginterval */
+ return;
+ }
+ }
+
+ n->mtu_ping_sent = now;
+
+ try_fix_mtu(n); /* defined elsewhere; may change n->mtuprobes, which is re-examined below */
+
+ if(n->mtuprobes < -3) {
+ /* We lost three MTU probes, restart discovery */
+ logger(DEBUG_TRAFFIC, LOG_INFO, "Decrease in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
+ n->mtuprobes = 0;
+ n->minmtu = 0;
+ }
+
+ if(n->mtuprobes < 0) {
+ /* After the initial discovery, we only send one maxmtu and one
+ maxmtu+1 probe to detect PMTU increases.
+ The maxmtu+1 probe is only sent in state -1 and only while
+ maxmtu + 1 is still below MTU. Each call moves mtuprobes one step
+ further below zero. */
+ send_udp_probe_packet(n, n->maxmtu);
+ if(n->mtuprobes == -1 && n->maxmtu + 1 < MTU)
+ send_udp_probe_packet(n, n->maxmtu + 1);
+ n->mtuprobes--;
+ } else {
+ /* Before initial discovery begins, set maxmtu to the most likely value.
+ If it's underestimated, we will correct it after initial discovery. */
+ if(n->mtuprobes == 0)
+ n->maxmtu = choose_initial_maxmtu(n);
+
+ for (;;) {
+ /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
+ but it will typically increase convergence time in the no-loss case. */
+ const length_t probes_per_cycle = 8;
+
+ /* This magic value was determined using math simulations.
+ It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
+ Since 1407 is just below the range of tinc MTUs over typical networks,
+ this fine-tuning allows tinc to cover a lot of ground very quickly.
+ This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller,
+ then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario
+ if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */
+ const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1;
+
+ const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1; /* counts down from probes_per_cycle - 1 to 0 within a cycle */
+ const length_t minmtu = MAX(n->minmtu, 512); /* never probe with a base below 512 bytes */
+ const float interval = n->maxmtu - minmtu;
+
+ /* The core of the discovery algorithm is this exponential.
+ It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
+ This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
+ are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
+ on the precise MTU as we are approaching it.
+ The last probe of the cycle always has an offset of 1 byte (interval to the power 0), i.e. it is
+ minmtu + 1 bytes in size - this is to make sure we'll get at least one
+ reply per cycle so that we can make progress. */
+ const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
+
+ length_t maxmtu = n->maxmtu; /* remember the bound so a change made while sending can be detected */
+ send_udp_probe_packet(n, minmtu + offset);
+ /* If maxmtu changed, it means the probe was rejected by the system because it was too large.
+ In that case, we recalculate with the new maxmtu and try again. */
+ if(n->mtuprobes < 0 || maxmtu == n->maxmtu)
+ break;
+ }
+
+ if(n->mtuprobes >= 0)
+ n->mtuprobes++;
+ }
+}
+
+/* These functions try to establish a tunnel to a node (or its relay) so that
+ packets can be sent (e.g. exchange keys).
+ If a tunnel is already established, it tries to improve it (e.g. by trying
+ to establish a UDP tunnel instead of TCP). This function makes no
+ guarantees - it is up to the caller to check the node's state to figure out
+ if TCP and/or UDP is usable. By calling this function repeatedly, the
+ tunnel is gradually improved until we hit the wall imposed by the underlying
+ network environment. It is recommended to call this function every time a
+ packet is sent (or intended to be sent) to a node, so that the tunnel keeps
+ improving as packets flow, and then gracefully downgrades itself as it goes
+ idle.