+ if(len > sizeof(packet.data)) {
+ logger(DEBUG_TRAFFIC, LOG_INFO, "Truncating probe length %lu to %s (%s)", (unsigned long)len, n->name, n->hostname);
+ len = sizeof(packet.data);
+ }
+
+ len = MAX(len, MIN_PROBE_SIZE);
+ packet.offset = DEFAULT_PACKET_OFFSET;
+ memset(DATA(&packet), 0, 14);
+ randomize(DATA(&packet) + 14, len - 14);
+ packet.len = len;
+ packet.priority = 0;
+
+ logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %lu to %s (%s)", (unsigned long)len, n->name, n->hostname);
+
+ send_udppacket(n, &packet);
+}
+
+// Tries to establish a UDP tunnel to a node so that packets can be sent, or
+// keeps an already established tunnel alive.
+// No guarantees are made: the caller has to check the node's state to find
+// out whether UDP is usable.
+static void try_udp(node_t *n) {
+	if(!udp_discovery) {
+		return;
+	}
+
+	/* Gratuitous probe replies for 1.1 nodes: only once UDP is confirmed, and
+	   only when at least (keepalive interval - 1) seconds have passed since
+	   the previous one. */
+
+	const bool reply_candidate = (n->options >> 24) >= 3 && n->status.udp_confirmed;
+
+	if(reply_candidate) {
+		struct timeval since_reply;
+		timersub(&now, &n->udp_reply_sent, &since_reply);
+
+		if(since_reply.tv_sec >= udp_discovery_keepalive_interval - 1) {
+			n->udp_reply_sent = now;
+
+			if(n->maxrecentlen) {
+				/* Report the largest recently seen length, then start over. */
+				vpn_packet_t reply;
+				reply.offset = DEFAULT_PACKET_OFFSET;
+				reply.len = n->maxrecentlen;
+				memset(DATA(&reply), 0, 14);
+				randomize(DATA(&reply) + 14, MIN_PROBE_SIZE - 14);
+				send_udp_probe_reply(n, &reply, reply.len);
+				n->maxrecentlen = 0;
+			}
+		}
+	}
+
+	/* Probe request: the keepalive interval applies once UDP is confirmed,
+	   the discovery interval applies before that. */
+
+	struct timeval since_ping;
+	timersub(&now, &n->udp_ping_sent, &since_ping);
+
+	int interval = n->status.udp_confirmed ? udp_discovery_keepalive_interval : udp_discovery_interval;
+
+	if(since_ping.tv_sec < interval) {
+		return;
+	}
+
+	gettimeofday(&now, NULL);
+	n->udp_ping_sent = now; // a probe in flight
+	n->status.ping_sent = true;
+	send_udp_probe_packet(n, MIN_PROBE_SIZE);
+
+	if(!localdiscovery || n->status.udp_confirmed || !n->prevedge) {
+		return;
+	}
+
+	/* Second probe, sent with the send_locally flag raised for its duration. */
+	n->status.send_locally = true;
+	send_udp_probe_packet(n, MIN_PROBE_SIZE);
+	n->status.send_locally = false;
+}
+
+/* Picks the starting upper bound for the tinc MTU towards a node.
+   With IP_MTU available, a temporary UDP socket is connected to the address
+   selected by choose_udp_address() and getsockopt(IP_MTU) is queried; the
+   protocol overheads are then subtracted from that value. MTU is returned
+   whenever no address is available, a socket call fails, the reported value
+   is below MINMTU, the result exceeds MTU, or IP_MTU is not defined. */
+static length_t choose_initial_maxmtu(node_t *n) {
+#ifdef IP_MTU
+
+	const sockaddr_t *target = NULL;
+	size_t sockindex;
+	choose_udp_address(n, &target, &sockindex);
+
+	if(!target) {
+		return MTU;
+	}
+
+	int probe_sock = socket(target->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP);
+
+	if(probe_sock < 0) {
+		logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
+		return MTU;
+	}
+
+	int ip_mtu;
+	socklen_t ip_mtu_len = sizeof(ip_mtu);
+	bool have_ip_mtu = false;
+
+	/* Errors are logged before the socket is closed, so sockerrno still
+	   refers to the failing call. */
+	if(connect(probe_sock, &target->sa, SALEN(target->sa))) {
+		logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
+	} else if(getsockopt(probe_sock, IPPROTO_IP, IP_MTU, (void *)&ip_mtu, &ip_mtu_len)) {
+		logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
+	} else {
+		have_ip_mtu = true;
+	}
+
+	closesocket(probe_sock);
+
+	if(!have_ip_mtu) {
+		return MTU;
+	}
+
+	if(ip_mtu < MINMTU) {
+		logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu);
+		return MTU;
+	}
+
+	/* getsockopt(IP_MTU) returns the MTU of the physical interface.
+	   The overheads below are removed to arrive at the tinc MTU. */
+	length_t mtu = ip_mtu;
+
+	if(target->sa.sa_family == AF_INET6) {
+		mtu -= sizeof(struct ip6_hdr);
+	} else {
+		mtu -= sizeof(struct ip);
+	}
+
+	mtu -= 8; /* UDP */
+
+	if(n->status.sptps) {
+		mtu -= SPTPS_DATAGRAM_OVERHEAD;
+
+		if((n->options >> 24) >= 4) {
+			/* Two node IDs are accounted for per packet. */
+			mtu -= 2 * sizeof(node_id_t);
+		}
+	}
+
+#ifndef DISABLE_LEGACY
+	else {
+		mtu -= digest_length(n->outdigest);
+
+		/* CBC mode: the encrypted payload length must be a multiple of
+		   the blocksize, and the last block needs at least 1 byte of
+		   padding. The sequence number is part of the encrypted payload,
+		   so it is subtracted only after the blocksize correction. */
+
+		length_t blocksize = cipher_blocksize(n->outcipher);
+
+		if(blocksize > 1) {
+			mtu -= mtu % blocksize; /* round down to a whole number of blocks */
+			mtu--;
+		}
+
+		mtu -= 4; // seqno
+	}
+
+#endif
+
+	if(mtu > MTU) {
+		return MTU;
+	}
+
+	logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu);
+	return mtu;
+
+#else
+	(void)n;
+	return MTU;
+#endif
+}
+
+/* This function tries to determine the MTU of a node.
+ By calling this function repeatedly, n->minmtu will be progressively
+ increased, and at some point, n->mtu will be fixed to n->minmtu. If the MTU
+ is already fixed, this function checks if it can be increased.
+ It does nothing unless OPTION_PMTU_DISCOVERY is set in n->options. While
+ udp_discovery is enabled and UDP to the node is not confirmed, it only resets
+ the discovery state (maxrecentlen, mtuprobes, minmtu, maxmtu) and returns.
+*/
+
+static void try_mtu(node_t *n) {
+ if(!(n->options & OPTION_PMTU_DISCOVERY)) {
+ return;
+ }
+
+ if(udp_discovery && !n->status.udp_confirmed) {
+ n->maxrecentlen = 0;
+ n->mtuprobes = 0;
+ n->minmtu = 0;
+ n->maxmtu = MTU;
+ return;
+ }
+
+ /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++
+ mtuprobes == 20: fix MTU, and go to -1
+ mtuprobes == -1: send one maxmtu and one maxmtu+1 probe every pinginterval
+ mtuprobes ==-2..-3: send one maxmtu probe every second
+ mtuprobes == -4: maxmtu no longer valid, reset minmtu and maxmtu and go to 0 */
+
+ struct timeval elapsed;
+ timersub(&now, &n->mtu_ping_sent, &elapsed);
+
+ if(n->mtuprobes >= 0) {
+ if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333) { /* mtuprobes > 0: wait until 333333 microseconds have passed since the last probe; mtuprobes == 0 is never delayed */
+ return;
+ }
+ } else {
+ if(n->mtuprobes < -1) {
+ if(elapsed.tv_sec < 1) { /* mtuprobes below -1: wait at least one second */
+ return;
+ }
+ } else {
+ if(elapsed.tv_sec < pinginterval) { /* mtuprobes == -1: wait pinginterval seconds */
+ return;
+ }
+ }
+ }
+
+ n->mtu_ping_sent = now; /* timestamp compared against on the next call */
+
+ try_fix_mtu(n); /* defined elsewhere; presumably fixes n->mtu once discovery has converged - TODO confirm */
+
+ if(n->mtuprobes < -3) {
+ /* We lost three MTU probes, restart discovery */
+ logger(DEBUG_TRAFFIC, LOG_INFO, "Decrease in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
+ n->mtuprobes = 0;
+ n->minmtu = 0;
+ }
+
+ if(n->mtuprobes < 0) {
+ /* After the initial discovery, we only send one maxmtu and one
+ maxmtu+1 probe to detect PMTU increases. The maxmtu+1 probe is only
+ sent while mtuprobes == -1 and maxmtu + 1 is still below MTU. */
+ send_udp_probe_packet(n, n->maxmtu);
+
+ if(n->mtuprobes == -1 && n->maxmtu + 1 < MTU) {
+ send_udp_probe_packet(n, n->maxmtu + 1);
+ }
+
+ n->mtuprobes--;
+ } else {
+ /* Before initial discovery begins, set maxmtu to the most likely value.
+ If it's underestimated, we will correct it after initial discovery. */
+ if(n->mtuprobes == 0) {
+ n->maxmtu = choose_initial_maxmtu(n);
+ }
+
+ for(;;) {
+ /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
+ but it will typically increase convergence time in the no-loss case. */
+ const length_t probes_per_cycle = 8;
+
+ /* This magic value was determined using math simulations.
+ It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
+ Since 1407 is just below the range of tinc MTUs over typical networks,
+ this fine-tuning allows tinc to cover a lot of ground very quickly.
+ This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller,
+ then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario
+ if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */
+ const float multiplier = (n->maxmtu == MTU) ? 0.97f : 1.0f;
+
+ const float cycle_position = (float) probes_per_cycle - (float)(n->mtuprobes % probes_per_cycle) - 1.0f; /* counts down from probes_per_cycle - 1 to 0 within a cycle */
+ const length_t minmtu = MAX(n->minmtu, MINMTU);
+ const float interval = (float)(n->maxmtu - minmtu);
+
+ length_t offset = 0;
+
+ /* The exponential is skipped when the interval is not positive, which can happen
+ when n->maxmtu is at or below the MINMTU-bounded minmtu; offset then stays 0
+ and the probe is exactly minmtu bytes. */
+ if(interval > 0) {
+ /* The core of the discovery algorithm is this exponential.
+ It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
+ This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
+ are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
+ on the precise MTU as we are approaching it.
+ The last probe of the cycle always uses an offset of 1 byte (cycle_position is 0 there, so the
+ exponent is 0) - this is to make sure we'll get at least one
+ reply per cycle so that we can make progress. */
+ offset = lrintf(powf(interval, multiplier * cycle_position / (float)(probes_per_cycle - 1)));
+ }
+
+ length_t maxmtu = n->maxmtu; /* snapshot, compared below to detect a change made while sending */
+ send_udp_probe_packet(n, minmtu + offset);
+
+ /* If maxmtu changed, it means the probe was rejected by the system because it was too large.
+ In that case, we recalculate with the new maxmtu and try again. */
+ if(n->mtuprobes < 0 || maxmtu == n->maxmtu) {
+ break;
+ }
+ }
+
+ if(n->mtuprobes >= 0) { /* NOTE(review): re-tested here and in the loop, so the send path can apparently change mtuprobes - confirm */
+ n->mtuprobes++;
+ }
+ }
+}
+
+/* The try_tx family tries to establish a tunnel to a node (or to its relay)
+   so that packets can be sent (e.g. to exchange keys). When a tunnel already
+   exists, it tries to improve it (e.g. UDP instead of TCP). Nothing is
+   guaranteed: the caller has to check the node's state to find out whether
+   TCP and/or UDP is usable. Repeated calls improve the tunnel step by step
+   until the limits of the underlying network are reached, so calling it for
+   every packet that is sent (or about to be sent) to a node is recommended;
+   the tunnel then keeps improving while traffic flows and degrades
+   gracefully once it goes idle.
+*/
+
+static void try_tx_sptps(node_t *n, bool mtu) {
+	/* A TCP-only neighbor only ever gets "cleartext" PACKET messages,
+	   so SPTPS is not needed for it at all. */
+
+	if(n->connection && ((myself->options | n->options) & OPTION_TCPONLY)) {
+		return;
+	}
+
+	/* Otherwise, do SPTPS authentication with n if necessary. */
+
+	try_sptps(n);
+
+	/* Work out the static relay, if any. */
+
+	node_t *relay = n->via;
+
+	if(relay == myself) {
+		relay = n->nexthop;
+	}
+
+	/* With a static relay, everything is tried on the relay instead,
+	   provided it supports relaying; n itself is not probed. */
+
+	if(relay != n) {
+		if((relay->options >> 24) >= 4) {
+			try_tx(relay, mtu);
+		}
+
+		return;
+	}
+
+	/* No static relay: try to establish UDP connectivity to n directly. */
+
+	try_udp(n);
+
+	if(mtu) {
+		try_mtu(n);
+	}
+
+	/* Without UDP connectivity (yet), a dynamic relay (nexthop) is needed
+	   while direct connectivity is being established. */
+
+	if(n->status.udp_confirmed || n->nexthop == n) {
+		return;
+	}
+
+	if((n->nexthop->options >> 24) >= 4) {
+		try_tx(n->nexthop, mtu);
+	}
+}
+
+/* Legacy (non-SPTPS) counterpart of try_tx_sptps(): makes sure keys are
+   exchanged, then works on UDP connectivity and, if mtu is set, on the MTU. */
+static void try_tx_legacy(node_t *n, bool mtu) {
+	/* Send our key if the peer does not have a valid one from us yet. */
+
+	if(!n->status.validkey_in) {
+		send_ans_key(n);
+	}
+
+	/* With a valid key for the peer, go on to UDP and MTU discovery. */
+
+	if(n->status.validkey) {
+		try_udp(n);
+
+		if(mtu) {
+			try_mtu(n);
+		}
+
+		return;
+	}
+
+	/* No key yet: request one, but not more often than every 10 seconds. */
+
+	if(now.tv_sec >= n->last_req_key + 10) {
+		send_req_key(n);
+		n->last_req_key = now.tv_sec;
+	}
+}
+
+/* Entry point of the try_tx family: does nothing for unreachable nodes,
+   otherwise dispatches to the SPTPS or the legacy variant according to
+   n->status.sptps. mtu is passed through unchanged. */
+void try_tx(node_t *n, bool mtu) {
+	if(n->status.reachable) {
+		if(n->status.sptps) {
+			try_tx_sptps(n, mtu);
+		} else {
+			try_tx_legacy(n, mtu);
+		}
+	}
+}
+
+/* Sends a VPN packet to node n.
+   - Packets for myself are written to the local device via devops.write(),
+     optionally with rewritten MAC addresses when overwrite_mac is set.
+   - Packets for unreachable nodes are dropped.
+   - SPTPS nodes are handled by send_sptps_packet().
+   - Otherwise the packet goes to n directly or to its relay, over TCP when
+     packet->priority is -1 or OPTION_TCPONLY is set on either side, and
+     over UDP in all other cases.
+   The out_packets/out_bytes counters of n are updated for every packet that
+   is not dropped as unreachable. */
+void send_packet(node_t *n, vpn_packet_t *packet) {
+	// If it's for myself, write it to the tun/tap device.
+
+	if(n == myself) {
+		if(overwrite_mac) {
+			memcpy(DATA(packet), mymac.x, ETH_ALEN);
+			// Use an arbitrary fake source address: our own MAC with
+			// the last byte inverted.
+			memcpy(DATA(packet) + ETH_ALEN, DATA(packet), ETH_ALEN);
+			DATA(packet)[ETH_ALEN * 2 - 1] ^= 0xFF;
+		}
+
+		n->out_packets++;
+		n->out_bytes += packet->len;
+		devops.write(packet);
+		return;
+	}
+
+	// Routine traffic trace: logged at LOG_INFO like the other
+	// DEBUG_TRAFFIC messages, not as an error.
+	logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet of %d bytes to %s (%s)", packet->len, n->name, n->hostname);
+
+	// If the node is not reachable, drop it.
+
+	if(!n->status.reachable) {
+		logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable", n->name, n->hostname);
+		return;
+	}
+
+	// Keep track of packet statistics.
+
+	n->out_packets++;
+	n->out_bytes += packet->len;
+
+	// Check if it should be sent as an SPTPS packet.
+
+	if(n->status.sptps) {
+		send_sptps_packet(n, packet);
+		try_tx(n, true);
+		return;
+	}
+
+	// Determine which node to actually send it to: the next hop when TCP is
+	// requested (priority -1) or when n->via is myself, otherwise n->via.
+
+	node_t *via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
+
+	if(via != n) {
+		// Name and hostname must both describe the node actually used
+		// (via), which is not necessarily n->via.
+		logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)", n->name, via->name, via->hostname);
+	}
+
+	// Try to send via UDP, unless TCP is forced.
+
+	if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
+		if(!send_tcppacket(via->connection, packet)) {
+			terminate_connection(via->connection, true);
+		}
+
+		return;
+	}
+
+	send_udppacket(via, packet);
+	try_tx(via, true);
+}
+
+/* Distributes a broadcast packet that originated at node `from`.
+   A copy is always delivered locally unless we are the originator. Forwarding
+   to other nodes depends on broadcast_mode and is disabled entirely in
+   tunnelserver mode. */
+void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
+	// Always give ourself a copy of the packet.
+	if(from != myself) {
+		send_packet(myself, packet);
+	}
+
+	// In TunnelServer mode, do not forward broadcast packets.
+	// The MST might not be valid and create loops.
+	if(tunnelserver || broadcast_mode == BMODE_NONE) {
+		return;
+	}
+
+	logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
+	       packet->len, from->name, from->hostname);
+
+	if(broadcast_mode == BMODE_MST) {
+		// MST mode: broadcast packets travel via the Minimum Spanning Tree.
+		// This guarantees all nodes receive the broadcast packet, and
+		// usually distributes the sending of broadcast packets over all nodes.
+		// The connection the packet arrived through is skipped.
+		for list_each(connection_t, c, &connection_list) {
+			if(c->edge && c->status.mst && c != from->nexthop->connection) {
+				send_packet(c->node, packet);
+			}
+		}
+	} else if(broadcast_mode == BMODE_DIRECT && from == myself) {
+		// Direct mode: copies go to each node we know of, which only
+		// reaches nodes that can be reached in a single hop. Packets from
+		// other nodes are not forwarded, because there is not enough
+		// information to do so in this mode.
+		for splay_each(node_t, n, &node_tree) {
+			if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n)) {
+				send_packet(n, packet);
+			}
+		}
+	}
+
+	// Any other mode: nothing to forward.
+}
+
+/* We got a packet from some IP address, but we don't know who sent it. Try to
+   verify the message authentication code against the active session keys.
+   Since this is an expensive operation, nodes without a known address that
+   matches the packet's source are only checked when no such full check has
+   happened yet during the current second (now.tv_sec); nodes with a matching
+   address are always checked. Returns the first node whose MAC check
+   succeeds, or NULL. */
+
+static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
+	static time_t last_hard_try = 0;
+	node_t *found = NULL;
+	bool did_hard_try = false;
+
+	for splay_each(node_t, n, &node_tree) {
+		if(n == myself || !n->status.reachable) {
+			continue;
+		}
+
+		/* Only nodes with an incoming key or an SPTPS receive state can match. */
+		if(!(n->status.validkey_in || (n->status.sptps && n->sptps.instate))) {
+			continue;
+		}
+
+		/* Does any reverse edge of this node carry the packet's source
+		   address (port ignored)? */
+		bool address_known = false;
+
+		for splay_each(edge_t, e, &n->edge_tree) {
+			if(e->reverse && !sockaddrcmp_noport(from, &e->reverse->address)) {
+				address_known = true;
+				break;
+			}
+		}
+
+		if(!address_known) {
+			/* Full checks are rate limited. */
+			if(last_hard_try == now.tv_sec) {
+				continue;
+			}
+
+			did_hard_try = true;
+		}
+
+		if(try_mac(n, pkt)) {
+			found = n;
+			break;
+		}
+	}
+
+	if(did_hard_try) {
+		last_hard_try = now.tv_sec;
+	}
+
+	return found;
+}
+
+static void handle_incoming_vpn_packet(listen_socket_t *ls, vpn_packet_t *pkt, sockaddr_t *addr) {
+ char *hostname;
+ node_id_t nullid = {0};
+ node_t *from, *to;
+ bool direct = false;
+
+ sockaddrunmap(addr); /* Some braindead IPv6 implementations do stupid things. */
+
+ // Try to figure out who sent this packet.
+
+ node_t *n = lookup_node_udp(addr);
+
+ if(n && !n->status.udp_confirmed) {
+ n = NULL; // Don't believe it if we don't have confirmation yet.
+ }
+
+ if(!n) {
+ // It might be from a 1.1 node, which might have a source ID in the packet.
+ pkt->offset = 2 * sizeof(node_id_t);
+ from = lookup_node_id(SRCID(pkt));
+
+ if(from && from->status.sptps && !memcmp(DSTID(pkt), &nullid, sizeof(nullid))) {
+ if(sptps_verify_datagram(&from->sptps, DATA(pkt), pkt->len - 2 * sizeof(node_id_t))) {
+ n = from;
+ } else {
+ goto skip_harder;
+ }
+ }
+ }
+
+ if(!n) {
+ pkt->offset = 0;
+ n = try_harder(addr, pkt);
+ }
+
+skip_harder: