Proactively send our own key when we request another node's key.
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "ipv4.h"
41 #include "ipv6.h"
42 #include "graph.h"
43 #include "logger.h"
44 #include "net.h"
45 #include "netutl.h"
46 #include "protocol.h"
47 #include "route.h"
48 #include "utils.h"
49 #include "xalloc.h"
50
#ifndef MAX
/* Note: evaluates each argument more than once, so avoid arguments with side effects. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

/* NOTE(review): not referenced in the visible part of this file; presumably the
   key lifetime in seconds -- confirm against the code that reads it. */
int keylifetime = 0;
#ifdef HAVE_LZO
/* Work memory handed to the LZO compressors in compress_packet(); sized for
   whichever of the two compressor variants needs more. */
static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
#endif

/* Forward declaration: udp_probe_h() sends probe replies through
   send_udppacket(), which is defined further down. */
static void send_udppacket(node_t *, vpn_packet_t *);

/* Size in bytes of each node's late-packet bitmap (n->late) used by the legacy
   receive path, which therefore tracks replaywin * 8 sequence numbers.
   A value of 0 disables the replay check. */
unsigned replaywin = 16;
/* NOTE(review): not referenced in the visible part of this file -- confirm meaning at its users. */
bool localdiscovery = true;
/* When true, udp_probe_h() (re)arms the per-node UDP ping timeout on every probe reply. */
bool udp_discovery = true;
/* NOTE(review): the two interval settings below are not referenced in the
   visible part of this file; presumably expressed in seconds -- confirm. */
int udp_discovery_keepalive_interval = 9;
int udp_discovery_interval = 2;
/* Seconds without a probe reply after which udp_probe_timeout_handler() fires. */
int udp_discovery_timeout = 30;

/* Once a node's received sequence number exceeds this value (2^30),
   receive_udppacket() calls regenerate_key(). */
#define MAX_SEQNO 1073741824
70
71 static void try_fix_mtu(node_t *n) {
72         if(n->mtuprobes < 0)
73                 return;
74
75         if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) {
76                 if(n->minmtu > n->maxmtu)
77                         n->minmtu = n->maxmtu;
78                 else
79                         n->maxmtu = n->minmtu;
80                 n->mtu = n->minmtu;
81                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
82                 n->mtuprobes = -1;
83         }
84 }
85
86 static void udp_probe_timeout_handler(void *data) {
87         node_t *n = data;
88         if(!n->status.udp_confirmed)
89                 return;
90
91         logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
92         n->status.udp_confirmed = false;
93         n->mtuprobes = 0;
94         n->minmtu = 0;
95         n->maxmtu = MTU;
96 }
97
98 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
99         if(!DATA(packet)[0]) {
100                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
101
102                 /* It's a probe request, send back a reply */
103
104                 /* Type 2 probe replies were introduced in protocol 17.3 */
105                 if ((n->options >> 24) >= 3) {
106                         uint8_t *data = DATA(packet);
107                         *data++ = 2;
108                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
109                         struct timeval now;
110                         gettimeofday(&now, NULL);
111                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
112                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
113                         packet->len = 14; // Minimum size for any probe packet.
114                 } else {
115                         /* Legacy protocol: n won't understand type 2 probe replies. */
116                         DATA(packet)[0] = 1;
117                 }
118
119                 /* Temporarily set udp_confirmed, so that the reply is sent
120                    back exactly the way it came in. */
121
122                 bool udp_confirmed = n->status.udp_confirmed;
123                 n->status.udp_confirmed = true;
124                 send_udppacket(n, packet);
125                 n->status.udp_confirmed = udp_confirmed;
126         } else {
127                 length_t probelen = len;
128                 if (DATA(packet)[0] == 2) {
129                         if (len < 3)
130                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
131                         else {
132                                 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
133                         }
134                 }
135                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
136
137                 /* It's a valid reply: now we know bidirectional communication
138                    is possible using the address and socket that the reply
139                    packet used. */
140                 n->status.udp_confirmed = true;
141
142                 if(udp_discovery) {
143                         timeout_del(&n->udp_ping_timeout);
144                         timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
145                 }
146
147                 if(probelen >= n->maxmtu + 1) {
148                         logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
149                         n->maxmtu = MTU;
150                         /* Set mtuprobes to 1 so that try_mtu() doesn't reset maxmtu */
151                         n->mtuprobes = 1;
152                         return;
153                 }
154
155                 /* If applicable, raise the minimum supported MTU */
156
157                 if(probelen > n->maxmtu)
158                         probelen = n->maxmtu;
159                 if(n->minmtu < probelen) {
160                         n->minmtu = probelen;
161                         try_fix_mtu(n);
162                 }
163
164                 /* Calculate RTT.
165                    The RTT is the time between the MTU probe burst was sent and the first
166                    reply is received.
167                  */
168
169                 struct timeval now, diff;
170                 gettimeofday(&now, NULL);
171                 timersub(&now, &n->probe_time, &diff);
172
173                 struct timeval probe_timestamp = now;
174                 if (DATA(packet)[0] == 2 && packet->len >= 11) {
175                         uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
176                         uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
177                         probe_timestamp.tv_sec = ntohl(sec);
178                         probe_timestamp.tv_usec = ntohl(usec);
179                 }
180                 
181                 n->probe_counter++;
182
183                 if(n->probe_counter == 1) {
184                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
185                         n->probe_time = probe_timestamp;
186                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2);
187                 }
188         }
189 }
190
191 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
192         if(level == 0) {
193                 memcpy(dest, source, len);
194                 return len;
195         } else if(level == 10) {
196 #ifdef HAVE_LZO
197                 lzo_uint lzolen = MAXSIZE;
198                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
199                 return lzolen;
200 #else
201                 return -1;
202 #endif
203         } else if(level < 10) {
204 #ifdef HAVE_ZLIB
205                 unsigned long destlen = MAXSIZE;
206                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
207                         return destlen;
208                 else
209 #endif
210                         return -1;
211         } else {
212 #ifdef HAVE_LZO
213                 lzo_uint lzolen = MAXSIZE;
214                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
215                 return lzolen;
216 #else
217                 return -1;
218 #endif
219         }
220
221         return -1;
222 }
223
224 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
225         if(level == 0) {
226                 memcpy(dest, source, len);
227                 return len;
228         } else if(level > 9) {
229 #ifdef HAVE_LZO
230                 lzo_uint lzolen = MAXSIZE;
231                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
232                         return lzolen;
233                 else
234 #endif
235                         return -1;
236         }
237 #ifdef HAVE_ZLIB
238         else {
239                 unsigned long destlen = MAXSIZE;
240                 if(uncompress(dest, &destlen, source, len) == Z_OK)
241                         return destlen;
242                 else
243                         return -1;
244         }
245 #endif
246
247         return -1;
248 }
249
250 /* VPN packet I/O */
251
252 static void receive_packet(node_t *n, vpn_packet_t *packet) {
253         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
254                            packet->len, n->name, n->hostname);
255
256         n->in_packets++;
257         n->in_bytes += packet->len;
258
259         route(n, packet);
260 }
261
262 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
263         if(n->status.sptps)
264                 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
265
266 #ifdef DISABLE_LEGACY
267         return false;
268 #else
269         if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
270                 return false;
271
272         return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
273 #endif
274 }
275
276 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
277         vpn_packet_t pkt1, pkt2;
278         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
279         int nextpkt = 0;
280         size_t outlen;
281         pkt1.offset = DEFAULT_PACKET_OFFSET;
282         pkt2.offset = DEFAULT_PACKET_OFFSET;
283
284         if(n->status.sptps) {
285                 if(!n->sptps.state) {
286                         if(!n->status.waitingforkey) {
287                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
288                                 send_req_key(n);
289                         } else {
290                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
291                         }
292                         return false;
293                 }
294                 inpkt->offset += 2 * sizeof(node_id_t);
295                 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
296                         logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
297                         return false;
298                 }
299                 return true;
300         }
301
302 #ifdef DISABLE_LEGACY
303         return false;
304 #else
305         if(!n->status.validkey_in) {
306                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
307                 return false;
308         }
309
310         /* Check packet length */
311
312         if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
313                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
314                                         n->name, n->hostname);
315                 return false;
316         }
317
318         /* It's a legacy UDP packet, the data starts after the seqno */
319
320         inpkt->offset += sizeof(seqno_t);
321
322         /* Check the message authentication code */
323
324         if(digest_active(n->indigest)) {
325                 inpkt->len -= digest_length(n->indigest);
326                 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
327                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
328                         return false;
329                 }
330         }
331         /* Decrypt the packet */
332
333         if(cipher_active(n->incipher)) {
334                 vpn_packet_t *outpkt = pkt[nextpkt++];
335                 outlen = MAXSIZE;
336
337                 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
338                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
339                         return false;
340                 }
341
342                 outpkt->len = outlen;
343                 inpkt = outpkt;
344         }
345
346         /* Check the sequence number */
347
348         seqno_t seqno;
349         memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
350         seqno = ntohl(seqno);
351         inpkt->len -= sizeof seqno;
352
353         if(replaywin) {
354                 if(seqno != n->received_seqno + 1) {
355                         if(seqno >= n->received_seqno + replaywin * 8) {
356                                 if(n->farfuture++ < replaywin >> 2) {
357                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
358                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
359                                         return false;
360                                 }
361                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
362                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
363                                 memset(n->late, 0, replaywin);
364                         } else if (seqno <= n->received_seqno) {
365                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
366                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
367                                                 n->name, n->hostname, seqno, n->received_seqno);
368                                         return false;
369                                 }
370                         } else {
371                                 for(int i = n->received_seqno + 1; i < seqno; i++)
372                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
373                         }
374                 }
375
376                 n->farfuture = 0;
377                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
378         }
379
380         if(seqno > n->received_seqno)
381                 n->received_seqno = seqno;
382
383         n->received++;
384
385         if(n->received_seqno > MAX_SEQNO)
386                 regenerate_key();
387
388         /* Decompress the packet */
389
390         length_t origlen = inpkt->len;
391
392         if(n->incompression) {
393                 vpn_packet_t *outpkt = pkt[nextpkt++];
394
395                 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
396                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
397                                                  n->name, n->hostname);
398                         return false;
399                 }
400
401                 inpkt = outpkt;
402
403                 origlen -= MTU/64 + 20;
404         }
405
406         inpkt->priority = 0;
407
408         if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
409                 udp_probe_h(n, inpkt, origlen);
410         else
411                 receive_packet(n, inpkt);
412         return true;
413 #endif
414 }
415
416 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
417         vpn_packet_t outpkt;
418         outpkt.offset = DEFAULT_PACKET_OFFSET;
419
420         if(len > sizeof outpkt.data - outpkt.offset)
421                 return;
422
423         outpkt.len = len;
424         if(c->options & OPTION_TCPONLY)
425                 outpkt.priority = 0;
426         else
427                 outpkt.priority = -1;
428         memcpy(DATA(&outpkt), buffer, len);
429
430         receive_packet(c->node, &outpkt);
431 }
432
433 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
434         if(!n->status.validkey && !n->connection)
435                 return;
436
437         uint8_t type = 0;
438         int offset = 0;
439
440         if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
441                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
442                 return;
443         }
444
445         if(routing_mode == RMODE_ROUTER)
446                 offset = 14;
447         else
448                 type = PKT_MAC;
449
450         if(origpkt->len < offset)
451                 return;
452
453         vpn_packet_t outpkt;
454
455         if(n->outcompression) {
456                 outpkt.offset = 0;
457                 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
458                 if(len < 0) {
459                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
460                 } else if(len < origpkt->len - offset) {
461                         outpkt.len = len + offset;
462                         origpkt = &outpkt;
463                         type |= PKT_COMPRESSED;
464                 }
465         }
466
467         /* If we have a direct metaconnection to n, and we can't use UDP, then
468            don't bother with SPTPS and just use a "plaintext" PACKET message.
469            We don't really care about end-to-end security since we're not
470            sending the message through any intermediate nodes. */
471         if(n->connection && origpkt->len > n->minmtu)
472                 send_tcppacket(n->connection, origpkt);
473         else
474                 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
475         return;
476 }
477
478 static void adapt_socket(const sockaddr_t *sa, int *sock) {
479         /* Make sure we have a suitable socket for the chosen address */
480         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
481                 for(int i = 0; i < listen_sockets; i++) {
482                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
483                                 *sock = i;
484                                 break;
485                         }
486                 }
487         }
488 }
489
490 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
491         /* Latest guess */
492         *sa = &n->address;
493         *sock = n->sock;
494
495         /* If the UDP address is confirmed, use it. */
496         if(n->status.udp_confirmed)
497                 return;
498
499         /* Send every third packet to n->address; that could be set
500            to the node's reflexive UDP address discovered during key
501            exchange. */
502
503         static int x = 0;
504         if(++x >= 3) {
505                 x = 0;
506                 return;
507         }
508
509         /* Otherwise, address are found in edges to this node.
510            So we pick a random edge and a random socket. */
511
512         int i = 0;
513         int j = rand() % n->edge_tree->count;
514         edge_t *candidate = NULL;
515
516         for splay_each(edge_t, e, n->edge_tree) {
517                 if(i++ == j) {
518                         candidate = e->reverse;
519                         break;
520                 }
521         }
522
523         if(candidate) {
524                 *sa = &candidate->address;
525                 *sock = rand() % listen_sockets;
526         }
527
528         adapt_socket(*sa, sock);
529 }
530
531 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
532         *sa = NULL;
533
534         /* Pick one of the edges from this node at random, then use its local address. */
535
536         int i = 0;
537         int j = rand() % n->edge_tree->count;
538         edge_t *candidate = NULL;
539
540         for splay_each(edge_t, e, n->edge_tree) {
541                 if(i++ == j) {
542                         candidate = e;
543                         break;
544                 }
545         }
546
547         if (candidate && candidate->local_address.sa.sa_family) {
548                 *sa = &candidate->local_address;
549                 *sock = rand() % listen_sockets;
550                 adapt_socket(*sa, sock);
551         }
552 }
553
554 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
555         vpn_packet_t pkt1, pkt2;
556         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
557         vpn_packet_t *inpkt = origpkt;
558         int nextpkt = 0;
559         vpn_packet_t *outpkt;
560         int origlen = origpkt->len;
561         size_t outlen;
562 #if defined(SOL_IP) && defined(IP_TOS)
563         static int priority = 0;
564         int origpriority = origpkt->priority;
565 #endif
566
567         pkt1.offset = DEFAULT_PACKET_OFFSET;
568         pkt2.offset = DEFAULT_PACKET_OFFSET;
569
570         if(!n->status.reachable) {
571                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
572                 return;
573         }
574
575         if(n->status.sptps)
576                 return send_sptps_packet(n, origpkt);
577
578 #ifdef DISABLE_LEGACY
579         return;
580 #else
581         /* Make sure we have a valid key */
582
583         if(!n->status.validkey) {
584                 logger(DEBUG_TRAFFIC, LOG_INFO,
585                                    "No valid key known yet for %s (%s), forwarding via TCP",
586                                    n->name, n->hostname);
587                 send_tcppacket(n->nexthop->connection, origpkt);
588                 return;
589         }
590
591         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
592                 logger(DEBUG_TRAFFIC, LOG_INFO,
593                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
594                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
595
596                 if(n != n->nexthop)
597                         send_packet(n->nexthop, origpkt);
598                 else
599                         send_tcppacket(n->nexthop->connection, origpkt);
600
601                 return;
602         }
603
604         /* Compress the packet */
605
606         if(n->outcompression) {
607                 outpkt = pkt[nextpkt++];
608
609                 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
610                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
611                                    n->name, n->hostname);
612                         return;
613                 }
614
615                 inpkt = outpkt;
616         }
617
618         /* Add sequence number */
619
620         seqno_t seqno = htonl(++(n->sent_seqno));
621         memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
622         inpkt->len += sizeof seqno;
623
624         /* Encrypt the packet */
625
626         if(cipher_active(n->outcipher)) {
627                 outpkt = pkt[nextpkt++];
628                 outlen = MAXSIZE;
629
630                 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
631                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
632                         goto end;
633                 }
634
635                 outpkt->len = outlen;
636                 inpkt = outpkt;
637         }
638
639         /* Add the message authentication code */
640
641         if(digest_active(n->outdigest)) {
642                 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
643                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
644                         goto end;
645                 }
646
647                 inpkt->len += digest_length(n->outdigest);
648         }
649
650         /* Send the packet */
651
652         const sockaddr_t *sa = NULL;
653         int sock;
654
655         if(n->status.send_locally)
656                 choose_local_address(n, &sa, &sock);
657         if(!sa)
658                 choose_udp_address(n, &sa, &sock);
659
660 #if defined(SOL_IP) && defined(IP_TOS)
661         if(priorityinheritance && origpriority != priority
662            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
663                 priority = origpriority;
664                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
665                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
666                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
667         }
668 #endif
669
670         if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
671                 if(sockmsgsize(sockerrno)) {
672                         if(n->maxmtu >= origlen)
673                                 n->maxmtu = origlen - 1;
674                         if(n->mtu >= origlen)
675                                 n->mtu = origlen - 1;
676                         try_fix_mtu(n);
677                 } else
678                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
679         }
680
681 end:
682         origpkt->len = origlen;
683 #endif
684 }
685
686 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
687         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
688         bool direct = from == myself && to == relay;
689         bool relay_supported = (relay->options >> 24) >= 4;
690         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
691
692         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
693            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
694                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
695
696         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
697                 char buf[len * 4 / 3 + 5];
698                 b64encode(data, buf, len);
699                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
700                    to ensure we get to learn the reflexive UDP address. */
701                 if(from == myself && !to->status.validkey) {
702                         to->incompression = myself->incompression;
703                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
704                 } else {
705                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
706                 }
707         }
708
709         size_t overhead = 0;
710         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
711         char buf[len + overhead]; char* buf_ptr = buf;
712         if(relay_supported) {
713                 if(direct) {
714                         /* Inform the recipient that this packet was sent directly. */
715                         node_id_t nullid = {};
716                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
717                 } else {
718                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
719                 }
720                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
721
722         }
723         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
724         memcpy(buf_ptr, data, len); buf_ptr += len;
725
726         const sockaddr_t *sa = NULL;
727         int sock;
728         if(relay->status.send_locally)
729                 choose_local_address(relay, &sa, &sock);
730         if(!sa)
731                 choose_udp_address(relay, &sa, &sock);
732         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
733         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
734                 if(sockmsgsize(sockerrno)) {
735                         // Compensate for SPTPS overhead
736                         len -= SPTPS_DATAGRAM_OVERHEAD;
737                         if(relay->maxmtu >= len)
738                                 relay->maxmtu = len - 1;
739                         if(relay->mtu >= len)
740                                 relay->mtu = len - 1;
741                         try_fix_mtu(relay);
742                 } else {
743                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
744                         return false;
745                 }
746         }
747
748         return true;
749 }
750
751 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
752         return send_sptps_data_priv(handle, myself, type, data, len);
753 }
754
755 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
756         node_t *from = handle;
757
758         if(type == SPTPS_HANDSHAKE) {
759                 if(!from->status.validkey) {
760                         from->status.validkey = true;
761                         from->status.waitingforkey = false;
762                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
763                 }
764                 return true;
765         }
766
767         if(len > MTU) {
768                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
769                 return false;
770         }
771
772         vpn_packet_t inpkt;
773         inpkt.offset = DEFAULT_PACKET_OFFSET;
774
775         if(type == PKT_PROBE) {
776                 inpkt.len = len;
777                 memcpy(DATA(&inpkt), data, len);
778                 udp_probe_h(from, &inpkt, len);
779                 return true;
780         }
781
782         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
783                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
784                 return false;
785         }
786
787         /* Check if we have the headers we need */
788         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
789                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
790                 return false;
791         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
792                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
793         }
794
795         int offset = (type & PKT_MAC) ? 0 : 14;
796         if(type & PKT_COMPRESSED) {
797                 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
798                 if(ulen < 0) {
799                         return false;
800                 } else {
801                         inpkt.len = ulen + offset;
802                 }
803                 if(inpkt.len > MAXSIZE)
804                         abort();
805         } else {
806                 memcpy(DATA(&inpkt) + offset, data, len);
807                 inpkt.len = len + offset;
808         }
809
810         /* Generate the Ethernet packet type if necessary */
811         if(offset) {
812                 switch(DATA(&inpkt)[14] >> 4) {
813                         case 4:
814                                 DATA(&inpkt)[12] = 0x08;
815                                 DATA(&inpkt)[13] = 0x00;
816                                 break;
817                         case 6:
818                                 DATA(&inpkt)[12] = 0x86;
819                                 DATA(&inpkt)[13] = 0xDD;
820                                 break;
821                         default:
822                                 logger(DEBUG_TRAFFIC, LOG_ERR,
823                                                    "Unknown IP version %d while reading packet from %s (%s)",
824                                                    DATA(&inpkt)[14] >> 4, from->name, from->hostname);
825                                 return false;
826                 }
827         }
828
829         receive_packet(from, &inpkt);
830         return true;
831 }
832
833 // This function tries to get SPTPS keys, if they aren't already known.
834 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
835 static void try_sptps(node_t *n) {
836         if(n->status.validkey)
837                 return;
838
839         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
840
841         if(!n->status.waitingforkey)
842                 send_req_key(n);
843         else if(n->last_req_key + 10 < now.tv_sec) {
844                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
845                 sptps_stop(&n->sptps);
846                 n->status.waitingforkey = false;
847                 send_req_key(n);
848         }
849
850         return;
851 }
852
853 static void send_udp_probe_packet(node_t *n, int len) {
854         vpn_packet_t packet;
855         packet.offset = DEFAULT_PACKET_OFFSET;
856         memset(DATA(&packet), 0, 14);
857         randomize(DATA(&packet) + 14, len - 14);
858         packet.len = len;
859         packet.priority = 0;
860
861         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
862
863         send_udppacket(n, &packet);
864 }
865
866 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
867 // If a tunnel is already established, it makes sure it stays up.
868 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
869 static void try_udp(node_t* n) {
870         if(!udp_discovery)
871                 return;
872
873         struct timeval ping_tx_elapsed;
874         timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
875
876         int interval = n->status.udp_confirmed ? udp_discovery_keepalive_interval : udp_discovery_interval;
877
878         if(ping_tx_elapsed.tv_sec >= interval) {
879                 send_udp_probe_packet(n, MAX(n->minmtu, 16));
880                 n->udp_ping_sent = now;
881
882                 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
883                         n->status.send_locally = true;
884                         send_udp_probe_packet(n, 16);
885                         n->status.send_locally = false;
886                 }
887         }
888 }
889
890 static length_t choose_initial_maxmtu(node_t *n) {
891 #ifdef IP_MTU
892
893         int sock = -1;
894
895         const sockaddr_t *sa = NULL;
896         int sockindex;
897         choose_udp_address(n, &sa, &sockindex);
898         if(!sa)
899                 return MTU;
900
901         sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP);
902         if(sock < 0) {
903                 logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
904                 return MTU;
905         }
906
907         if(connect(sock, &sa->sa, SALEN(sa->sa))) {
908                 logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
909                 close(sock);
910                 return MTU;
911         }
912
913         int ip_mtu;
914         socklen_t ip_mtu_len = sizeof ip_mtu;
915         if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) {
916                 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
917                 close(sock);
918                 return MTU;
919         }
920
921         close(sock);
922
923         /* getsockopt(IP_MTU) returns the MTU of the physical interface.
924            We need to remove various overheads to get to the tinc MTU. */
925         length_t mtu = ip_mtu;
926         mtu -= (sa->sa.sa_family == AF_INET6) ? sizeof(struct ip6_hdr) : sizeof(struct ip);
927         mtu -= 8; /* UDP */
928         if(n->status.sptps) {
929                 mtu -= SPTPS_DATAGRAM_OVERHEAD;
930                 if((n->options >> 24) >= 4)
931                         mtu -= sizeof(node_id_t) + sizeof(node_id_t);
932         } else {
933                 mtu -= digest_length(n->outdigest);
934
935                 /* Now it's tricky. We use CBC mode, so the length of the
936                    encrypted payload must be a multiple of the blocksize. The
937                    sequence number is also part of the encrypted payload, so we
938                    must account for it after correcting for the blocksize.
939                    Furthermore, the padding in the last block must be at least
940                    1 byte. */
941
942                 length_t blocksize = cipher_blocksize(n->outcipher);
943
944                 if(blocksize > 1) {
945                         mtu /= blocksize;
946                         mtu *= blocksize;
947                         mtu--;
948                 }
949
950                 mtu -= 4; // seqno
951         }
952
953         if (mtu < 512) {
954                 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu);
955                 return MTU;
956         }
957         if (mtu > MTU)
958                 return MTU;
959
960         logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu);
961         return mtu;
962
963 #else
964
965         return MTU;
966
967 #endif
968 }
969
970 /* This function tries to determines the MTU of a node.
971    By calling this function repeatedly, n->minmtu will be progressively
972    increased, and at some point, n->mtu will be fixed to n->minmtu.  If the MTU
973    is already fixed, this function checks if it can be increased.
974 */
975
976 static void try_mtu(node_t *n) {
977         if(!(n->options & OPTION_PMTU_DISCOVERY))
978                 return;
979
980         if(udp_discovery && !n->status.udp_confirmed) {
981                 n->mtuprobes = 0;
982                 n->minmtu = 0;
983                 n->maxmtu = MTU;
984                 return;
985         }
986
987         /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++
988            mtuprobes ==    20: fix MTU, and go to -1
989            mtuprobes ==    -1: send one >maxmtu probe every pingtimeout */
990
991         struct timeval elapsed;
992         timersub(&now, &n->probe_sent_time, &elapsed);
993         if(n->mtuprobes >= 0) {
994                 if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
995                         return;
996         } else {
997                 if(elapsed.tv_sec < pingtimeout)
998                         return;
999         }
1000
1001         try_fix_mtu(n);
1002
1003         if(n->mtuprobes < 0) {
1004                 /* After the initial discovery, we only send one >maxmtu probe
1005                    to detect PMTU increases. */
1006                 if(n->maxmtu + 1 < MTU)
1007                         send_udp_probe_packet(n, n->maxmtu + 1);
1008         } else {
1009                 /* Before initial discovery begins, set maxmtu to the most likely value.
1010                    If it's underestimated, we will correct it after initial discovery. */
1011                 if(n->mtuprobes == 0)
1012                         n->maxmtu = choose_initial_maxmtu(n);
1013
1014                 for (;;) {
1015                         /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
1016                            but it will typically increase convergence time in the no-loss case. */
1017                         const length_t probes_per_cycle = 8;
1018
1019                         /* This magic value was determined using math simulations.
1020                            It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
1021                            Since 1407 is just below the range of tinc MTUs over typical networks,
1022                            this fine-tuning allows tinc to cover a lot of ground very quickly.
1023                            This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller,
1024                            then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario
1025                            if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */
1026                         const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1;
1027
1028                         const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1;
1029                         const length_t minmtu = MAX(n->minmtu, 512);
1030                         const float interval = n->maxmtu - minmtu;
1031
1032                         /* The core of the discovery algorithm is this exponential.
1033                            It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
1034                            This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
1035                            are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
1036                            on the precise MTU as we are approaching it.
1037                            The last probe of the cycle is always 1 byte in size - this is to make sure we'll get at least one
1038                            reply per cycle so that we can make progress. */
1039                         const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
1040
1041                         length_t maxmtu = n->maxmtu;
1042                         send_udp_probe_packet(n, minmtu + offset);
1043                         /* If maxmtu changed, it means the probe was rejected by the system because it was too large.
1044                            In that case, we recalculate with the new maxmtu and try again. */
1045                         if(n->mtuprobes < 0 || maxmtu == n->maxmtu)
1046                                 break;
1047                 }
1048
1049                 if(n->mtuprobes >= 0)
1050                         n->mtuprobes++;
1051         }
1052
1053         n->probe_counter = 0;
1054         n->probe_sent_time = now;
1055         n->probe_time = now;
1056
1057         /* Calculate the packet loss of incoming traffic by comparing the rate of
1058            packets received to the rate with which the sequence number has increased.
1059            TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
1060          */
1061
1062         if(n->received > n->prev_received)
1063                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
1064         else
1065                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
1066
1067         n->prev_received_seqno = n->received_seqno;
1068         n->prev_received = n->received;
1069 }
1070
1071 /* These functions try to establish a tunnel to a node (or its relay) so that
1072    packets can be sent (e.g. exchange keys).
1073    If a tunnel is already established, it tries to improve it (e.g. by trying
1074    to establish a UDP tunnel instead of TCP).  This function makes no
1075    guarantees - it is up to the caller to check the node's state to figure out
1076    if TCP and/or UDP is usable.  By calling this function repeatedly, the
1077    tunnel is gradually improved until we hit the wall imposed by the underlying
1078    network environment.  It is recommended to call this function every time a
1079    packet is sent (or intended to be sent) to a node, so that the tunnel keeps
1080    improving as packets flow, and then gracefully downgrades itself as it goes
1081    idle.
1082 */
1083
1084 static void try_tx_sptps(node_t *n) {
1085         /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
1086            messages anyway, so there's no need for SPTPS at all. */
1087
1088         if(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))
1089                 return;
1090
1091         /* Otherwise, try to do SPTPS authentication with n if necessary. */
1092
1093         try_sptps(n);
1094
1095         /* Do we need to relay packets? */
1096
1097         node_t *via = (n->via == myself) ? n->nexthop : n->via;
1098
1099         /* If the relay doesn't support SPTPS, everything goes via TCP anyway. */
1100
1101         if((via->options >> 24) < 4)
1102                 return;
1103
1104         /* If we do have a relay, try everything with that one instead. */
1105
1106         if(via != n)
1107                 return try_tx_sptps(via);
1108
1109         try_udp(n);
1110         try_mtu(n);
1111 }
1112
1113 static void try_tx_legacy(node_t *n) {
1114         /* Does he have our key? If not, send one. */
1115
1116         if(!n->status.validkey_in)
1117                 send_ans_key(n);
1118
1119         /* Check if we already have a key, or request one. */
1120
1121         if(!n->status.validkey) {
1122                 if(n->last_req_key + 10 <= now.tv_sec) {
1123                         send_req_key(n);
1124                         n->last_req_key = now.tv_sec;
1125                 }
1126                 return;
1127         }
1128
1129         try_udp(n);
1130         try_mtu(n);
1131 }
1132
1133 void send_packet(node_t *n, vpn_packet_t *packet) {
1134         // If it's for myself, write it to the tun/tap device.
1135
1136         if(n == myself) {
1137                 if(overwrite_mac)
1138                          memcpy(DATA(packet), mymac.x, ETH_ALEN);
1139                 n->out_packets++;
1140                 n->out_bytes += packet->len;
1141                 devops.write(packet);
1142                 return;
1143         }
1144
1145         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)", packet->len, n->name, n->hostname);
1146
1147         // If the node is not reachable, drop it.
1148
1149         if(!n->status.reachable) {
1150                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable", n->name, n->hostname);
1151                 return;
1152         }
1153
1154         // Keep track of packet statistics.
1155
1156         n->out_packets++;
1157         n->out_bytes += packet->len;
1158
1159         // Check if it should be sent as an SPTPS packet.
1160
1161         if(n->status.sptps) {
1162                 send_sptps_packet(n, packet);
1163                 try_tx_sptps(n);
1164                 return;
1165         }
1166
1167         // Determine which node to actually send it to.
1168
1169         node_t *via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
1170
1171         if(via != n)
1172                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)", n->name, via->name, n->via->hostname);
1173
1174         // Try to send via UDP, unless TCP is forced.
1175
1176         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
1177                 if(!send_tcppacket(via->connection, packet))
1178                         terminate_connection(via->connection, true);
1179                 return;
1180         }
1181
1182         send_udppacket(via, packet);
1183         try_tx_legacy(via);
1184 }
1185
1186 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1187         // Always give ourself a copy of the packet.
1188         if(from != myself)
1189                 send_packet(myself, packet);
1190
1191         // In TunnelServer mode, do not forward broadcast packets.
1192         // The MST might not be valid and create loops.
1193         if(tunnelserver || broadcast_mode == BMODE_NONE)
1194                 return;
1195
1196         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1197                            packet->len, from->name, from->hostname);
1198
1199         switch(broadcast_mode) {
1200                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1201                 // This guarantees all nodes receive the broadcast packet, and
1202                 // usually distributes the sending of broadcast packets over all nodes.
1203                 case BMODE_MST:
1204                         for list_each(connection_t, c, connection_list)
1205                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
1206                                         send_packet(c->node, packet);
1207                         break;
1208
1209                 // In direct mode, we send copies to each node we know of.
1210                 // However, this only reaches nodes that can be reached in a single hop.
1211                 // We don't have enough information to forward broadcast packets in this case.
1212                 case BMODE_DIRECT:
1213                         if(from != myself)
1214                                 break;
1215
1216                         for splay_each(node_t, n, node_tree)
1217                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1218                                         send_packet(n, packet);
1219                         break;
1220
1221                 default:
1222                         break;
1223         }
1224 }
1225
1226 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1227         node_t *n = NULL;
1228         bool hard = false;
1229         static time_t last_hard_try = 0;
1230
1231         for splay_each(edge_t, e, edge_weight_tree) {
1232                 if(!e->to->status.reachable || e->to == myself)
1233                         continue;
1234
1235                 if(sockaddrcmp_noport(from, &e->address)) {
1236                         if(last_hard_try == now.tv_sec)
1237                                 continue;
1238                         hard = true;
1239                 }
1240
1241                 if(!try_mac(e->to, pkt))
1242                         continue;
1243
1244                 n = e->to;
1245                 break;
1246         }
1247
1248         if(hard)
1249                 last_hard_try = now.tv_sec;
1250
1251         last_hard_try = now.tv_sec;
1252         return n;
1253 }
1254
1255 void handle_incoming_vpn_data(void *data, int flags) {
1256         listen_socket_t *ls = data;
1257         vpn_packet_t pkt;
1258         char *hostname;
1259         node_id_t nullid = {};
1260         sockaddr_t addr = {};
1261         socklen_t addrlen = sizeof addr;
1262         node_t *from, *to;
1263         bool direct = false;
1264
1265         pkt.offset = 0;
1266         int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1267
1268         if(len <= 0 || len > MAXSIZE) {
1269                 if(!sockwouldblock(sockerrno))
1270                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1271                 return;
1272         }
1273
1274         pkt.len = len;
1275
1276         sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1277
1278         // Try to figure out who sent this packet.
1279
1280         node_t *n = lookup_node_udp(&addr);
1281
1282         if(!n) {
1283                 // It might be from a 1.1 node, which might have a source ID in the packet.
1284                 pkt.offset = 2 * sizeof(node_id_t);
1285                 from = lookup_node_id(SRCID(&pkt));
1286                 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1287                         if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1288                                 n = from;
1289                         else
1290                                 goto skip_harder;
1291                 }
1292         }
1293
1294         if(!n) {
1295                 pkt.offset = 0;
1296                 n = try_harder(&addr, &pkt);
1297         }
1298
1299 skip_harder:
1300         if(!n) {
1301                 if(debug_level >= DEBUG_PROTOCOL) {
1302                         hostname = sockaddr2hostname(&addr);
1303                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1304                         free(hostname);
1305                 }
1306                 return;
1307         }
1308
1309         if(n->status.sptps) {
1310                 pkt.offset = 2 * sizeof(node_id_t);
1311
1312                 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1313                         direct = true;
1314                         from = n;
1315                         to = myself;
1316                 } else {
1317                         from = lookup_node_id(SRCID(&pkt));
1318                         to = lookup_node_id(DSTID(&pkt));
1319                 }
1320                 if(!from || !to) {
1321                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1322                         return;
1323                 }
1324
1325                 if(to != myself) {
1326                         send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1327                         return;
1328                 }
1329         } else {
1330                 direct = true;
1331                 from = n;
1332         }
1333
1334         pkt.offset = 0;
1335         if(!receive_udppacket(from, &pkt))
1336                 return;
1337
1338         n->sock = ls - listen_socket;
1339         if(direct && sockaddrcmp(&addr, &n->address))
1340                 update_node_udp(n, &addr);
1341 }
1342
1343 void handle_device_data(void *data, int flags) {
1344         vpn_packet_t packet;
1345         packet.offset = DEFAULT_PACKET_OFFSET;
1346         packet.priority = 0;
1347
1348         if(devops.read(&packet)) {
1349                 myself->in_packets++;
1350                 myself->in_bytes += packet.len;
1351                 route(myself, &packet);
1352         }
1353 }