Move try_mtu() closer to try_tx().
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
49 #ifndef MAX
50 #define MAX(a, b) ((a) > (b) ? (a) : (b))
51 #endif
52
53 int keylifetime = 0;
54 #ifdef HAVE_LZO
55 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
56 #endif
57
58 static void send_udppacket(node_t *, vpn_packet_t *);
59
60 unsigned replaywin = 16;
61 bool localdiscovery = true;
62 bool udp_discovery = true;
63 int udp_discovery_interval = 9;
64 int udp_discovery_timeout = 30;
65
66 #define MAX_SEQNO 1073741824
67
68 static void udp_probe_timeout_handler(void *data) {
69         node_t *n = data;
70         if(!n->status.udp_confirmed)
71                 return;
72
73         logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
74         n->status.udp_confirmed = false;
75         n->mtuprobes = 0;
76         n->minmtu = 0;
77         n->maxmtu = MTU;
78 }
79
80 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
81         if(!DATA(packet)[0]) {
82                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
83
84                 /* It's a probe request, send back a reply */
85
86                 /* Type 2 probe replies were introduced in protocol 17.3 */
87                 if ((n->options >> 24) >= 3) {
88                         uint8_t *data = DATA(packet);
89                         *data++ = 2;
90                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
91                         struct timeval now;
92                         gettimeofday(&now, NULL);
93                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
94                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
95                         packet->len -= 10;
96                 } else {
97                         /* Legacy protocol: n won't understand type 2 probe replies. */
98                         DATA(packet)[0] = 1;
99                 }
100
101                 /* Temporarily set udp_confirmed, so that the reply is sent
102                    back exactly the way it came in. */
103
104                 bool udp_confirmed = n->status.udp_confirmed;
105                 n->status.udp_confirmed = true;
106                 send_udppacket(n, packet);
107                 n->status.udp_confirmed = udp_confirmed;
108         } else {
109                 length_t probelen = len;
110                 if (DATA(packet)[0] == 2) {
111                         if (len < 3)
112                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
113                         else {
114                                 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
115                         }
116                 }
117                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
118
119                 /* It's a valid reply: now we know bidirectional communication
120                    is possible using the address and socket that the reply
121                    packet used. */
122                 n->status.udp_confirmed = true;
123
124                 if(udp_discovery) {
125                         timeout_del(&n->udp_ping_timeout);
126                         timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
127                 }
128
129                 if(probelen >= n->maxmtu + 8) {
130                         logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
131                         n->maxmtu = MTU;
132                         n->mtuprobes = 10;
133                         return;
134                 }
135
136                 /* If applicable, raise the minimum supported MTU */
137
138                 if(probelen > n->maxmtu)
139                         probelen = n->maxmtu;
140                 if(n->minmtu < probelen)
141                         n->minmtu = probelen;
142
143                 /* Calculate RTT and bandwidth.
144                    The RTT is the time between the MTU probe burst was sent and the first
145                    reply is received. The bandwidth is measured using the time between the
146                    arrival of the first and third probe reply (or type 2 probe requests).
147                  */
148
149                 struct timeval now, diff;
150                 gettimeofday(&now, NULL);
151                 timersub(&now, &n->probe_time, &diff);
152
153                 struct timeval probe_timestamp = now;
154                 if (DATA(packet)[0] == 2 && packet->len >= 11) {
155                         uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
156                         uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
157                         probe_timestamp.tv_sec = ntohl(sec);
158                         probe_timestamp.tv_usec = ntohl(usec);
159                 }
160                 
161                 n->probe_counter++;
162
163                 if(n->probe_counter == 1) {
164                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
165                         n->probe_time = probe_timestamp;
166                 } else if(n->probe_counter == 3) {
167                         /* TODO: this will never fire after initial MTU discovery. */
168                         struct timeval probe_timestamp_diff;
169                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
170                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
171                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
172                 }
173         }
174 }
175
176 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
177         if(level == 0) {
178                 memcpy(dest, source, len);
179                 return len;
180         } else if(level == 10) {
181 #ifdef HAVE_LZO
182                 lzo_uint lzolen = MAXSIZE;
183                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
184                 return lzolen;
185 #else
186                 return -1;
187 #endif
188         } else if(level < 10) {
189 #ifdef HAVE_ZLIB
190                 unsigned long destlen = MAXSIZE;
191                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
192                         return destlen;
193                 else
194 #endif
195                         return -1;
196         } else {
197 #ifdef HAVE_LZO
198                 lzo_uint lzolen = MAXSIZE;
199                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
200                 return lzolen;
201 #else
202                 return -1;
203 #endif
204         }
205
206         return -1;
207 }
208
209 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
210         if(level == 0) {
211                 memcpy(dest, source, len);
212                 return len;
213         } else if(level > 9) {
214 #ifdef HAVE_LZO
215                 lzo_uint lzolen = MAXSIZE;
216                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
217                         return lzolen;
218                 else
219 #endif
220                         return -1;
221         }
222 #ifdef HAVE_ZLIB
223         else {
224                 unsigned long destlen = MAXSIZE;
225                 if(uncompress(dest, &destlen, source, len) == Z_OK)
226                         return destlen;
227                 else
228                         return -1;
229         }
230 #endif
231
232         return -1;
233 }
234
235 /* VPN packet I/O */
236
237 static void receive_packet(node_t *n, vpn_packet_t *packet) {
238         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
239                            packet->len, n->name, n->hostname);
240
241         n->in_packets++;
242         n->in_bytes += packet->len;
243
244         route(n, packet);
245 }
246
247 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
248         if(n->status.sptps)
249                 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
250
251 #ifdef DISABLE_LEGACY
252         return false;
253 #else
254         if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
255                 return false;
256
257         return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
258 #endif
259 }
260
261 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
262         vpn_packet_t pkt1, pkt2;
263         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
264         int nextpkt = 0;
265         size_t outlen;
266         pkt1.offset = DEFAULT_PACKET_OFFSET;
267         pkt2.offset = DEFAULT_PACKET_OFFSET;
268
269         if(n->status.sptps) {
270                 if(!n->sptps.state) {
271                         if(!n->status.waitingforkey) {
272                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
273                                 send_req_key(n);
274                         } else {
275                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
276                         }
277                         return false;
278                 }
279                 inpkt->offset += 2 * sizeof(node_id_t);
280                 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
281                         logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
282                         return false;
283                 }
284                 return true;
285         }
286
287 #ifdef DISABLE_LEGACY
288         return false;
289 #else
290         if(!n->status.validkey) {
291                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
292                 return false;
293         }
294
295         /* Check packet length */
296
297         if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
298                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
299                                         n->name, n->hostname);
300                 return false;
301         }
302
303         /* It's a legacy UDP packet, the data starts after the seqno */
304
305         inpkt->offset += sizeof(seqno_t);
306
307         /* Check the message authentication code */
308
309         if(digest_active(n->indigest)) {
310                 inpkt->len -= digest_length(n->indigest);
311                 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
312                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
313                         return false;
314                 }
315         }
316         /* Decrypt the packet */
317
318         if(cipher_active(n->incipher)) {
319                 vpn_packet_t *outpkt = pkt[nextpkt++];
320                 outlen = MAXSIZE;
321
322                 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
323                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
324                         return false;
325                 }
326
327                 outpkt->len = outlen;
328                 inpkt = outpkt;
329         }
330
331         /* Check the sequence number */
332
333         seqno_t seqno;
334         memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
335         seqno = ntohl(seqno);
336         inpkt->len -= sizeof seqno;
337
338         if(replaywin) {
339                 if(seqno != n->received_seqno + 1) {
340                         if(seqno >= n->received_seqno + replaywin * 8) {
341                                 if(n->farfuture++ < replaywin >> 2) {
342                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
343                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
344                                         return false;
345                                 }
346                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
347                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
348                                 memset(n->late, 0, replaywin);
349                         } else if (seqno <= n->received_seqno) {
350                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
351                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
352                                                 n->name, n->hostname, seqno, n->received_seqno);
353                                         return false;
354                                 }
355                         } else {
356                                 for(int i = n->received_seqno + 1; i < seqno; i++)
357                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
358                         }
359                 }
360
361                 n->farfuture = 0;
362                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
363         }
364
365         if(seqno > n->received_seqno)
366                 n->received_seqno = seqno;
367
368         n->received++;
369
370         if(n->received_seqno > MAX_SEQNO)
371                 regenerate_key();
372
373         /* Decompress the packet */
374
375         length_t origlen = inpkt->len;
376
377         if(n->incompression) {
378                 vpn_packet_t *outpkt = pkt[nextpkt++];
379
380                 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
381                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
382                                                  n->name, n->hostname);
383                         return false;
384                 }
385
386                 inpkt = outpkt;
387
388                 origlen -= MTU/64 + 20;
389         }
390
391         inpkt->priority = 0;
392
393         if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
394                 udp_probe_h(n, inpkt, origlen);
395         else
396                 receive_packet(n, inpkt);
397         return true;
398 #endif
399 }
400
401 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
402         vpn_packet_t outpkt;
403         outpkt.offset = DEFAULT_PACKET_OFFSET;
404
405         if(len > sizeof outpkt.data - outpkt.offset)
406                 return;
407
408         outpkt.len = len;
409         if(c->options & OPTION_TCPONLY)
410                 outpkt.priority = 0;
411         else
412                 outpkt.priority = -1;
413         memcpy(DATA(&outpkt), buffer, len);
414
415         receive_packet(c->node, &outpkt);
416 }
417
418 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
419         if(!n->status.validkey && !n->connection)
420                 return;
421
422         uint8_t type = 0;
423         int offset = 0;
424
425         if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
426                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
427                 return;
428         }
429
430         if(routing_mode == RMODE_ROUTER)
431                 offset = 14;
432         else
433                 type = PKT_MAC;
434
435         if(origpkt->len < offset)
436                 return;
437
438         vpn_packet_t outpkt;
439
440         if(n->outcompression) {
441                 outpkt.offset = 0;
442                 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
443                 if(len < 0) {
444                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
445                 } else if(len < origpkt->len - offset) {
446                         outpkt.len = len + offset;
447                         origpkt = &outpkt;
448                         type |= PKT_COMPRESSED;
449                 }
450         }
451
452         /* If we have a direct metaconnection to n, and we can't use UDP, then
453            don't bother with SPTPS and just use a "plaintext" PACKET message.
454            We don't really care about end-to-end security since we're not
455            sending the message through any intermediate nodes. */
456         if(n->connection && origpkt->len > n->minmtu)
457                 send_tcppacket(n->connection, origpkt);
458         else
459                 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
460         return;
461 }
462
463 static void adapt_socket(const sockaddr_t *sa, int *sock) {
464         /* Make sure we have a suitable socket for the chosen address */
465         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
466                 for(int i = 0; i < listen_sockets; i++) {
467                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
468                                 *sock = i;
469                                 break;
470                         }
471                 }
472         }
473 }
474
475 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
476         /* Latest guess */
477         *sa = &n->address;
478         *sock = n->sock;
479
480         /* If the UDP address is confirmed, use it. */
481         if(n->status.udp_confirmed)
482                 return;
483
484         /* Send every third packet to n->address; that could be set
485            to the node's reflexive UDP address discovered during key
486            exchange. */
487
488         static int x = 0;
489         if(++x >= 3) {
490                 x = 0;
491                 return;
492         }
493
494         /* Otherwise, address are found in edges to this node.
495            So we pick a random edge and a random socket. */
496
497         int i = 0;
498         int j = rand() % n->edge_tree->count;
499         edge_t *candidate = NULL;
500
501         for splay_each(edge_t, e, n->edge_tree) {
502                 if(i++ == j) {
503                         candidate = e->reverse;
504                         break;
505                 }
506         }
507
508         if(candidate) {
509                 *sa = &candidate->address;
510                 *sock = rand() % listen_sockets;
511         }
512
513         adapt_socket(*sa, sock);
514 }
515
516 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
517         *sa = NULL;
518
519         /* Pick one of the edges from this node at random, then use its local address. */
520
521         int i = 0;
522         int j = rand() % n->edge_tree->count;
523         edge_t *candidate = NULL;
524
525         for splay_each(edge_t, e, n->edge_tree) {
526                 if(i++ == j) {
527                         candidate = e;
528                         break;
529                 }
530         }
531
532         if (candidate && candidate->local_address.sa.sa_family) {
533                 *sa = &candidate->local_address;
534                 *sock = rand() % listen_sockets;
535                 adapt_socket(*sa, sock);
536         }
537 }
538
539 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
540         vpn_packet_t pkt1, pkt2;
541         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
542         vpn_packet_t *inpkt = origpkt;
543         int nextpkt = 0;
544         vpn_packet_t *outpkt;
545         int origlen = origpkt->len;
546         size_t outlen;
547 #if defined(SOL_IP) && defined(IP_TOS)
548         static int priority = 0;
549         int origpriority = origpkt->priority;
550 #endif
551
552         pkt1.offset = DEFAULT_PACKET_OFFSET;
553         pkt2.offset = DEFAULT_PACKET_OFFSET;
554
555         if(!n->status.reachable) {
556                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
557                 return;
558         }
559
560         if(n->status.sptps)
561                 return send_sptps_packet(n, origpkt);
562
563 #ifdef DISABLE_LEGACY
564         return;
565 #else
566         /* Make sure we have a valid key */
567
568         if(!n->status.validkey) {
569                 logger(DEBUG_TRAFFIC, LOG_INFO,
570                                    "No valid key known yet for %s (%s), forwarding via TCP",
571                                    n->name, n->hostname);
572                 send_tcppacket(n->nexthop->connection, origpkt);
573                 return;
574         }
575
576         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
577                 logger(DEBUG_TRAFFIC, LOG_INFO,
578                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
579                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
580
581                 if(n != n->nexthop)
582                         send_packet(n->nexthop, origpkt);
583                 else
584                         send_tcppacket(n->nexthop->connection, origpkt);
585
586                 return;
587         }
588
589         /* Compress the packet */
590
591         if(n->outcompression) {
592                 outpkt = pkt[nextpkt++];
593
594                 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
595                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
596                                    n->name, n->hostname);
597                         return;
598                 }
599
600                 inpkt = outpkt;
601         }
602
603         /* Add sequence number */
604
605         seqno_t seqno = htonl(++(n->sent_seqno));
606         memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
607         inpkt->len += sizeof seqno;
608
609         /* Encrypt the packet */
610
611         if(cipher_active(n->outcipher)) {
612                 outpkt = pkt[nextpkt++];
613                 outlen = MAXSIZE;
614
615                 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
616                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
617                         goto end;
618                 }
619
620                 outpkt->len = outlen;
621                 inpkt = outpkt;
622         }
623
624         /* Add the message authentication code */
625
626         if(digest_active(n->outdigest)) {
627                 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
628                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
629                         goto end;
630                 }
631
632                 inpkt->len += digest_length(n->outdigest);
633         }
634
635         /* Send the packet */
636
637         const sockaddr_t *sa = NULL;
638         int sock;
639
640         if(n->status.send_locally)
641                 choose_local_address(n, &sa, &sock);
642         if(!sa)
643                 choose_udp_address(n, &sa, &sock);
644
645 #if defined(SOL_IP) && defined(IP_TOS)
646         if(priorityinheritance && origpriority != priority
647            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
648                 priority = origpriority;
649                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
650                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
651                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
652         }
653 #endif
654
655         if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
656                 if(sockmsgsize(sockerrno)) {
657                         if(n->maxmtu >= origlen)
658                                 n->maxmtu = origlen - 1;
659                         if(n->mtu >= origlen)
660                                 n->mtu = origlen - 1;
661                 } else
662                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
663         }
664
665 end:
666         origpkt->len = origlen;
667 #endif
668 }
669
670 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
671         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
672         bool direct = from == myself && to == relay;
673         bool relay_supported = (relay->options >> 24) >= 4;
674         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
675
676         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
677            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
678                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
679
680         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
681                 char buf[len * 4 / 3 + 5];
682                 b64encode(data, buf, len);
683                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
684                    to ensure we get to learn the reflexive UDP address. */
685                 if(from == myself && !to->status.validkey) {
686                         to->incompression = myself->incompression;
687                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
688                 } else {
689                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
690                 }
691         }
692
693         size_t overhead = 0;
694         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
695         char buf[len + overhead]; char* buf_ptr = buf;
696         if(relay_supported) {
697                 if(direct) {
698                         /* Inform the recipient that this packet was sent directly. */
699                         node_id_t nullid = {};
700                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
701                 } else {
702                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
703                 }
704                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
705
706         }
707         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
708         memcpy(buf_ptr, data, len); buf_ptr += len;
709
710         const sockaddr_t *sa = NULL;
711         int sock;
712         if(relay->status.send_locally)
713                 choose_local_address(relay, &sa, &sock);
714         if(!sa)
715                 choose_udp_address(relay, &sa, &sock);
716         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
717         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
718                 if(sockmsgsize(sockerrno)) {
719                         // Compensate for SPTPS overhead
720                         len -= SPTPS_DATAGRAM_OVERHEAD;
721                         if(relay->maxmtu >= len)
722                                 relay->maxmtu = len - 1;
723                         if(relay->mtu >= len)
724                                 relay->mtu = len - 1;
725                 } else {
726                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
727                         return false;
728                 }
729         }
730
731         return true;
732 }
733
734 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
735         return send_sptps_data_priv(handle, myself, type, data, len);
736 }
737
738 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
739         node_t *from = handle;
740
741         if(type == SPTPS_HANDSHAKE) {
742                 if(!from->status.validkey) {
743                         from->status.validkey = true;
744                         from->status.waitingforkey = false;
745                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
746                 }
747                 return true;
748         }
749
750         if(len > MTU) {
751                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
752                 return false;
753         }
754
755         vpn_packet_t inpkt;
756         inpkt.offset = DEFAULT_PACKET_OFFSET;
757
758         if(type == PKT_PROBE) {
759                 inpkt.len = len;
760                 memcpy(DATA(&inpkt), data, len);
761                 udp_probe_h(from, &inpkt, len);
762                 return true;
763         }
764
765         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
766                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
767                 return false;
768         }
769
770         /* Check if we have the headers we need */
771         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
772                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
773                 return false;
774         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
775                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
776         }
777
778         int offset = (type & PKT_MAC) ? 0 : 14;
779         if(type & PKT_COMPRESSED) {
780                 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
781                 if(ulen < 0) {
782                         return false;
783                 } else {
784                         inpkt.len = ulen + offset;
785                 }
786                 if(inpkt.len > MAXSIZE)
787                         abort();
788         } else {
789                 memcpy(DATA(&inpkt) + offset, data, len);
790                 inpkt.len = len + offset;
791         }
792
793         /* Generate the Ethernet packet type if necessary */
794         if(offset) {
795                 switch(DATA(&inpkt)[14] >> 4) {
796                         case 4:
797                                 DATA(&inpkt)[12] = 0x08;
798                                 DATA(&inpkt)[13] = 0x00;
799                                 break;
800                         case 6:
801                                 DATA(&inpkt)[12] = 0x86;
802                                 DATA(&inpkt)[13] = 0xDD;
803                                 break;
804                         default:
805                                 logger(DEBUG_TRAFFIC, LOG_ERR,
806                                                    "Unknown IP version %d while reading packet from %s (%s)",
807                                                    DATA(&inpkt)[14] >> 4, from->name, from->hostname);
808                                 return false;
809                 }
810         }
811
812         receive_packet(from, &inpkt);
813         return true;
814 }
815
816 // This function tries to get SPTPS keys, if they aren't already known.
817 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
818 static void try_sptps(node_t *n) {
819         if(n->status.validkey)
820                 return;
821
822         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
823
824         if(!n->status.waitingforkey)
825                 send_req_key(n);
826         else if(n->last_req_key + 10 < now.tv_sec) {
827                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
828                 sptps_stop(&n->sptps);
829                 n->status.waitingforkey = false;
830                 send_req_key(n);
831         }
832
833         return;
834 }
835
836 static void send_udp_probe_packet(node_t *n, int len) {
837         vpn_packet_t packet;
838         packet.offset = DEFAULT_PACKET_OFFSET;
839         memset(DATA(&packet), 0, 14);
840         randomize(DATA(&packet) + 14, len - 14);
841         packet.len = len;
842         packet.priority = 0;
843
844         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
845
846         send_udppacket(n, &packet);
847 }
848
849 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
850 // If a tunnel is already established, it makes sure it stays up.
851 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
852 static void try_udp(node_t* n) {
853         if(!udp_discovery)
854                 return;
855
856         struct timeval now;
857         gettimeofday(&now, NULL);
858         struct timeval ping_tx_elapsed;
859         timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
860
861         if(ping_tx_elapsed.tv_sec >= udp_discovery_interval) {
862                 send_udp_probe_packet(n, MAX(n->minmtu, 16));
863                 n->udp_ping_sent = now;
864
865                 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
866                         n->status.send_locally = true;
867                         send_udp_probe_packet(n, 16);
868                         n->status.send_locally = false;
869                 }
870         }
871 }
872
873 // This function tries to determines the MTU of a node.
874 // By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu.
875 // If the MTU is already fixed, this function checks if it can be increased.
876 static void try_mtu(node_t *n) {
877         if(!(n->options & OPTION_PMTU_DISCOVERY))
878                 return;
879
880         if(udp_discovery && !n->status.udp_confirmed) {
881                 n->mtuprobes = 0;
882                 n->minmtu = 0;
883                 n->maxmtu = MTU;
884                 return;
885         }
886
887         /* mtuprobes == 0..29: initial discovery, send bursts with 1 second interval, mtuprobes++
888            mtuprobes ==    30: fix MTU, and go to 31
889            mtuprobes ==    31: send one >maxmtu probe every pingtimeout */
890
891         struct timeval now;
892         gettimeofday(&now, NULL);
893         struct timeval elapsed;
894         timersub(&now, &n->probe_sent_time, &elapsed);
895         if(n->mtuprobes < 31) {
896                 if(n->mtuprobes != 0 && elapsed.tv_sec < 1)
897                         return;
898         } else {
899                 if(elapsed.tv_sec < pingtimeout)
900                         return;
901         }
902
903         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
904                 if(n->minmtu > n->maxmtu)
905                         n->minmtu = n->maxmtu;
906                 else
907                         n->maxmtu = n->minmtu;
908                 n->mtu = n->minmtu;
909                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
910                 n->mtuprobes = 31;
911         }
912
913         int timeout;
914         if(n->mtuprobes == 31) {
915                 /* After the initial discovery, we only send one >maxmtu probe
916                    to detect PMTU increases. */
917                 if(n->maxmtu + 8 < MTU)
918                         send_udp_probe_packet(n, n->maxmtu + 8);
919         } else {
920                 /* Probes are sent in batches of three, with random sizes between the
921                    lower and upper boundaries for the MTU thus far discovered. */
922                 for (int i = 0; i < 3; i++) {
923                         int len = n->maxmtu;
924                         if(n->minmtu < n->maxmtu)
925                                 len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
926
927                         send_udp_probe_packet(n, MAX(len, 64));
928                 }
929                 n->mtuprobes++;
930         }
931
932         n->probe_counter = 0;
933         n->probe_sent_time = now;
934         n->probe_time = now;
935
936         /* Calculate the packet loss of incoming traffic by comparing the rate of
937            packets received to the rate with which the sequence number has increased.
938            TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
939          */
940
941         if(n->received > n->prev_received)
942                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
943         else
944                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
945
946         n->prev_received_seqno = n->received_seqno;
947         n->prev_received = n->received;
948 }
949
950 // This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys).
951 // If a tunnel is already established, it tries to improve it (e.g. by trying to establish a UDP tunnel instead of TCP).
952 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if TCP and/or UDP is usable.
953 // By calling this function repeatedly, the tunnel is gradually improved until we hit the wall imposed by the underlying network environment.
954 // It is recommended to call this function every time a packet is sent (or intended to be sent) to a node,
955 // so that the tunnel keeps improving as packets flow, and then gracefully downgrades itself as it goes idle.
956 static void try_tx(node_t *n) {
957         /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
958            messages anyway, so there's no need for SPTPS at all. Otherwise, get the keys. */
959         if(n->status.sptps && !(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))) {
960                 try_sptps(n);
961                 if (!n->status.validkey)
962                         return;
963         }
964
965         node_t *via = (n->via == myself) ? n->nexthop : n->via;
966         
967         if((myself->options | via->options) & OPTION_TCPONLY)
968                 return;
969
970         if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) {
971                 send_req_key(via);
972                 via->last_req_key = now.tv_sec;
973         } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4) {
974                 try_udp(via);
975                 try_mtu(via);
976         }
977
978         /* If we don't know how to reach "via" yet, then try to reach it through a relay. */
979         if(n->status.sptps && !via->status.udp_confirmed && via->nexthop != via && (via->nexthop->options >> 24) >= 4)
980                 try_tx(via->nexthop);
981 }
982
983 /*
984   send a packet to the given vpn ip.
985 */
986 void send_packet(node_t *n, vpn_packet_t *packet) {
987         node_t *via;
988
989         if(n == myself) {
990                 if(overwrite_mac)
991                          memcpy(DATA(packet), mymac.x, ETH_ALEN);
992                 n->out_packets++;
993                 n->out_bytes += packet->len;
994                 devops.write(packet);
995                 return;
996         }
997
998         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
999                            packet->len, n->name, n->hostname);
1000
1001         if(!n->status.reachable) {
1002                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
1003                                    n->name, n->hostname);
1004                 return;
1005         }
1006
1007         n->out_packets++;
1008         n->out_bytes += packet->len;
1009
1010         if(n->status.sptps) {
1011                 send_sptps_packet(n, packet);
1012                 goto end;
1013         }
1014
1015         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
1016
1017         if(via != n)
1018                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
1019                            n->name, via->name, n->via->hostname);
1020
1021         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
1022                 if(!send_tcppacket(via->connection, packet))
1023                         terminate_connection(via->connection, true);
1024         } else
1025                 send_udppacket(via, packet);
1026
1027 end:
1028         /* Try to improve the tunnel.
1029            Note that we do this *after* we send the packet because sending actual packets take priority
1030            with regard to the send buffer space and latency. */
1031         try_tx(n);
1032 }
1033
1034 /* Broadcast a packet using the minimum spanning tree */
1035
1036 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1037         // Always give ourself a copy of the packet.
1038         if(from != myself)
1039                 send_packet(myself, packet);
1040
1041         // In TunnelServer mode, do not forward broadcast packets.
1042         // The MST might not be valid and create loops.
1043         if(tunnelserver || broadcast_mode == BMODE_NONE)
1044                 return;
1045
1046         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1047                            packet->len, from->name, from->hostname);
1048
1049         switch(broadcast_mode) {
1050                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1051                 // This guarantees all nodes receive the broadcast packet, and
1052                 // usually distributes the sending of broadcast packets over all nodes.
1053                 case BMODE_MST:
1054                         for list_each(connection_t, c, connection_list)
1055                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
1056                                         send_packet(c->node, packet);
1057                         break;
1058
1059                 // In direct mode, we send copies to each node we know of.
1060                 // However, this only reaches nodes that can be reached in a single hop.
1061                 // We don't have enough information to forward broadcast packets in this case.
1062                 case BMODE_DIRECT:
1063                         if(from != myself)
1064                                 break;
1065
1066                         for splay_each(node_t, n, node_tree)
1067                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1068                                         send_packet(n, packet);
1069                         break;
1070
1071                 default:
1072                         break;
1073         }
1074 }
1075
1076 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1077         node_t *n = NULL;
1078         bool hard = false;
1079         static time_t last_hard_try = 0;
1080
1081         for splay_each(edge_t, e, edge_weight_tree) {
1082                 if(!e->to->status.reachable || e->to == myself)
1083                         continue;
1084
1085                 if(sockaddrcmp_noport(from, &e->address)) {
1086                         if(last_hard_try == now.tv_sec)
1087                                 continue;
1088                         hard = true;
1089                 }
1090
1091                 if(!try_mac(e->to, pkt))
1092                         continue;
1093
1094                 n = e->to;
1095                 break;
1096         }
1097
1098         if(hard)
1099                 last_hard_try = now.tv_sec;
1100
1101         last_hard_try = now.tv_sec;
1102         return n;
1103 }
1104
1105 void handle_incoming_vpn_data(void *data, int flags) {
1106         listen_socket_t *ls = data;
1107         vpn_packet_t pkt;
1108         char *hostname;
1109         node_id_t nullid = {};
1110         sockaddr_t addr = {};
1111         socklen_t addrlen = sizeof addr;
1112         node_t *from, *to;
1113         bool direct = false;
1114
1115         pkt.offset = 0;
1116         int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1117
1118         if(len <= 0 || len > MAXSIZE) {
1119                 if(!sockwouldblock(sockerrno))
1120                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1121                 return;
1122         }
1123
1124         pkt.len = len;
1125
1126         sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1127
1128         // Try to figure out who sent this packet.
1129
1130         node_t *n = lookup_node_udp(&addr);
1131
1132         if(!n) {
1133                 // It might be from a 1.1 node, which might have a source ID in the packet.
1134                 pkt.offset = 2 * sizeof(node_id_t);
1135                 from = lookup_node_id(SRCID(&pkt));
1136                 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1137                         if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1138                                 n = from;
1139                         else
1140                                 goto skip_harder;
1141                 }
1142         }
1143
1144         if(!n) {
1145                 pkt.offset = 0;
1146                 n = try_harder(&addr, &pkt);
1147         }
1148
1149 skip_harder:
1150         if(!n) {
1151                 if(debug_level >= DEBUG_PROTOCOL) {
1152                         hostname = sockaddr2hostname(&addr);
1153                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1154                         free(hostname);
1155                 }
1156                 return;
1157         }
1158
1159         if(n->status.sptps) {
1160                 pkt.offset = 2 * sizeof(node_id_t);
1161
1162                 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1163                         direct = true;
1164                         from = n;
1165                         to = myself;
1166                 } else {
1167                         from = lookup_node_id(SRCID(&pkt));
1168                         to = lookup_node_id(DSTID(&pkt));
1169                 }
1170                 if(!from || !to) {
1171                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1172                         return;
1173                 }
1174
1175                 if(to != myself) {
1176                         send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1177                         return;
1178                 }
1179         } else {
1180                 direct = true;
1181                 from = n;
1182         }
1183
1184         pkt.offset = 0;
1185         if(!receive_udppacket(from, &pkt))
1186                 return;
1187
1188         n->sock = ls - listen_socket;
1189         if(direct && sockaddrcmp(&addr, &n->address))
1190                 update_node_udp(n, &addr);
1191 }
1192
1193 void handle_device_data(void *data, int flags) {
1194         vpn_packet_t packet;
1195         packet.offset = DEFAULT_PACKET_OFFSET;
1196         packet.priority = 0;
1197
1198         if(devops.read(&packet)) {
1199                 myself->in_packets++;
1200                 myself->in_bytes += packet.len;
1201                 route(myself, &packet);
1202         }
1203 }