Move PMTU discovery code into the TX path.
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
49 #ifndef MAX
50 #define MAX(a, b) ((a) > (b) ? (a) : (b))
51 #endif
52
53 int keylifetime = 0;
54 #ifdef HAVE_LZO
55 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
56 #endif
57
58 static void send_udppacket(node_t *, vpn_packet_t *);
59
/* Size in bytes of the per-node replay bitmap (8 sequence numbers per byte).
   A value of 0 turns the replay check off. */
unsigned int replaywin = 16;

bool localdiscovery = true;

/* When set, a node must have confirmed UDP connectivity before PMTU probing
   starts, and the confirmation expires udp_discovery_timeout seconds after
   the last probe reply. */
bool udp_discovery = true;
int udp_discovery_timeout = 30;
int udp_discovery_interval = 9;

/* Receiving a sequence number above this value triggers regenerate_key(). */
#define MAX_SEQNO 1073741824
67
68 static void send_udp_probe_packet(node_t *n, int len) {
69         vpn_packet_t packet;
70         packet.offset = DEFAULT_PACKET_OFFSET;
71         memset(DATA(&packet), 0, 14);
72         randomize(DATA(&packet) + 14, len - 14);
73         packet.len = len;
74         packet.priority = 0;
75
76         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
77
78         send_udppacket(n, &packet);
79 }
80
81 // This function tries to determines the MTU of a node.
82 // By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu.
83 // If the MTU is already fixed, this function checks if it can be increased.
84 static void try_mtu(node_t *n) {
85         if(!(n->options & OPTION_PMTU_DISCOVERY))
86                 return;
87
88         if(udp_discovery && !n->status.udp_confirmed) {
89                 n->mtuprobes = 0;
90                 n->minmtu = 0;
91                 n->maxmtu = MTU;
92                 return;
93         }
94
95         /* mtuprobes == 0..29: initial discovery, send bursts with 1 second interval, mtuprobes++
96            mtuprobes ==    30: fix MTU, and go to 31
97            mtuprobes ==    31: send one >maxmtu probe every pingtimeout */
98
99         struct timeval now;
100         gettimeofday(&now, NULL);
101         struct timeval elapsed;
102         timersub(&now, &n->probe_sent_time, &elapsed);
103         if(n->mtuprobes < 31) {
104                 if(n->mtuprobes != 0 && elapsed.tv_sec < 1)
105                         return;
106         } else {
107                 if(elapsed.tv_sec < pingtimeout)
108                         return;
109         }
110
111         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
112                 if(n->minmtu > n->maxmtu)
113                         n->minmtu = n->maxmtu;
114                 else
115                         n->maxmtu = n->minmtu;
116                 n->mtu = n->minmtu;
117                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
118                 n->mtuprobes = 31;
119         }
120
121         int timeout;
122         if(n->mtuprobes == 31) {
123                 /* After the initial discovery, we only send one >maxmtu probe
124                    to detect PMTU increases. */
125                 if(n->maxmtu + 8 < MTU)
126                         send_udp_probe_packet(n, n->maxmtu + 8);
127         } else {
128                 /* Probes are sent in batches of three, with random sizes between the
129                    lower and upper boundaries for the MTU thus far discovered. */
130                 for (int i = 0; i < 3; i++) {
131                         int len = n->maxmtu;
132                         if(n->minmtu < n->maxmtu)
133                                 len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
134
135                         send_udp_probe_packet(n, MAX(len, 64));
136                 }
137                 n->mtuprobes++;
138         }
139
140         n->probe_counter = 0;
141         n->probe_sent_time = now;
142         n->probe_time = now;
143
144         /* Calculate the packet loss of incoming traffic by comparing the rate of
145            packets received to the rate with which the sequence number has increased.
146            TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
147          */
148
149         if(n->received > n->prev_received)
150                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
151         else
152                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
153
154         n->prev_received_seqno = n->received_seqno;
155         n->prev_received = n->received;
156 }
157
158 static void udp_probe_timeout_handler(void *data) {
159         node_t *n = data;
160         if(!n->status.udp_confirmed)
161                 return;
162
163         logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
164         n->status.udp_confirmed = false;
165         n->mtuprobes = 0;
166         n->minmtu = 0;
167         n->maxmtu = MTU;
168 }
169
170 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
171         if(!DATA(packet)[0]) {
172                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
173
174                 /* It's a probe request, send back a reply */
175
176                 /* Type 2 probe replies were introduced in protocol 17.3 */
177                 if ((n->options >> 24) >= 3) {
178                         uint8_t *data = DATA(packet);
179                         *data++ = 2;
180                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
181                         struct timeval now;
182                         gettimeofday(&now, NULL);
183                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
184                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
185                         packet->len -= 10;
186                 } else {
187                         /* Legacy protocol: n won't understand type 2 probe replies. */
188                         DATA(packet)[0] = 1;
189                 }
190
191                 /* Temporarily set udp_confirmed, so that the reply is sent
192                    back exactly the way it came in. */
193
194                 bool udp_confirmed = n->status.udp_confirmed;
195                 n->status.udp_confirmed = true;
196                 send_udppacket(n, packet);
197                 n->status.udp_confirmed = udp_confirmed;
198         } else {
199                 length_t probelen = len;
200                 if (DATA(packet)[0] == 2) {
201                         if (len < 3)
202                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
203                         else {
204                                 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
205                         }
206                 }
207                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
208
209                 /* It's a valid reply: now we know bidirectional communication
210                    is possible using the address and socket that the reply
211                    packet used. */
212                 n->status.udp_confirmed = true;
213
214                 if(udp_discovery) {
215                         timeout_del(&n->udp_ping_timeout);
216                         timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
217                 }
218
219                 if(probelen >= n->maxmtu + 8) {
220                         logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
221                         n->maxmtu = MTU;
222                         n->mtuprobes = 10;
223                         return;
224                 }
225
226                 /* If applicable, raise the minimum supported MTU */
227
228                 if(probelen > n->maxmtu)
229                         probelen = n->maxmtu;
230                 if(n->minmtu < probelen)
231                         n->minmtu = probelen;
232
233                 /* Calculate RTT and bandwidth.
234                    The RTT is the time between the MTU probe burst was sent and the first
235                    reply is received. The bandwidth is measured using the time between the
236                    arrival of the first and third probe reply (or type 2 probe requests).
237                  */
238
239                 struct timeval now, diff;
240                 gettimeofday(&now, NULL);
241                 timersub(&now, &n->probe_time, &diff);
242
243                 struct timeval probe_timestamp = now;
244                 if (DATA(packet)[0] == 2 && packet->len >= 11) {
245                         uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
246                         uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
247                         probe_timestamp.tv_sec = ntohl(sec);
248                         probe_timestamp.tv_usec = ntohl(usec);
249                 }
250                 
251                 n->probe_counter++;
252
253                 if(n->probe_counter == 1) {
254                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
255                         n->probe_time = probe_timestamp;
256                 } else if(n->probe_counter == 3) {
257                         /* TODO: this will never fire after initial MTU discovery. */
258                         struct timeval probe_timestamp_diff;
259                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
260                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
261                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
262                 }
263         }
264 }
265
266 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
267         if(level == 0) {
268                 memcpy(dest, source, len);
269                 return len;
270         } else if(level == 10) {
271 #ifdef HAVE_LZO
272                 lzo_uint lzolen = MAXSIZE;
273                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
274                 return lzolen;
275 #else
276                 return -1;
277 #endif
278         } else if(level < 10) {
279 #ifdef HAVE_ZLIB
280                 unsigned long destlen = MAXSIZE;
281                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
282                         return destlen;
283                 else
284 #endif
285                         return -1;
286         } else {
287 #ifdef HAVE_LZO
288                 lzo_uint lzolen = MAXSIZE;
289                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
290                 return lzolen;
291 #else
292                 return -1;
293 #endif
294         }
295
296         return -1;
297 }
298
299 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
300         if(level == 0) {
301                 memcpy(dest, source, len);
302                 return len;
303         } else if(level > 9) {
304 #ifdef HAVE_LZO
305                 lzo_uint lzolen = MAXSIZE;
306                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
307                         return lzolen;
308                 else
309 #endif
310                         return -1;
311         }
312 #ifdef HAVE_ZLIB
313         else {
314                 unsigned long destlen = MAXSIZE;
315                 if(uncompress(dest, &destlen, source, len) == Z_OK)
316                         return destlen;
317                 else
318                         return -1;
319         }
320 #endif
321
322         return -1;
323 }
324
325 /* VPN packet I/O */
326
327 static void receive_packet(node_t *n, vpn_packet_t *packet) {
328         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
329                            packet->len, n->name, n->hostname);
330
331         n->in_packets++;
332         n->in_bytes += packet->len;
333
334         route(n, packet);
335 }
336
337 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
338         if(n->status.sptps)
339                 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
340
341 #ifdef DISABLE_LEGACY
342         return false;
343 #else
344         if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
345                 return false;
346
347         return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
348 #endif
349 }
350
351 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
352         vpn_packet_t pkt1, pkt2;
353         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
354         int nextpkt = 0;
355         size_t outlen;
356         pkt1.offset = DEFAULT_PACKET_OFFSET;
357         pkt2.offset = DEFAULT_PACKET_OFFSET;
358
359         if(n->status.sptps) {
360                 if(!n->sptps.state) {
361                         if(!n->status.waitingforkey) {
362                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
363                                 send_req_key(n);
364                         } else {
365                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
366                         }
367                         return false;
368                 }
369                 inpkt->offset += 2 * sizeof(node_id_t);
370                 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
371                         logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
372                         return false;
373                 }
374                 return true;
375         }
376
377 #ifdef DISABLE_LEGACY
378         return false;
379 #else
380         if(!n->status.validkey) {
381                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
382                 return false;
383         }
384
385         /* Check packet length */
386
387         if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
388                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
389                                         n->name, n->hostname);
390                 return false;
391         }
392
393         /* It's a legacy UDP packet, the data starts after the seqno */
394
395         inpkt->offset += sizeof(seqno_t);
396
397         /* Check the message authentication code */
398
399         if(digest_active(n->indigest)) {
400                 inpkt->len -= digest_length(n->indigest);
401                 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
402                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
403                         return false;
404                 }
405         }
406         /* Decrypt the packet */
407
408         if(cipher_active(n->incipher)) {
409                 vpn_packet_t *outpkt = pkt[nextpkt++];
410                 outlen = MAXSIZE;
411
412                 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
413                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
414                         return false;
415                 }
416
417                 outpkt->len = outlen;
418                 inpkt = outpkt;
419         }
420
421         /* Check the sequence number */
422
423         seqno_t seqno;
424         memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
425         seqno = ntohl(seqno);
426         inpkt->len -= sizeof seqno;
427
428         if(replaywin) {
429                 if(seqno != n->received_seqno + 1) {
430                         if(seqno >= n->received_seqno + replaywin * 8) {
431                                 if(n->farfuture++ < replaywin >> 2) {
432                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
433                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
434                                         return false;
435                                 }
436                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
437                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
438                                 memset(n->late, 0, replaywin);
439                         } else if (seqno <= n->received_seqno) {
440                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
441                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
442                                                 n->name, n->hostname, seqno, n->received_seqno);
443                                         return false;
444                                 }
445                         } else {
446                                 for(int i = n->received_seqno + 1; i < seqno; i++)
447                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
448                         }
449                 }
450
451                 n->farfuture = 0;
452                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
453         }
454
455         if(seqno > n->received_seqno)
456                 n->received_seqno = seqno;
457
458         n->received++;
459
460         if(n->received_seqno > MAX_SEQNO)
461                 regenerate_key();
462
463         /* Decompress the packet */
464
465         length_t origlen = inpkt->len;
466
467         if(n->incompression) {
468                 vpn_packet_t *outpkt = pkt[nextpkt++];
469
470                 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
471                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
472                                                  n->name, n->hostname);
473                         return false;
474                 }
475
476                 inpkt = outpkt;
477
478                 origlen -= MTU/64 + 20;
479         }
480
481         inpkt->priority = 0;
482
483         if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
484                 udp_probe_h(n, inpkt, origlen);
485         else
486                 receive_packet(n, inpkt);
487         return true;
488 #endif
489 }
490
491 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
492         vpn_packet_t outpkt;
493         outpkt.offset = DEFAULT_PACKET_OFFSET;
494
495         if(len > sizeof outpkt.data - outpkt.offset)
496                 return;
497
498         outpkt.len = len;
499         if(c->options & OPTION_TCPONLY)
500                 outpkt.priority = 0;
501         else
502                 outpkt.priority = -1;
503         memcpy(DATA(&outpkt), buffer, len);
504
505         receive_packet(c->node, &outpkt);
506 }
507
508 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
509         if(!n->status.validkey && !n->connection)
510                 return;
511
512         uint8_t type = 0;
513         int offset = 0;
514
515         if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
516                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
517                 return;
518         }
519
520         if(routing_mode == RMODE_ROUTER)
521                 offset = 14;
522         else
523                 type = PKT_MAC;
524
525         if(origpkt->len < offset)
526                 return;
527
528         vpn_packet_t outpkt;
529
530         if(n->outcompression) {
531                 outpkt.offset = 0;
532                 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
533                 if(len < 0) {
534                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
535                 } else if(len < origpkt->len - offset) {
536                         outpkt.len = len + offset;
537                         origpkt = &outpkt;
538                         type |= PKT_COMPRESSED;
539                 }
540         }
541
542         /* If we have a direct metaconnection to n, and we can't use UDP, then
543            don't bother with SPTPS and just use a "plaintext" PACKET message.
544            We don't really care about end-to-end security since we're not
545            sending the message through any intermediate nodes. */
546         if(n->connection && origpkt->len > n->minmtu)
547                 send_tcppacket(n->connection, origpkt);
548         else
549                 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
550         return;
551 }
552
553 static void adapt_socket(const sockaddr_t *sa, int *sock) {
554         /* Make sure we have a suitable socket for the chosen address */
555         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
556                 for(int i = 0; i < listen_sockets; i++) {
557                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
558                                 *sock = i;
559                                 break;
560                         }
561                 }
562         }
563 }
564
565 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
566         /* Latest guess */
567         *sa = &n->address;
568         *sock = n->sock;
569
570         /* If the UDP address is confirmed, use it. */
571         if(n->status.udp_confirmed)
572                 return;
573
574         /* Send every third packet to n->address; that could be set
575            to the node's reflexive UDP address discovered during key
576            exchange. */
577
578         static int x = 0;
579         if(++x >= 3) {
580                 x = 0;
581                 return;
582         }
583
584         /* Otherwise, address are found in edges to this node.
585            So we pick a random edge and a random socket. */
586
587         int i = 0;
588         int j = rand() % n->edge_tree->count;
589         edge_t *candidate = NULL;
590
591         for splay_each(edge_t, e, n->edge_tree) {
592                 if(i++ == j) {
593                         candidate = e->reverse;
594                         break;
595                 }
596         }
597
598         if(candidate) {
599                 *sa = &candidate->address;
600                 *sock = rand() % listen_sockets;
601         }
602
603         adapt_socket(*sa, sock);
604 }
605
606 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
607         *sa = NULL;
608
609         /* Pick one of the edges from this node at random, then use its local address. */
610
611         int i = 0;
612         int j = rand() % n->edge_tree->count;
613         edge_t *candidate = NULL;
614
615         for splay_each(edge_t, e, n->edge_tree) {
616                 if(i++ == j) {
617                         candidate = e;
618                         break;
619                 }
620         }
621
622         if (candidate && candidate->local_address.sa.sa_family) {
623                 *sa = &candidate->local_address;
624                 *sock = rand() % listen_sockets;
625                 adapt_socket(*sa, sock);
626         }
627 }
628
629 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
630         vpn_packet_t pkt1, pkt2;
631         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
632         vpn_packet_t *inpkt = origpkt;
633         int nextpkt = 0;
634         vpn_packet_t *outpkt;
635         int origlen = origpkt->len;
636         size_t outlen;
637 #if defined(SOL_IP) && defined(IP_TOS)
638         static int priority = 0;
639         int origpriority = origpkt->priority;
640 #endif
641
642         pkt1.offset = DEFAULT_PACKET_OFFSET;
643         pkt2.offset = DEFAULT_PACKET_OFFSET;
644
645         if(!n->status.reachable) {
646                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
647                 return;
648         }
649
650         if(n->status.sptps)
651                 return send_sptps_packet(n, origpkt);
652
653 #ifdef DISABLE_LEGACY
654         return;
655 #else
656         /* Make sure we have a valid key */
657
658         if(!n->status.validkey) {
659                 logger(DEBUG_TRAFFIC, LOG_INFO,
660                                    "No valid key known yet for %s (%s), forwarding via TCP",
661                                    n->name, n->hostname);
662                 send_tcppacket(n->nexthop->connection, origpkt);
663                 return;
664         }
665
666         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
667                 logger(DEBUG_TRAFFIC, LOG_INFO,
668                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
669                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
670
671                 if(n != n->nexthop)
672                         send_packet(n->nexthop, origpkt);
673                 else
674                         send_tcppacket(n->nexthop->connection, origpkt);
675
676                 return;
677         }
678
679         /* Compress the packet */
680
681         if(n->outcompression) {
682                 outpkt = pkt[nextpkt++];
683
684                 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
685                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
686                                    n->name, n->hostname);
687                         return;
688                 }
689
690                 inpkt = outpkt;
691         }
692
693         /* Add sequence number */
694
695         seqno_t seqno = htonl(++(n->sent_seqno));
696         memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
697         inpkt->len += sizeof seqno;
698
699         /* Encrypt the packet */
700
701         if(cipher_active(n->outcipher)) {
702                 outpkt = pkt[nextpkt++];
703                 outlen = MAXSIZE;
704
705                 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
706                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
707                         goto end;
708                 }
709
710                 outpkt->len = outlen;
711                 inpkt = outpkt;
712         }
713
714         /* Add the message authentication code */
715
716         if(digest_active(n->outdigest)) {
717                 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
718                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
719                         goto end;
720                 }
721
722                 inpkt->len += digest_length(n->outdigest);
723         }
724
725         /* Send the packet */
726
727         const sockaddr_t *sa = NULL;
728         int sock;
729
730         if(n->status.send_locally)
731                 choose_local_address(n, &sa, &sock);
732         if(!sa)
733                 choose_udp_address(n, &sa, &sock);
734
735 #if defined(SOL_IP) && defined(IP_TOS)
736         if(priorityinheritance && origpriority != priority
737            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
738                 priority = origpriority;
739                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
740                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
741                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
742         }
743 #endif
744
745         if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
746                 if(sockmsgsize(sockerrno)) {
747                         if(n->maxmtu >= origlen)
748                                 n->maxmtu = origlen - 1;
749                         if(n->mtu >= origlen)
750                                 n->mtu = origlen - 1;
751                 } else
752                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
753         }
754
755 end:
756         origpkt->len = origlen;
757 #endif
758 }
759
760 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
761         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
762         bool direct = from == myself && to == relay;
763         bool relay_supported = (relay->options >> 24) >= 4;
764         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
765
766         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
767            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
768                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
769
770         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
771                 char buf[len * 4 / 3 + 5];
772                 b64encode(data, buf, len);
773                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
774                    to ensure we get to learn the reflexive UDP address. */
775                 if(from == myself && !to->status.validkey) {
776                         to->incompression = myself->incompression;
777                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
778                 } else {
779                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
780                 }
781         }
782
783         size_t overhead = 0;
784         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
785         char buf[len + overhead]; char* buf_ptr = buf;
786         if(relay_supported) {
787                 if(direct) {
788                         /* Inform the recipient that this packet was sent directly. */
789                         node_id_t nullid = {};
790                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
791                 } else {
792                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
793                 }
794                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
795
796         }
797         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
798         memcpy(buf_ptr, data, len); buf_ptr += len;
799
800         const sockaddr_t *sa = NULL;
801         int sock;
802         if(relay->status.send_locally)
803                 choose_local_address(relay, &sa, &sock);
804         if(!sa)
805                 choose_udp_address(relay, &sa, &sock);
806         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
807         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
808                 if(sockmsgsize(sockerrno)) {
809                         // Compensate for SPTPS overhead
810                         len -= SPTPS_DATAGRAM_OVERHEAD;
811                         if(relay->maxmtu >= len)
812                                 relay->maxmtu = len - 1;
813                         if(relay->mtu >= len)
814                                 relay->mtu = len - 1;
815                 } else {
816                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
817                         return false;
818                 }
819         }
820
821         return true;
822 }
823
824 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
825         return send_sptps_data_priv(handle, myself, type, data, len);
826 }
827
828 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
829         node_t *from = handle;
830
831         if(type == SPTPS_HANDSHAKE) {
832                 if(!from->status.validkey) {
833                         from->status.validkey = true;
834                         from->status.waitingforkey = false;
835                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
836                 }
837                 return true;
838         }
839
840         if(len > MTU) {
841                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
842                 return false;
843         }
844
845         vpn_packet_t inpkt;
846         inpkt.offset = DEFAULT_PACKET_OFFSET;
847
848         if(type == PKT_PROBE) {
849                 inpkt.len = len;
850                 memcpy(DATA(&inpkt), data, len);
851                 udp_probe_h(from, &inpkt, len);
852                 return true;
853         }
854
855         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
856                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
857                 return false;
858         }
859
860         /* Check if we have the headers we need */
861         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
862                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
863                 return false;
864         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
865                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
866         }
867
868         int offset = (type & PKT_MAC) ? 0 : 14;
869         if(type & PKT_COMPRESSED) {
870                 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
871                 if(ulen < 0) {
872                         return false;
873                 } else {
874                         inpkt.len = ulen + offset;
875                 }
876                 if(inpkt.len > MAXSIZE)
877                         abort();
878         } else {
879                 memcpy(DATA(&inpkt) + offset, data, len);
880                 inpkt.len = len + offset;
881         }
882
883         /* Generate the Ethernet packet type if necessary */
884         if(offset) {
885                 switch(DATA(&inpkt)[14] >> 4) {
886                         case 4:
887                                 DATA(&inpkt)[12] = 0x08;
888                                 DATA(&inpkt)[13] = 0x00;
889                                 break;
890                         case 6:
891                                 DATA(&inpkt)[12] = 0x86;
892                                 DATA(&inpkt)[13] = 0xDD;
893                                 break;
894                         default:
895                                 logger(DEBUG_TRAFFIC, LOG_ERR,
896                                                    "Unknown IP version %d while reading packet from %s (%s)",
897                                                    DATA(&inpkt)[14] >> 4, from->name, from->hostname);
898                                 return false;
899                 }
900         }
901
902         receive_packet(from, &inpkt);
903         return true;
904 }
905
906 // This function tries to get SPTPS keys, if they aren't already known.
907 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
908 static void try_sptps(node_t *n) {
909         if(n->status.validkey)
910                 return;
911
912         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
913
914         if(!n->status.waitingforkey)
915                 send_req_key(n);
916         else if(n->last_req_key + 10 < now.tv_sec) {
917                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
918                 sptps_stop(&n->sptps);
919                 n->status.waitingforkey = false;
920                 send_req_key(n);
921         }
922
923         return;
924 }
925
926 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
927 // If a tunnel is already established, it makes sure it stays up.
928 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
929 static void try_udp(node_t* n) {
930         if(!udp_discovery)
931                 return;
932
933         struct timeval now;
934         gettimeofday(&now, NULL);
935         struct timeval ping_tx_elapsed;
936         timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
937
938         if(ping_tx_elapsed.tv_sec >= udp_discovery_interval) {
939                 send_udp_probe_packet(n, MAX(n->minmtu, 16));
940                 n->udp_ping_sent = now;
941
942                 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
943                         n->status.send_locally = true;
944                         send_udp_probe_packet(n, 16);
945                         n->status.send_locally = false;
946                 }
947         }
948 }
949
950 // This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys).
951 // If a tunnel is already established, it tries to improve it (e.g. by trying to establish a UDP tunnel instead of TCP).
952 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if TCP and/or UDP is usable.
953 // By calling this function repeatedly, the tunnel is gradually improved until we hit the wall imposed by the underlying network environment.
954 // It is recommended to call this function every time a packet is sent (or intended to be sent) to a node,
955 // so that the tunnel keeps improving as packets flow, and then gracefully downgrades itself as it goes idle.
956 static void try_tx(node_t *n) {
957         /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
958            messages anyway, so there's no need for SPTPS at all. Otherwise, get the keys. */
959         if(n->status.sptps && !(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))) {
960                 try_sptps(n);
961                 if (!n->status.validkey)
962                         return;
963         }
964
965         node_t *via = (n->via == myself) ? n->nexthop : n->via;
966         
967         if((myself->options | via->options) & OPTION_TCPONLY)
968                 return;
969
970         if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) {
971                 send_req_key(via);
972                 via->last_req_key = now.tv_sec;
973         } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4) {
974                 try_udp(via);
975                 try_mtu(via);
976         }
977
978         /* If we don't know how to reach "via" yet, then try to reach it through a relay. */
979         if(n->status.sptps && !via->status.udp_confirmed && via->nexthop != via && (via->nexthop->options >> 24) >= 4)
980                 try_tx(via->nexthop);
981 }
982
983 /*
984   send a packet to the given vpn ip.
985 */
986 void send_packet(node_t *n, vpn_packet_t *packet) {
987         node_t *via;
988
989         if(n == myself) {
990                 if(overwrite_mac)
991                          memcpy(DATA(packet), mymac.x, ETH_ALEN);
992                 n->out_packets++;
993                 n->out_bytes += packet->len;
994                 devops.write(packet);
995                 return;
996         }
997
998         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
999                            packet->len, n->name, n->hostname);
1000
1001         if(!n->status.reachable) {
1002                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
1003                                    n->name, n->hostname);
1004                 return;
1005         }
1006
1007         n->out_packets++;
1008         n->out_bytes += packet->len;
1009
1010         if(n->status.sptps) {
1011                 send_sptps_packet(n, packet);
1012                 goto end;
1013         }
1014
1015         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
1016
1017         if(via != n)
1018                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
1019                            n->name, via->name, n->via->hostname);
1020
1021         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
1022                 if(!send_tcppacket(via->connection, packet))
1023                         terminate_connection(via->connection, true);
1024         } else
1025                 send_udppacket(via, packet);
1026
1027 end:
1028         /* Try to improve the tunnel.
1029            Note that we do this *after* we send the packet because sending actual packets take priority
1030            with regard to the send buffer space and latency. */
1031         try_tx(n);
1032 }
1033
1034 /* Broadcast a packet using the minimum spanning tree */
1035
1036 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1037         // Always give ourself a copy of the packet.
1038         if(from != myself)
1039                 send_packet(myself, packet);
1040
1041         // In TunnelServer mode, do not forward broadcast packets.
1042         // The MST might not be valid and create loops.
1043         if(tunnelserver || broadcast_mode == BMODE_NONE)
1044                 return;
1045
1046         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1047                            packet->len, from->name, from->hostname);
1048
1049         switch(broadcast_mode) {
1050                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1051                 // This guarantees all nodes receive the broadcast packet, and
1052                 // usually distributes the sending of broadcast packets over all nodes.
1053                 case BMODE_MST:
1054                         for list_each(connection_t, c, connection_list)
1055                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
1056                                         send_packet(c->node, packet);
1057                         break;
1058
1059                 // In direct mode, we send copies to each node we know of.
1060                 // However, this only reaches nodes that can be reached in a single hop.
1061                 // We don't have enough information to forward broadcast packets in this case.
1062                 case BMODE_DIRECT:
1063                         if(from != myself)
1064                                 break;
1065
1066                         for splay_each(node_t, n, node_tree)
1067                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1068                                         send_packet(n, packet);
1069                         break;
1070
1071                 default:
1072                         break;
1073         }
1074 }
1075
1076 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1077         node_t *n = NULL;
1078         bool hard = false;
1079         static time_t last_hard_try = 0;
1080
1081         for splay_each(edge_t, e, edge_weight_tree) {
1082                 if(!e->to->status.reachable || e->to == myself)
1083                         continue;
1084
1085                 if(sockaddrcmp_noport(from, &e->address)) {
1086                         if(last_hard_try == now.tv_sec)
1087                                 continue;
1088                         hard = true;
1089                 }
1090
1091                 if(!try_mac(e->to, pkt))
1092                         continue;
1093
1094                 n = e->to;
1095                 break;
1096         }
1097
1098         if(hard)
1099                 last_hard_try = now.tv_sec;
1100
1101         last_hard_try = now.tv_sec;
1102         return n;
1103 }
1104
1105 void handle_incoming_vpn_data(void *data, int flags) {
1106         listen_socket_t *ls = data;
1107         vpn_packet_t pkt;
1108         char *hostname;
1109         node_id_t nullid = {};
1110         sockaddr_t addr = {};
1111         socklen_t addrlen = sizeof addr;
1112         node_t *from, *to;
1113         bool direct = false;
1114
1115         pkt.offset = 0;
1116         int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1117
1118         if(len <= 0 || len > MAXSIZE) {
1119                 if(!sockwouldblock(sockerrno))
1120                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1121                 return;
1122         }
1123
1124         pkt.len = len;
1125
1126         sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1127
1128         // Try to figure out who sent this packet.
1129
1130         node_t *n = lookup_node_udp(&addr);
1131
1132         if(!n) {
1133                 // It might be from a 1.1 node, which might have a source ID in the packet.
1134                 pkt.offset = 2 * sizeof(node_id_t);
1135                 from = lookup_node_id(SRCID(&pkt));
1136                 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1137                         if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1138                                 n = from;
1139                         else
1140                                 goto skip_harder;
1141                 }
1142         }
1143
1144         if(!n) {
1145                 pkt.offset = 0;
1146                 n = try_harder(&addr, &pkt);
1147         }
1148
1149 skip_harder:
1150         if(!n) {
1151                 if(debug_level >= DEBUG_PROTOCOL) {
1152                         hostname = sockaddr2hostname(&addr);
1153                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1154                         free(hostname);
1155                 }
1156                 return;
1157         }
1158
1159         if(n->status.sptps) {
1160                 pkt.offset = 2 * sizeof(node_id_t);
1161
1162                 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1163                         direct = true;
1164                         from = n;
1165                         to = myself;
1166                 } else {
1167                         from = lookup_node_id(SRCID(&pkt));
1168                         to = lookup_node_id(DSTID(&pkt));
1169                 }
1170                 if(!from || !to) {
1171                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1172                         return;
1173                 }
1174
1175                 if(to != myself) {
1176                         send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1177                         return;
1178                 }
1179         } else {
1180                 direct = true;
1181                 from = n;
1182         }
1183
1184         pkt.offset = 0;
1185         if(!receive_udppacket(from, &pkt))
1186                 return;
1187
1188         n->sock = ls - listen_socket;
1189         if(direct && sockaddrcmp(&addr, &n->address))
1190                 update_node_udp(n, &addr);
1191 }
1192
1193 void handle_device_data(void *data, int flags) {
1194         vpn_packet_t packet;
1195         packet.offset = DEFAULT_PACKET_OFFSET;
1196         packet.priority = 0;
1197
1198         if(devops.read(&packet)) {
1199                 myself->in_packets++;
1200                 myself->in_bytes += packet.len;
1201                 route(myself, &packet);
1202         }
1203 }