Remove PMTU discovery code redundant with UDP discovery.
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
/* Fallback MAX() for platforms whose headers do not provide one.
   Note that each argument is evaluated twice, so arguments must not
   have side effects. */
49 #ifndef MAX
50 #define MAX(a, b) ((a) > (b) ? (a) : (b))
51 #endif
52
/* Not referenced in this part of the file; presumably the key lifetime
   used by the rekeying logic elsewhere -- TODO confirm. */
53 int keylifetime = 0;
54 #ifdef HAVE_LZO
/* Scratch memory handed to lzo1x_1_compress() and lzo1x_999_compress() in
   compress_packet(); sized for whichever of the two needs more. */
55 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
56 #endif
57
/* Forward declaration: the probe code below calls send_udppacket() before
   its definition. */
58 static void send_udppacket(node_t *, vpn_packet_t *);
59
/* Size in bytes of the per-node "late" bitmap used by the replay check in
   receive_udppacket(); each byte tracks 8 sequence numbers.  A value of 0
   disables the replay check. */
60 unsigned replaywin = 16;
/* Not referenced in this part of the file -- presumably toggles local
   address discovery; TODO confirm against the configuration code. */
61 bool localdiscovery = true;
/* When true, udp_probe_h() re-arms the per-node UDP ping timeout every
   time a probe reply arrives. */
62 bool udp_discovery = true;
/* Not referenced in this part of the file -- presumably the interval
   between UDP discovery probes; TODO confirm units. */
63 int udp_discovery_interval = 9;
/* Seconds (used as tv_sec) without a probe reply after which
   udp_probe_timeout_handler() stops UDP communication with a node. */
64 int udp_discovery_timeout = 30;
65
/* 2^30: once a node's received sequence number exceeds this,
   receive_udppacket() calls regenerate_key(). */
66 #define MAX_SEQNO 1073741824
67
68 static void send_udp_probe_packet(node_t *n, int len) {
69         vpn_packet_t packet;
70         packet.offset = DEFAULT_PACKET_OFFSET;
71         memset(DATA(&packet), 0, 14);
72         randomize(DATA(&packet) + 14, len - 14);
73         packet.len = len;
74         packet.priority = 0;
75
76         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
77
78         send_udppacket(n, &packet);
79 }
80
81 static void send_mtu_probe_handler(void *data) {
82         node_t *n = data;
83
84         if(!n->status.reachable || !n->status.validkey) {
85                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
86                 n->mtuprobes = 0;
87                 return;
88         }
89
90         /* mtuprobes == 0..29: initial discovery, send bursts with 1 second interval, mtuprobes++
91            mtuprobes ==    30: fix MTU, and go to 31
92            mtuprobes ==    31: send one >maxmtu probe every pingtimeout */
93
94         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
95                 if(n->minmtu > n->maxmtu)
96                         n->minmtu = n->maxmtu;
97                 else
98                         n->maxmtu = n->minmtu;
99                 n->mtu = n->minmtu;
100                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
101                 n->mtuprobes = 31;
102         }
103
104         int timeout;
105         if(n->mtuprobes == 31) {
106                 /* After the initial discovery, we only send one >maxmtu probe
107                    to detect PMTU increases. */
108                 if(n->maxmtu + 8 < MTU)
109                         send_udp_probe_packet(n, n->maxmtu + 8);
110                 timeout = pingtimeout;
111         } else {
112                 /* Probes are sent in batches of three, with random sizes between the
113                    lower and upper boundaries for the MTU thus far discovered. */
114                 for (int i = 0; i < 3; i++) {
115                         int len = n->maxmtu;
116                         if(n->minmtu < n->maxmtu)
117                                 len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
118
119                         send_udp_probe_packet(n, MAX(len, 64));
120                 }
121                 timeout = 1;
122                 n->mtuprobes++;
123         }
124
125         n->probe_counter = 0;
126         gettimeofday(&n->probe_time, NULL);
127
128         /* Calculate the packet loss of incoming traffic by comparing the rate of
129            packets received to the rate with which the sequence number has increased.
130            TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
131          */
132
133         if(n->received > n->prev_received)
134                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
135         else
136                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
137
138         n->prev_received_seqno = n->received_seqno;
139         n->prev_received = n->received;
140
141         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
142 }
143
144 void send_mtu_probe(node_t *n) {
145         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
146         send_mtu_probe_handler(n);
147 }
148
149 static void udp_probe_timeout_handler(void *data) {
150         node_t *n = data;
151         if(!n->status.udp_confirmed)
152                 return;
153
154         logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
155         n->status.udp_confirmed = false;
156         n->mtuprobes = 0;
157         n->minmtu = 0;
158         n->maxmtu = MTU;
159 }
160
161 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
162         if(!DATA(packet)[0]) {
163                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
164
165                 /* It's a probe request, send back a reply */
166
167                 /* Type 2 probe replies were introduced in protocol 17.3 */
168                 if ((n->options >> 24) >= 3) {
169                         uint8_t *data = DATA(packet);
170                         *data++ = 2;
171                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
172                         struct timeval now;
173                         gettimeofday(&now, NULL);
174                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
175                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
176                         packet->len -= 10;
177                 } else {
178                         /* Legacy protocol: n won't understand type 2 probe replies. */
179                         DATA(packet)[0] = 1;
180                 }
181
182                 /* Temporarily set udp_confirmed, so that the reply is sent
183                    back exactly the way it came in. */
184
185                 bool udp_confirmed = n->status.udp_confirmed;
186                 n->status.udp_confirmed = true;
187                 send_udppacket(n, packet);
188                 n->status.udp_confirmed = udp_confirmed;
189         } else {
190                 length_t probelen = len;
191                 if (DATA(packet)[0] == 2) {
192                         if (len < 3)
193                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
194                         else {
195                                 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
196                         }
197                 }
198                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
199
200                 /* It's a valid reply: now we know bidirectional communication
201                    is possible using the address and socket that the reply
202                    packet used. */
203                 n->status.udp_confirmed = true;
204
205                 if(udp_discovery) {
206                         timeout_del(&n->udp_ping_timeout);
207                         timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
208                 }
209
210                 if(probelen >= n->maxmtu + 8) {
211                         logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
212                         n->maxmtu = MTU;
213                         n->mtuprobes = 10;
214                         return;
215                 }
216
217                 /* If applicable, raise the minimum supported MTU */
218
219                 if(probelen > n->maxmtu)
220                         probelen = n->maxmtu;
221                 if(n->minmtu < probelen)
222                         n->minmtu = probelen;
223
224                 /* Calculate RTT and bandwidth.
225                    The RTT is the time between the MTU probe burst was sent and the first
226                    reply is received. The bandwidth is measured using the time between the
227                    arrival of the first and third probe reply (or type 2 probe requests).
228                  */
229
230                 struct timeval now, diff;
231                 gettimeofday(&now, NULL);
232                 timersub(&now, &n->probe_time, &diff);
233
234                 struct timeval probe_timestamp = now;
235                 if (DATA(packet)[0] == 2 && packet->len >= 11) {
236                         uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
237                         uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
238                         probe_timestamp.tv_sec = ntohl(sec);
239                         probe_timestamp.tv_usec = ntohl(usec);
240                 }
241                 
242                 n->probe_counter++;
243
244                 if(n->probe_counter == 1) {
245                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
246                         n->probe_time = probe_timestamp;
247                 } else if(n->probe_counter == 3) {
248                         /* TODO: this will never fire after initial MTU discovery. */
249                         struct timeval probe_timestamp_diff;
250                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
251                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
252                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
253                 }
254         }
255 }
256
257 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
258         if(level == 0) {
259                 memcpy(dest, source, len);
260                 return len;
261         } else if(level == 10) {
262 #ifdef HAVE_LZO
263                 lzo_uint lzolen = MAXSIZE;
264                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
265                 return lzolen;
266 #else
267                 return -1;
268 #endif
269         } else if(level < 10) {
270 #ifdef HAVE_ZLIB
271                 unsigned long destlen = MAXSIZE;
272                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
273                         return destlen;
274                 else
275 #endif
276                         return -1;
277         } else {
278 #ifdef HAVE_LZO
279                 lzo_uint lzolen = MAXSIZE;
280                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
281                 return lzolen;
282 #else
283                 return -1;
284 #endif
285         }
286
287         return -1;
288 }
289
290 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
291         if(level == 0) {
292                 memcpy(dest, source, len);
293                 return len;
294         } else if(level > 9) {
295 #ifdef HAVE_LZO
296                 lzo_uint lzolen = MAXSIZE;
297                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
298                         return lzolen;
299                 else
300 #endif
301                         return -1;
302         }
303 #ifdef HAVE_ZLIB
304         else {
305                 unsigned long destlen = MAXSIZE;
306                 if(uncompress(dest, &destlen, source, len) == Z_OK)
307                         return destlen;
308                 else
309                         return -1;
310         }
311 #endif
312
313         return -1;
314 }
315
316 /* VPN packet I/O */
317
318 static void receive_packet(node_t *n, vpn_packet_t *packet) {
319         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
320                            packet->len, n->name, n->hostname);
321
322         n->in_packets++;
323         n->in_bytes += packet->len;
324
325         route(n, packet);
326 }
327
328 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
329         if(n->status.sptps)
330                 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
331
332 #ifdef DISABLE_LEGACY
333         return false;
334 #else
335         if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
336                 return false;
337
338         return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
339 #endif
340 }
341
342 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
343         vpn_packet_t pkt1, pkt2;
344         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
345         int nextpkt = 0;
346         size_t outlen;
347         pkt1.offset = DEFAULT_PACKET_OFFSET;
348         pkt2.offset = DEFAULT_PACKET_OFFSET;
349
350         if(n->status.sptps) {
351                 if(!n->sptps.state) {
352                         if(!n->status.waitingforkey) {
353                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
354                                 send_req_key(n);
355                         } else {
356                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
357                         }
358                         return false;
359                 }
360                 inpkt->offset += 2 * sizeof(node_id_t);
361                 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
362                         logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
363                         return false;
364                 }
365                 return true;
366         }
367
368 #ifdef DISABLE_LEGACY
369         return false;
370 #else
371         if(!n->status.validkey) {
372                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
373                 return false;
374         }
375
376         /* Check packet length */
377
378         if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
379                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
380                                         n->name, n->hostname);
381                 return false;
382         }
383
384         /* It's a legacy UDP packet, the data starts after the seqno */
385
386         inpkt->offset += sizeof(seqno_t);
387
388         /* Check the message authentication code */
389
390         if(digest_active(n->indigest)) {
391                 inpkt->len -= digest_length(n->indigest);
392                 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
393                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
394                         return false;
395                 }
396         }
397         /* Decrypt the packet */
398
399         if(cipher_active(n->incipher)) {
400                 vpn_packet_t *outpkt = pkt[nextpkt++];
401                 outlen = MAXSIZE;
402
403                 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
404                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
405                         return false;
406                 }
407
408                 outpkt->len = outlen;
409                 inpkt = outpkt;
410         }
411
412         /* Check the sequence number */
413
414         seqno_t seqno;
415         memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
416         seqno = ntohl(seqno);
417         inpkt->len -= sizeof seqno;
418
419         if(replaywin) {
420                 if(seqno != n->received_seqno + 1) {
421                         if(seqno >= n->received_seqno + replaywin * 8) {
422                                 if(n->farfuture++ < replaywin >> 2) {
423                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
424                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
425                                         return false;
426                                 }
427                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
428                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
429                                 memset(n->late, 0, replaywin);
430                         } else if (seqno <= n->received_seqno) {
431                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
432                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
433                                                 n->name, n->hostname, seqno, n->received_seqno);
434                                         return false;
435                                 }
436                         } else {
437                                 for(int i = n->received_seqno + 1; i < seqno; i++)
438                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
439                         }
440                 }
441
442                 n->farfuture = 0;
443                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
444         }
445
446         if(seqno > n->received_seqno)
447                 n->received_seqno = seqno;
448
449         n->received++;
450
451         if(n->received_seqno > MAX_SEQNO)
452                 regenerate_key();
453
454         /* Decompress the packet */
455
456         length_t origlen = inpkt->len;
457
458         if(n->incompression) {
459                 vpn_packet_t *outpkt = pkt[nextpkt++];
460
461                 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
462                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
463                                                  n->name, n->hostname);
464                         return false;
465                 }
466
467                 inpkt = outpkt;
468
469                 origlen -= MTU/64 + 20;
470         }
471
472         inpkt->priority = 0;
473
474         if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
475                 udp_probe_h(n, inpkt, origlen);
476         else
477                 receive_packet(n, inpkt);
478         return true;
479 #endif
480 }
481
482 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
483         vpn_packet_t outpkt;
484         outpkt.offset = DEFAULT_PACKET_OFFSET;
485
486         if(len > sizeof outpkt.data - outpkt.offset)
487                 return;
488
489         outpkt.len = len;
490         if(c->options & OPTION_TCPONLY)
491                 outpkt.priority = 0;
492         else
493                 outpkt.priority = -1;
494         memcpy(DATA(&outpkt), buffer, len);
495
496         receive_packet(c->node, &outpkt);
497 }
498
499 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
500         if(!n->status.validkey && !n->connection)
501                 return;
502
503         uint8_t type = 0;
504         int offset = 0;
505
506         if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
507                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
508                 return;
509         }
510
511         if(routing_mode == RMODE_ROUTER)
512                 offset = 14;
513         else
514                 type = PKT_MAC;
515
516         if(origpkt->len < offset)
517                 return;
518
519         vpn_packet_t outpkt;
520
521         if(n->outcompression) {
522                 outpkt.offset = 0;
523                 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
524                 if(len < 0) {
525                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
526                 } else if(len < origpkt->len - offset) {
527                         outpkt.len = len + offset;
528                         origpkt = &outpkt;
529                         type |= PKT_COMPRESSED;
530                 }
531         }
532
533         /* If we have a direct metaconnection to n, and we can't use UDP, then
534            don't bother with SPTPS and just use a "plaintext" PACKET message.
535            We don't really care about end-to-end security since we're not
536            sending the message through any intermediate nodes. */
537         if(n->connection && origpkt->len > n->minmtu)
538                 send_tcppacket(n->connection, origpkt);
539         else
540                 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
541         return;
542 }
543
544 static void adapt_socket(const sockaddr_t *sa, int *sock) {
545         /* Make sure we have a suitable socket for the chosen address */
546         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
547                 for(int i = 0; i < listen_sockets; i++) {
548                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
549                                 *sock = i;
550                                 break;
551                         }
552                 }
553         }
554 }
555
556 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
557         /* Latest guess */
558         *sa = &n->address;
559         *sock = n->sock;
560
561         /* If the UDP address is confirmed, use it. */
562         if(n->status.udp_confirmed)
563                 return;
564
565         /* Send every third packet to n->address; that could be set
566            to the node's reflexive UDP address discovered during key
567            exchange. */
568
569         static int x = 0;
570         if(++x >= 3) {
571                 x = 0;
572                 return;
573         }
574
575         /* Otherwise, address are found in edges to this node.
576            So we pick a random edge and a random socket. */
577
578         int i = 0;
579         int j = rand() % n->edge_tree->count;
580         edge_t *candidate = NULL;
581
582         for splay_each(edge_t, e, n->edge_tree) {
583                 if(i++ == j) {
584                         candidate = e->reverse;
585                         break;
586                 }
587         }
588
589         if(candidate) {
590                 *sa = &candidate->address;
591                 *sock = rand() % listen_sockets;
592         }
593
594         adapt_socket(*sa, sock);
595 }
596
597 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
598         *sa = NULL;
599
600         /* Pick one of the edges from this node at random, then use its local address. */
601
602         int i = 0;
603         int j = rand() % n->edge_tree->count;
604         edge_t *candidate = NULL;
605
606         for splay_each(edge_t, e, n->edge_tree) {
607                 if(i++ == j) {
608                         candidate = e;
609                         break;
610                 }
611         }
612
613         if (candidate && candidate->local_address.sa.sa_family) {
614                 *sa = &candidate->local_address;
615                 *sock = rand() % listen_sockets;
616                 adapt_socket(*sa, sock);
617         }
618 }
619
620 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
621         vpn_packet_t pkt1, pkt2;
622         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
623         vpn_packet_t *inpkt = origpkt;
624         int nextpkt = 0;
625         vpn_packet_t *outpkt;
626         int origlen = origpkt->len;
627         size_t outlen;
628 #if defined(SOL_IP) && defined(IP_TOS)
629         static int priority = 0;
630         int origpriority = origpkt->priority;
631 #endif
632
633         pkt1.offset = DEFAULT_PACKET_OFFSET;
634         pkt2.offset = DEFAULT_PACKET_OFFSET;
635
636         if(!n->status.reachable) {
637                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
638                 return;
639         }
640
641         if(n->status.sptps)
642                 return send_sptps_packet(n, origpkt);
643
644 #ifdef DISABLE_LEGACY
645         return;
646 #else
647         /* Make sure we have a valid key */
648
649         if(!n->status.validkey) {
650                 logger(DEBUG_TRAFFIC, LOG_INFO,
651                                    "No valid key known yet for %s (%s), forwarding via TCP",
652                                    n->name, n->hostname);
653                 send_tcppacket(n->nexthop->connection, origpkt);
654                 return;
655         }
656
657         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
658                 logger(DEBUG_TRAFFIC, LOG_INFO,
659                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
660                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
661
662                 if(n != n->nexthop)
663                         send_packet(n->nexthop, origpkt);
664                 else
665                         send_tcppacket(n->nexthop->connection, origpkt);
666
667                 return;
668         }
669
670         /* Compress the packet */
671
672         if(n->outcompression) {
673                 outpkt = pkt[nextpkt++];
674
675                 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
676                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
677                                    n->name, n->hostname);
678                         return;
679                 }
680
681                 inpkt = outpkt;
682         }
683
684         /* Add sequence number */
685
686         seqno_t seqno = htonl(++(n->sent_seqno));
687         memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
688         inpkt->len += sizeof seqno;
689
690         /* Encrypt the packet */
691
692         if(cipher_active(n->outcipher)) {
693                 outpkt = pkt[nextpkt++];
694                 outlen = MAXSIZE;
695
696                 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
697                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
698                         goto end;
699                 }
700
701                 outpkt->len = outlen;
702                 inpkt = outpkt;
703         }
704
705         /* Add the message authentication code */
706
707         if(digest_active(n->outdigest)) {
708                 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
709                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
710                         goto end;
711                 }
712
713                 inpkt->len += digest_length(n->outdigest);
714         }
715
716         /* Send the packet */
717
718         const sockaddr_t *sa = NULL;
719         int sock;
720
721         if(n->status.send_locally)
722                 choose_local_address(n, &sa, &sock);
723         if(!sa)
724                 choose_udp_address(n, &sa, &sock);
725
726 #if defined(SOL_IP) && defined(IP_TOS)
727         if(priorityinheritance && origpriority != priority
728            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
729                 priority = origpriority;
730                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
731                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
732                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
733         }
734 #endif
735
736         if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
737                 if(sockmsgsize(sockerrno)) {
738                         if(n->maxmtu >= origlen)
739                                 n->maxmtu = origlen - 1;
740                         if(n->mtu >= origlen)
741                                 n->mtu = origlen - 1;
742                 } else
743                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
744         }
745
746 end:
747         origpkt->len = origlen;
748 #endif
749 }
750
751 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
752         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
753         bool direct = from == myself && to == relay;
754         bool relay_supported = (relay->options >> 24) >= 4;
755         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
756
757         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
758            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
759                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
760
761         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
762                 char buf[len * 4 / 3 + 5];
763                 b64encode(data, buf, len);
764                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
765                    to ensure we get to learn the reflexive UDP address. */
766                 if(from == myself && !to->status.validkey) {
767                         to->incompression = myself->incompression;
768                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
769                 } else {
770                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
771                 }
772         }
773
774         size_t overhead = 0;
775         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
776         char buf[len + overhead]; char* buf_ptr = buf;
777         if(relay_supported) {
778                 if(direct) {
779                         /* Inform the recipient that this packet was sent directly. */
780                         node_id_t nullid = {};
781                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
782                 } else {
783                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
784                 }
785                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
786
787         }
788         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
789         memcpy(buf_ptr, data, len); buf_ptr += len;
790
791         const sockaddr_t *sa = NULL;
792         int sock;
793         if(relay->status.send_locally)
794                 choose_local_address(relay, &sa, &sock);
795         if(!sa)
796                 choose_udp_address(relay, &sa, &sock);
797         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
798         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
799                 if(sockmsgsize(sockerrno)) {
800                         // Compensate for SPTPS overhead
801                         len -= SPTPS_DATAGRAM_OVERHEAD;
802                         if(relay->maxmtu >= len)
803                                 relay->maxmtu = len - 1;
804                         if(relay->mtu >= len)
805                                 relay->mtu = len - 1;
806                 } else {
807                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
808                         return false;
809                 }
810         }
811
812         return true;
813 }
814
815 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
816         return send_sptps_data_priv(handle, myself, type, data, len);
817 }
818
819 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
820         node_t *from = handle;
821
822         if(type == SPTPS_HANDSHAKE) {
823                 if(!from->status.validkey) {
824                         from->status.validkey = true;
825                         from->status.waitingforkey = false;
826                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
827                 }
828                 return true;
829         }
830
831         if(len > MTU) {
832                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
833                 return false;
834         }
835
836         vpn_packet_t inpkt;
837         inpkt.offset = DEFAULT_PACKET_OFFSET;
838
839         if(type == PKT_PROBE) {
840                 inpkt.len = len;
841                 memcpy(DATA(&inpkt), data, len);
842                 udp_probe_h(from, &inpkt, len);
843                 return true;
844         }
845
846         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
847                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
848                 return false;
849         }
850
851         /* Check if we have the headers we need */
852         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
853                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
854                 return false;
855         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
856                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
857         }
858
859         int offset = (type & PKT_MAC) ? 0 : 14;
860         if(type & PKT_COMPRESSED) {
861                 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
862                 if(ulen < 0) {
863                         return false;
864                 } else {
865                         inpkt.len = ulen + offset;
866                 }
867                 if(inpkt.len > MAXSIZE)
868                         abort();
869         } else {
870                 memcpy(DATA(&inpkt) + offset, data, len);
871                 inpkt.len = len + offset;
872         }
873
874         /* Generate the Ethernet packet type if necessary */
875         if(offset) {
876                 switch(DATA(&inpkt)[14] >> 4) {
877                         case 4:
878                                 DATA(&inpkt)[12] = 0x08;
879                                 DATA(&inpkt)[13] = 0x00;
880                                 break;
881                         case 6:
882                                 DATA(&inpkt)[12] = 0x86;
883                                 DATA(&inpkt)[13] = 0xDD;
884                                 break;
885                         default:
886                                 logger(DEBUG_TRAFFIC, LOG_ERR,
887                                                    "Unknown IP version %d while reading packet from %s (%s)",
888                                                    DATA(&inpkt)[14] >> 4, from->name, from->hostname);
889                                 return false;
890                 }
891         }
892
893         receive_packet(from, &inpkt);
894         return true;
895 }
896
897 // This function tries to get SPTPS keys, if they aren't already known.
898 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
899 static void try_sptps(node_t *n) {
900         if(n->status.validkey)
901                 return;
902
903         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
904
905         if(!n->status.waitingforkey)
906                 send_req_key(n);
907         else if(n->last_req_key + 10 < now.tv_sec) {
908                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
909                 sptps_stop(&n->sptps);
910                 n->status.waitingforkey = false;
911                 send_req_key(n);
912         }
913
914         return;
915 }
916
917 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
918 // If a tunnel is already established, it makes sure it stays up.
919 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
920 static void try_udp(node_t* n) {
921         if(!udp_discovery)
922                 return;
923
924         struct timeval now;
925         gettimeofday(&now, NULL);
926         struct timeval ping_tx_elapsed;
927         timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
928
929         if(ping_tx_elapsed.tv_sec >= udp_discovery_interval) {
930                 send_udp_probe_packet(n, MAX(n->minmtu, 16));
931                 n->udp_ping_sent = now;
932
933                 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
934                         n->status.send_locally = true;
935                         send_udp_probe_packet(n, 16);
936                         n->status.send_locally = false;
937                 }
938         }
939 }
940
941 // This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys).
942 // If a tunnel is already established, it tries to improve it (e.g. by trying to establish a UDP tunnel instead of TCP).
943 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if TCP and/or UDP is usable.
944 // By calling this function repeatedly, the tunnel is gradually improved until we hit the wall imposed by the underlying network environment.
945 // It is recommended to call this function every time a packet is sent (or intended to be sent) to a node,
946 // so that the tunnel keeps improving as packets flow, and then gracefully downgrades itself as it goes idle.
947 static void try_tx(node_t *n) {
948         /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
949            messages anyway, so there's no need for SPTPS at all. Otherwise, get the keys. */
950         if(n->status.sptps && !(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))) {
951                 try_sptps(n);
952                 if (!n->status.validkey)
953                         return;
954         }
955
956         node_t *via = (n->via == myself) ? n->nexthop : n->via;
957         
958         if((myself->options | via->options) & OPTION_TCPONLY)
959                 return;
960
961         if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) {
962                 send_req_key(via);
963                 via->last_req_key = now.tv_sec;
964         } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4)
965                 try_udp(via);
966
967         /* If we don't know how to reach "via" yet, then try to reach it through a relay. */
968         if(n->status.sptps && !via->status.udp_confirmed && via->nexthop != via && (via->nexthop->options >> 24) >= 4)
969                 try_tx(via->nexthop);
970 }
971
972 /*
973   send a packet to the given vpn ip.
974 */
975 void send_packet(node_t *n, vpn_packet_t *packet) {
976         node_t *via;
977
978         if(n == myself) {
979                 if(overwrite_mac)
980                          memcpy(DATA(packet), mymac.x, ETH_ALEN);
981                 n->out_packets++;
982                 n->out_bytes += packet->len;
983                 devops.write(packet);
984                 return;
985         }
986
987         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
988                            packet->len, n->name, n->hostname);
989
990         if(!n->status.reachable) {
991                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
992                                    n->name, n->hostname);
993                 return;
994         }
995
996         n->out_packets++;
997         n->out_bytes += packet->len;
998
999         if(n->status.sptps) {
1000                 send_sptps_packet(n, packet);
1001                 goto end;
1002         }
1003
1004         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
1005
1006         if(via != n)
1007                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
1008                            n->name, via->name, n->via->hostname);
1009
1010         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
1011                 if(!send_tcppacket(via->connection, packet))
1012                         terminate_connection(via->connection, true);
1013         } else
1014                 send_udppacket(via, packet);
1015
1016 end:
1017         /* Try to improve the tunnel.
1018            Note that we do this *after* we send the packet because sending actual packets take priority
1019            with regard to the send buffer space and latency. */
1020         try_tx(n);
1021 }
1022
1023 /* Broadcast a packet using the minimum spanning tree */
1024
1025 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1026         // Always give ourself a copy of the packet.
1027         if(from != myself)
1028                 send_packet(myself, packet);
1029
1030         // In TunnelServer mode, do not forward broadcast packets.
1031         // The MST might not be valid and create loops.
1032         if(tunnelserver || broadcast_mode == BMODE_NONE)
1033                 return;
1034
1035         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1036                            packet->len, from->name, from->hostname);
1037
1038         switch(broadcast_mode) {
1039                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1040                 // This guarantees all nodes receive the broadcast packet, and
1041                 // usually distributes the sending of broadcast packets over all nodes.
1042                 case BMODE_MST:
1043                         for list_each(connection_t, c, connection_list)
1044                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
1045                                         send_packet(c->node, packet);
1046                         break;
1047
1048                 // In direct mode, we send copies to each node we know of.
1049                 // However, this only reaches nodes that can be reached in a single hop.
1050                 // We don't have enough information to forward broadcast packets in this case.
1051                 case BMODE_DIRECT:
1052                         if(from != myself)
1053                                 break;
1054
1055                         for splay_each(node_t, n, node_tree)
1056                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1057                                         send_packet(n, packet);
1058                         break;
1059
1060                 default:
1061                         break;
1062         }
1063 }
1064
1065 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1066         node_t *n = NULL;
1067         bool hard = false;
1068         static time_t last_hard_try = 0;
1069
1070         for splay_each(edge_t, e, edge_weight_tree) {
1071                 if(!e->to->status.reachable || e->to == myself)
1072                         continue;
1073
1074                 if(sockaddrcmp_noport(from, &e->address)) {
1075                         if(last_hard_try == now.tv_sec)
1076                                 continue;
1077                         hard = true;
1078                 }
1079
1080                 if(!try_mac(e->to, pkt))
1081                         continue;
1082
1083                 n = e->to;
1084                 break;
1085         }
1086
1087         if(hard)
1088                 last_hard_try = now.tv_sec;
1089
1090         last_hard_try = now.tv_sec;
1091         return n;
1092 }
1093
1094 void handle_incoming_vpn_data(void *data, int flags) {
1095         listen_socket_t *ls = data;
1096         vpn_packet_t pkt;
1097         char *hostname;
1098         node_id_t nullid = {};
1099         sockaddr_t addr = {};
1100         socklen_t addrlen = sizeof addr;
1101         node_t *from, *to;
1102         bool direct = false;
1103
1104         pkt.offset = 0;
1105         int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1106
1107         if(len <= 0 || len > MAXSIZE) {
1108                 if(!sockwouldblock(sockerrno))
1109                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1110                 return;
1111         }
1112
1113         pkt.len = len;
1114
1115         sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1116
1117         // Try to figure out who sent this packet.
1118
1119         node_t *n = lookup_node_udp(&addr);
1120
1121         if(!n) {
1122                 // It might be from a 1.1 node, which might have a source ID in the packet.
1123                 pkt.offset = 2 * sizeof(node_id_t);
1124                 from = lookup_node_id(SRCID(&pkt));
1125                 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1126                         if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1127                                 n = from;
1128                         else
1129                                 goto skip_harder;
1130                 }
1131         }
1132
1133         if(!n) {
1134                 pkt.offset = 0;
1135                 n = try_harder(&addr, &pkt);
1136         }
1137
1138 skip_harder:
1139         if(!n) {
1140                 if(debug_level >= DEBUG_PROTOCOL) {
1141                         hostname = sockaddr2hostname(&addr);
1142                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1143                         free(hostname);
1144                 }
1145                 return;
1146         }
1147
1148         if(n->status.sptps) {
1149                 pkt.offset = 2 * sizeof(node_id_t);
1150
1151                 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1152                         direct = true;
1153                         from = n;
1154                         to = myself;
1155                 } else {
1156                         from = lookup_node_id(SRCID(&pkt));
1157                         to = lookup_node_id(DSTID(&pkt));
1158                 }
1159                 if(!from || !to) {
1160                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1161                         return;
1162                 }
1163
1164                 if(to != myself) {
1165                         send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1166                         return;
1167                 }
1168         } else {
1169                 direct = true;
1170                 from = n;
1171         }
1172
1173         pkt.offset = 0;
1174         if(!receive_udppacket(from, &pkt))
1175                 return;
1176
1177         n->sock = ls - listen_socket;
1178         if(direct && sockaddrcmp(&addr, &n->address))
1179                 update_node_udp(n, &addr);
1180 }
1181
1182 void handle_device_data(void *data, int flags) {
1183         vpn_packet_t packet;
1184         packet.offset = DEFAULT_PACKET_OFFSET;
1185         packet.priority = 0;
1186
1187         if(devops.read(&packet)) {
1188                 myself->in_packets++;
1189                 myself->in_bytes += packet.len;
1190                 route(myself, &packet);
1191         }
1192 }