828e193be546aae1ad33bdb040b8d631ab38a49e
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
49 #ifndef MAX
50 #define MAX(a, b) ((a) > (b) ? (a) : (b))
51 #endif
52
53 int keylifetime = 0;
54 #ifdef HAVE_LZO
55 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
56 #endif
57
58 static void send_udppacket(node_t *, vpn_packet_t *);
59
60 unsigned replaywin = 16;
61 bool localdiscovery = true;
62 bool udp_discovery = true;
63 int udp_discovery_interval = 9;
64 int udp_discovery_timeout = 30;
65
66 #define MAX_SEQNO 1073741824
67
68 static void try_fix_mtu(node_t *n) {
69         if(n->mtuprobes < 0)
70                 return;
71
72         if(n->mtuprobes == 90 || n->minmtu >= n->maxmtu) {
73                 if(n->minmtu > n->maxmtu)
74                         n->minmtu = n->maxmtu;
75                 else
76                         n->maxmtu = n->minmtu;
77                 n->mtu = n->minmtu;
78                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
79                 n->mtuprobes = -1;
80         }
81 }
82
83 static void udp_probe_timeout_handler(void *data) {
84         node_t *n = data;
85         if(!n->status.udp_confirmed)
86                 return;
87
88         logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
89         n->status.udp_confirmed = false;
90         n->mtuprobes = 0;
91         n->minmtu = 0;
92         n->maxmtu = MTU;
93 }
94
95 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
96         if(!DATA(packet)[0]) {
97                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
98
99                 /* It's a probe request, send back a reply */
100
101                 /* Type 2 probe replies were introduced in protocol 17.3 */
102                 if ((n->options >> 24) >= 3) {
103                         uint8_t *data = DATA(packet);
104                         *data++ = 2;
105                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
106                         struct timeval now;
107                         gettimeofday(&now, NULL);
108                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
109                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
110                         packet->len -= 10;
111                 } else {
112                         /* Legacy protocol: n won't understand type 2 probe replies. */
113                         DATA(packet)[0] = 1;
114                 }
115
116                 /* Temporarily set udp_confirmed, so that the reply is sent
117                    back exactly the way it came in. */
118
119                 bool udp_confirmed = n->status.udp_confirmed;
120                 n->status.udp_confirmed = true;
121                 send_udppacket(n, packet);
122                 n->status.udp_confirmed = udp_confirmed;
123         } else {
124                 length_t probelen = len;
125                 if (DATA(packet)[0] == 2) {
126                         if (len < 3)
127                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
128                         else {
129                                 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
130                         }
131                 }
132                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
133
134                 /* It's a valid reply: now we know bidirectional communication
135                    is possible using the address and socket that the reply
136                    packet used. */
137                 n->status.udp_confirmed = true;
138
139                 if(udp_discovery) {
140                         timeout_del(&n->udp_ping_timeout);
141                         timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
142                 }
143
144                 if(probelen >= n->maxmtu + 8) {
145                         logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
146                         n->maxmtu = MTU;
147                         n->mtuprobes = 30;
148                         return;
149                 }
150
151                 /* If applicable, raise the minimum supported MTU */
152
153                 if(probelen > n->maxmtu)
154                         probelen = n->maxmtu;
155                 if(n->minmtu < probelen) {
156                         n->minmtu = probelen;
157                         try_fix_mtu(n);
158                 }
159
160                 /* Calculate RTT.
161                    The RTT is the time between the MTU probe burst was sent and the first
162                    reply is received.
163                  */
164
165                 struct timeval now, diff;
166                 gettimeofday(&now, NULL);
167                 timersub(&now, &n->probe_time, &diff);
168
169                 struct timeval probe_timestamp = now;
170                 if (DATA(packet)[0] == 2 && packet->len >= 11) {
171                         uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
172                         uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
173                         probe_timestamp.tv_sec = ntohl(sec);
174                         probe_timestamp.tv_usec = ntohl(usec);
175                 }
176                 
177                 n->probe_counter++;
178
179                 if(n->probe_counter == 1) {
180                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
181                         n->probe_time = probe_timestamp;
182                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2);
183                 }
184         }
185 }
186
187 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
188         if(level == 0) {
189                 memcpy(dest, source, len);
190                 return len;
191         } else if(level == 10) {
192 #ifdef HAVE_LZO
193                 lzo_uint lzolen = MAXSIZE;
194                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
195                 return lzolen;
196 #else
197                 return -1;
198 #endif
199         } else if(level < 10) {
200 #ifdef HAVE_ZLIB
201                 unsigned long destlen = MAXSIZE;
202                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
203                         return destlen;
204                 else
205 #endif
206                         return -1;
207         } else {
208 #ifdef HAVE_LZO
209                 lzo_uint lzolen = MAXSIZE;
210                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
211                 return lzolen;
212 #else
213                 return -1;
214 #endif
215         }
216
217         return -1;
218 }
219
220 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
221         if(level == 0) {
222                 memcpy(dest, source, len);
223                 return len;
224         } else if(level > 9) {
225 #ifdef HAVE_LZO
226                 lzo_uint lzolen = MAXSIZE;
227                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
228                         return lzolen;
229                 else
230 #endif
231                         return -1;
232         }
233 #ifdef HAVE_ZLIB
234         else {
235                 unsigned long destlen = MAXSIZE;
236                 if(uncompress(dest, &destlen, source, len) == Z_OK)
237                         return destlen;
238                 else
239                         return -1;
240         }
241 #endif
242
243         return -1;
244 }
245
246 /* VPN packet I/O */
247
248 static void receive_packet(node_t *n, vpn_packet_t *packet) {
249         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
250                            packet->len, n->name, n->hostname);
251
252         n->in_packets++;
253         n->in_bytes += packet->len;
254
255         route(n, packet);
256 }
257
258 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
259         if(n->status.sptps)
260                 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
261
262 #ifdef DISABLE_LEGACY
263         return false;
264 #else
265         if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
266                 return false;
267
268         return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
269 #endif
270 }
271
272 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
273         vpn_packet_t pkt1, pkt2;
274         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
275         int nextpkt = 0;
276         size_t outlen;
277         pkt1.offset = DEFAULT_PACKET_OFFSET;
278         pkt2.offset = DEFAULT_PACKET_OFFSET;
279
280         if(n->status.sptps) {
281                 if(!n->sptps.state) {
282                         if(!n->status.waitingforkey) {
283                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
284                                 send_req_key(n);
285                         } else {
286                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
287                         }
288                         return false;
289                 }
290                 inpkt->offset += 2 * sizeof(node_id_t);
291                 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
292                         logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
293                         return false;
294                 }
295                 return true;
296         }
297
298 #ifdef DISABLE_LEGACY
299         return false;
300 #else
301         if(!n->status.validkey) {
302                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
303                 return false;
304         }
305
306         /* Check packet length */
307
308         if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
309                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
310                                         n->name, n->hostname);
311                 return false;
312         }
313
314         /* It's a legacy UDP packet, the data starts after the seqno */
315
316         inpkt->offset += sizeof(seqno_t);
317
318         /* Check the message authentication code */
319
320         if(digest_active(n->indigest)) {
321                 inpkt->len -= digest_length(n->indigest);
322                 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
323                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
324                         return false;
325                 }
326         }
327         /* Decrypt the packet */
328
329         if(cipher_active(n->incipher)) {
330                 vpn_packet_t *outpkt = pkt[nextpkt++];
331                 outlen = MAXSIZE;
332
333                 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
334                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
335                         return false;
336                 }
337
338                 outpkt->len = outlen;
339                 inpkt = outpkt;
340         }
341
342         /* Check the sequence number */
343
344         seqno_t seqno;
345         memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
346         seqno = ntohl(seqno);
347         inpkt->len -= sizeof seqno;
348
349         if(replaywin) {
350                 if(seqno != n->received_seqno + 1) {
351                         if(seqno >= n->received_seqno + replaywin * 8) {
352                                 if(n->farfuture++ < replaywin >> 2) {
353                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
354                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
355                                         return false;
356                                 }
357                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
358                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
359                                 memset(n->late, 0, replaywin);
360                         } else if (seqno <= n->received_seqno) {
361                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
362                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
363                                                 n->name, n->hostname, seqno, n->received_seqno);
364                                         return false;
365                                 }
366                         } else {
367                                 for(int i = n->received_seqno + 1; i < seqno; i++)
368                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
369                         }
370                 }
371
372                 n->farfuture = 0;
373                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
374         }
375
376         if(seqno > n->received_seqno)
377                 n->received_seqno = seqno;
378
379         n->received++;
380
381         if(n->received_seqno > MAX_SEQNO)
382                 regenerate_key();
383
384         /* Decompress the packet */
385
386         length_t origlen = inpkt->len;
387
388         if(n->incompression) {
389                 vpn_packet_t *outpkt = pkt[nextpkt++];
390
391                 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
392                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
393                                                  n->name, n->hostname);
394                         return false;
395                 }
396
397                 inpkt = outpkt;
398
399                 origlen -= MTU/64 + 20;
400         }
401
402         inpkt->priority = 0;
403
404         if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
405                 udp_probe_h(n, inpkt, origlen);
406         else
407                 receive_packet(n, inpkt);
408         return true;
409 #endif
410 }
411
412 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
413         vpn_packet_t outpkt;
414         outpkt.offset = DEFAULT_PACKET_OFFSET;
415
416         if(len > sizeof outpkt.data - outpkt.offset)
417                 return;
418
419         outpkt.len = len;
420         if(c->options & OPTION_TCPONLY)
421                 outpkt.priority = 0;
422         else
423                 outpkt.priority = -1;
424         memcpy(DATA(&outpkt), buffer, len);
425
426         receive_packet(c->node, &outpkt);
427 }
428
429 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
430         if(!n->status.validkey && !n->connection)
431                 return;
432
433         uint8_t type = 0;
434         int offset = 0;
435
436         if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
437                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
438                 return;
439         }
440
441         if(routing_mode == RMODE_ROUTER)
442                 offset = 14;
443         else
444                 type = PKT_MAC;
445
446         if(origpkt->len < offset)
447                 return;
448
449         vpn_packet_t outpkt;
450
451         if(n->outcompression) {
452                 outpkt.offset = 0;
453                 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
454                 if(len < 0) {
455                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
456                 } else if(len < origpkt->len - offset) {
457                         outpkt.len = len + offset;
458                         origpkt = &outpkt;
459                         type |= PKT_COMPRESSED;
460                 }
461         }
462
463         /* If we have a direct metaconnection to n, and we can't use UDP, then
464            don't bother with SPTPS and just use a "plaintext" PACKET message.
465            We don't really care about end-to-end security since we're not
466            sending the message through any intermediate nodes. */
467         if(n->connection && origpkt->len > n->minmtu)
468                 send_tcppacket(n->connection, origpkt);
469         else
470                 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
471         return;
472 }
473
474 static void adapt_socket(const sockaddr_t *sa, int *sock) {
475         /* Make sure we have a suitable socket for the chosen address */
476         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
477                 for(int i = 0; i < listen_sockets; i++) {
478                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
479                                 *sock = i;
480                                 break;
481                         }
482                 }
483         }
484 }
485
486 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
487         /* Latest guess */
488         *sa = &n->address;
489         *sock = n->sock;
490
491         /* If the UDP address is confirmed, use it. */
492         if(n->status.udp_confirmed)
493                 return;
494
495         /* Send every third packet to n->address; that could be set
496            to the node's reflexive UDP address discovered during key
497            exchange. */
498
499         static int x = 0;
500         if(++x >= 3) {
501                 x = 0;
502                 return;
503         }
504
505         /* Otherwise, address are found in edges to this node.
506            So we pick a random edge and a random socket. */
507
508         int i = 0;
509         int j = rand() % n->edge_tree->count;
510         edge_t *candidate = NULL;
511
512         for splay_each(edge_t, e, n->edge_tree) {
513                 if(i++ == j) {
514                         candidate = e->reverse;
515                         break;
516                 }
517         }
518
519         if(candidate) {
520                 *sa = &candidate->address;
521                 *sock = rand() % listen_sockets;
522         }
523
524         adapt_socket(*sa, sock);
525 }
526
527 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
528         *sa = NULL;
529
530         /* Pick one of the edges from this node at random, then use its local address. */
531
532         int i = 0;
533         int j = rand() % n->edge_tree->count;
534         edge_t *candidate = NULL;
535
536         for splay_each(edge_t, e, n->edge_tree) {
537                 if(i++ == j) {
538                         candidate = e;
539                         break;
540                 }
541         }
542
543         if (candidate && candidate->local_address.sa.sa_family) {
544                 *sa = &candidate->local_address;
545                 *sock = rand() % listen_sockets;
546                 adapt_socket(*sa, sock);
547         }
548 }
549
550 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
551         vpn_packet_t pkt1, pkt2;
552         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
553         vpn_packet_t *inpkt = origpkt;
554         int nextpkt = 0;
555         vpn_packet_t *outpkt;
556         int origlen = origpkt->len;
557         size_t outlen;
558 #if defined(SOL_IP) && defined(IP_TOS)
559         static int priority = 0;
560         int origpriority = origpkt->priority;
561 #endif
562
563         pkt1.offset = DEFAULT_PACKET_OFFSET;
564         pkt2.offset = DEFAULT_PACKET_OFFSET;
565
566         if(!n->status.reachable) {
567                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
568                 return;
569         }
570
571         if(n->status.sptps)
572                 return send_sptps_packet(n, origpkt);
573
574 #ifdef DISABLE_LEGACY
575         return;
576 #else
577         /* Make sure we have a valid key */
578
579         if(!n->status.validkey) {
580                 logger(DEBUG_TRAFFIC, LOG_INFO,
581                                    "No valid key known yet for %s (%s), forwarding via TCP",
582                                    n->name, n->hostname);
583                 send_tcppacket(n->nexthop->connection, origpkt);
584                 return;
585         }
586
587         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
588                 logger(DEBUG_TRAFFIC, LOG_INFO,
589                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
590                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
591
592                 if(n != n->nexthop)
593                         send_packet(n->nexthop, origpkt);
594                 else
595                         send_tcppacket(n->nexthop->connection, origpkt);
596
597                 return;
598         }
599
600         /* Compress the packet */
601
602         if(n->outcompression) {
603                 outpkt = pkt[nextpkt++];
604
605                 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
606                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
607                                    n->name, n->hostname);
608                         return;
609                 }
610
611                 inpkt = outpkt;
612         }
613
614         /* Add sequence number */
615
616         seqno_t seqno = htonl(++(n->sent_seqno));
617         memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
618         inpkt->len += sizeof seqno;
619
620         /* Encrypt the packet */
621
622         if(cipher_active(n->outcipher)) {
623                 outpkt = pkt[nextpkt++];
624                 outlen = MAXSIZE;
625
626                 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
627                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
628                         goto end;
629                 }
630
631                 outpkt->len = outlen;
632                 inpkt = outpkt;
633         }
634
635         /* Add the message authentication code */
636
637         if(digest_active(n->outdigest)) {
638                 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
639                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
640                         goto end;
641                 }
642
643                 inpkt->len += digest_length(n->outdigest);
644         }
645
646         /* Send the packet */
647
648         const sockaddr_t *sa = NULL;
649         int sock;
650
651         if(n->status.send_locally)
652                 choose_local_address(n, &sa, &sock);
653         if(!sa)
654                 choose_udp_address(n, &sa, &sock);
655
656 #if defined(SOL_IP) && defined(IP_TOS)
657         if(priorityinheritance && origpriority != priority
658            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
659                 priority = origpriority;
660                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
661                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
662                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
663         }
664 #endif
665
666         if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
667                 if(sockmsgsize(sockerrno)) {
668                         if(n->maxmtu >= origlen)
669                                 n->maxmtu = origlen - 1;
670                         if(n->mtu >= origlen)
671                                 n->mtu = origlen - 1;
672                         try_fix_mtu(n);
673                 } else
674                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
675         }
676
677 end:
678         origpkt->len = origlen;
679 #endif
680 }
681
682 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
683         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
684         bool direct = from == myself && to == relay;
685         bool relay_supported = (relay->options >> 24) >= 4;
686         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
687
688         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
689            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
690                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
691
692         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
693                 char buf[len * 4 / 3 + 5];
694                 b64encode(data, buf, len);
695                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
696                    to ensure we get to learn the reflexive UDP address. */
697                 if(from == myself && !to->status.validkey) {
698                         to->incompression = myself->incompression;
699                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
700                 } else {
701                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
702                 }
703         }
704
705         size_t overhead = 0;
706         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
707         char buf[len + overhead]; char* buf_ptr = buf;
708         if(relay_supported) {
709                 if(direct) {
710                         /* Inform the recipient that this packet was sent directly. */
711                         node_id_t nullid = {};
712                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
713                 } else {
714                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
715                 }
716                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
717
718         }
719         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
720         memcpy(buf_ptr, data, len); buf_ptr += len;
721
722         const sockaddr_t *sa = NULL;
723         int sock;
724         if(relay->status.send_locally)
725                 choose_local_address(relay, &sa, &sock);
726         if(!sa)
727                 choose_udp_address(relay, &sa, &sock);
728         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
729         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
730                 if(sockmsgsize(sockerrno)) {
731                         // Compensate for SPTPS overhead
732                         len -= SPTPS_DATAGRAM_OVERHEAD;
733                         if(relay->maxmtu >= len)
734                                 relay->maxmtu = len - 1;
735                         if(relay->mtu >= len)
736                                 relay->mtu = len - 1;
737                         try_fix_mtu(relay);
738                 } else {
739                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
740                         return false;
741                 }
742         }
743
744         return true;
745 }
746
747 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
748         return send_sptps_data_priv(handle, myself, type, data, len);
749 }
750
751 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
752         node_t *from = handle;
753
754         if(type == SPTPS_HANDSHAKE) {
755                 if(!from->status.validkey) {
756                         from->status.validkey = true;
757                         from->status.waitingforkey = false;
758                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
759                 }
760                 return true;
761         }
762
763         if(len > MTU) {
764                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
765                 return false;
766         }
767
768         vpn_packet_t inpkt;
769         inpkt.offset = DEFAULT_PACKET_OFFSET;
770
771         if(type == PKT_PROBE) {
772                 inpkt.len = len;
773                 memcpy(DATA(&inpkt), data, len);
774                 udp_probe_h(from, &inpkt, len);
775                 return true;
776         }
777
778         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
779                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
780                 return false;
781         }
782
783         /* Check if we have the headers we need */
784         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
785                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
786                 return false;
787         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
788                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
789         }
790
791         int offset = (type & PKT_MAC) ? 0 : 14;
792         if(type & PKT_COMPRESSED) {
793                 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
794                 if(ulen < 0) {
795                         return false;
796                 } else {
797                         inpkt.len = ulen + offset;
798                 }
799                 if(inpkt.len > MAXSIZE)
800                         abort();
801         } else {
802                 memcpy(DATA(&inpkt) + offset, data, len);
803                 inpkt.len = len + offset;
804         }
805
806         /* Generate the Ethernet packet type if necessary */
807         if(offset) {
808                 switch(DATA(&inpkt)[14] >> 4) {
809                         case 4:
810                                 DATA(&inpkt)[12] = 0x08;
811                                 DATA(&inpkt)[13] = 0x00;
812                                 break;
813                         case 6:
814                                 DATA(&inpkt)[12] = 0x86;
815                                 DATA(&inpkt)[13] = 0xDD;
816                                 break;
817                         default:
818                                 logger(DEBUG_TRAFFIC, LOG_ERR,
819                                                    "Unknown IP version %d while reading packet from %s (%s)",
820                                                    DATA(&inpkt)[14] >> 4, from->name, from->hostname);
821                                 return false;
822                 }
823         }
824
825         receive_packet(from, &inpkt);
826         return true;
827 }
828
829 // This function tries to get SPTPS keys, if they aren't already known.
830 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
831 static void try_sptps(node_t *n) {
832         if(n->status.validkey)
833                 return;
834
835         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
836
837         if(!n->status.waitingforkey)
838                 send_req_key(n);
839         else if(n->last_req_key + 10 < now.tv_sec) {
840                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
841                 sptps_stop(&n->sptps);
842                 n->status.waitingforkey = false;
843                 send_req_key(n);
844         }
845
846         return;
847 }
848
849 static void send_udp_probe_packet(node_t *n, int len) {
850         vpn_packet_t packet;
851         packet.offset = DEFAULT_PACKET_OFFSET;
852         memset(DATA(&packet), 0, 14);
853         randomize(DATA(&packet) + 14, len - 14);
854         packet.len = len;
855         packet.priority = 0;
856
857         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
858
859         send_udppacket(n, &packet);
860 }
861
862 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
863 // If a tunnel is already established, it makes sure it stays up.
864 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
865 static void try_udp(node_t* n) {
866         if(!udp_discovery)
867                 return;
868
869         struct timeval now;
870         gettimeofday(&now, NULL);
871         struct timeval ping_tx_elapsed;
872         timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
873
874         if(ping_tx_elapsed.tv_sec >= udp_discovery_interval) {
875                 send_udp_probe_packet(n, MAX(n->minmtu, 16));
876                 n->udp_ping_sent = now;
877
878                 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
879                         n->status.send_locally = true;
880                         send_udp_probe_packet(n, 16);
881                         n->status.send_locally = false;
882                 }
883         }
884 }
885
886 // This function tries to determines the MTU of a node.
887 // By calling this function repeatedly, n->minmtu will be progressively increased, and at some point, n->mtu will be fixed to n->minmtu.
888 // If the MTU is already fixed, this function checks if it can be increased.
889 static void try_mtu(node_t *n) {
890         if(!(n->options & OPTION_PMTU_DISCOVERY))
891                 return;
892
893         if(udp_discovery && !n->status.udp_confirmed) {
894                 n->mtuprobes = 0;
895                 n->minmtu = 0;
896                 n->maxmtu = MTU;
897                 return;
898         }
899
900         /* mtuprobes == 0..89: initial discovery, send bursts with 1 second interval, mtuprobes++
901            mtuprobes ==    90: fix MTU, and go to -1
902            mtuprobes ==    -1: send one >maxmtu probe every pingtimeout */
903
904         struct timeval now;
905         gettimeofday(&now, NULL);
906         struct timeval elapsed;
907         timersub(&now, &n->probe_sent_time, &elapsed);
908         if(n->mtuprobes >= 0) {
909                 if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
910                         return;
911         } else {
912                 if(elapsed.tv_sec < pingtimeout)
913                         return;
914         }
915
916         try_fix_mtu(n);
917
918         int timeout;
919         if(n->mtuprobes < 0) {
920                 /* After the initial discovery, we only send one >maxmtu probe
921                    to detect PMTU increases. */
922                 if(n->maxmtu + 8 < MTU)
923                         send_udp_probe_packet(n, n->maxmtu + 8);
924         } else {
925                 /* Probes are sent with random sizes between the
926                    lower and upper boundaries for the MTU thus far discovered. */
927                 int len = n->maxmtu;
928                 if(n->minmtu < n->maxmtu)
929                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
930                 send_udp_probe_packet(n, MAX(len, 64));
931
932                 if(n->mtuprobes >= 0)
933                         n->mtuprobes++;
934         }
935
936         n->probe_counter = 0;
937         n->probe_sent_time = now;
938         n->probe_time = now;
939
940         /* Calculate the packet loss of incoming traffic by comparing the rate of
941            packets received to the rate with which the sequence number has increased.
942            TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
943          */
944
945         if(n->received > n->prev_received)
946                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
947         else
948                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
949
950         n->prev_received_seqno = n->received_seqno;
951         n->prev_received = n->received;
952 }
953
954 // This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys).
955 // If a tunnel is already established, it tries to improve it (e.g. by trying to establish a UDP tunnel instead of TCP).
956 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if TCP and/or UDP is usable.
957 // By calling this function repeatedly, the tunnel is gradually improved until we hit the wall imposed by the underlying network environment.
958 // It is recommended to call this function every time a packet is sent (or intended to be sent) to a node,
959 // so that the tunnel keeps improving as packets flow, and then gracefully downgrades itself as it goes idle.
960 static void try_tx(node_t *n) {
961         /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
962            messages anyway, so there's no need for SPTPS at all. Otherwise, get the keys. */
963         if(n->status.sptps && !(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))) {
964                 try_sptps(n);
965                 if (!n->status.validkey)
966                         return;
967         }
968
969         node_t *via = (n->via == myself) ? n->nexthop : n->via;
970         
971         if((myself->options | via->options) & OPTION_TCPONLY)
972                 return;
973
974         if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) {
975                 send_req_key(via);
976                 via->last_req_key = now.tv_sec;
977         } else if(via == n || !n->status.sptps || (via->options >> 24) >= 4) {
978                 try_udp(via);
979                 try_mtu(via);
980         }
981
982         /* If we don't know how to reach "via" yet, then try to reach it through a relay. */
983         if(n->status.sptps && !via->status.udp_confirmed && via->nexthop != via && (via->nexthop->options >> 24) >= 4)
984                 try_tx(via->nexthop);
985 }
986
987 /*
988   send a packet to the given vpn ip.
989 */
990 void send_packet(node_t *n, vpn_packet_t *packet) {
991         node_t *via;
992
993         if(n == myself) {
994                 if(overwrite_mac)
995                          memcpy(DATA(packet), mymac.x, ETH_ALEN);
996                 n->out_packets++;
997                 n->out_bytes += packet->len;
998                 devops.write(packet);
999                 return;
1000         }
1001
1002         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
1003                            packet->len, n->name, n->hostname);
1004
1005         if(!n->status.reachable) {
1006                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
1007                                    n->name, n->hostname);
1008                 return;
1009         }
1010
1011         n->out_packets++;
1012         n->out_bytes += packet->len;
1013
1014         if(n->status.sptps) {
1015                 send_sptps_packet(n, packet);
1016                 goto end;
1017         }
1018
1019         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
1020
1021         if(via != n)
1022                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
1023                            n->name, via->name, n->via->hostname);
1024
1025         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
1026                 if(!send_tcppacket(via->connection, packet))
1027                         terminate_connection(via->connection, true);
1028         } else
1029                 send_udppacket(via, packet);
1030
1031 end:
1032         /* Try to improve the tunnel.
1033            Note that we do this *after* we send the packet because sending actual packets take priority
1034            with regard to the send buffer space and latency. */
1035         try_tx(n);
1036 }
1037
1038 /* Broadcast a packet using the minimum spanning tree */
1039
1040 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1041         // Always give ourself a copy of the packet.
1042         if(from != myself)
1043                 send_packet(myself, packet);
1044
1045         // In TunnelServer mode, do not forward broadcast packets.
1046         // The MST might not be valid and create loops.
1047         if(tunnelserver || broadcast_mode == BMODE_NONE)
1048                 return;
1049
1050         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1051                            packet->len, from->name, from->hostname);
1052
1053         switch(broadcast_mode) {
1054                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1055                 // This guarantees all nodes receive the broadcast packet, and
1056                 // usually distributes the sending of broadcast packets over all nodes.
1057                 case BMODE_MST:
1058                         for list_each(connection_t, c, connection_list)
1059                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
1060                                         send_packet(c->node, packet);
1061                         break;
1062
1063                 // In direct mode, we send copies to each node we know of.
1064                 // However, this only reaches nodes that can be reached in a single hop.
1065                 // We don't have enough information to forward broadcast packets in this case.
1066                 case BMODE_DIRECT:
1067                         if(from != myself)
1068                                 break;
1069
1070                         for splay_each(node_t, n, node_tree)
1071                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1072                                         send_packet(n, packet);
1073                         break;
1074
1075                 default:
1076                         break;
1077         }
1078 }
1079
1080 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1081         node_t *n = NULL;
1082         bool hard = false;
1083         static time_t last_hard_try = 0;
1084
1085         for splay_each(edge_t, e, edge_weight_tree) {
1086                 if(!e->to->status.reachable || e->to == myself)
1087                         continue;
1088
1089                 if(sockaddrcmp_noport(from, &e->address)) {
1090                         if(last_hard_try == now.tv_sec)
1091                                 continue;
1092                         hard = true;
1093                 }
1094
1095                 if(!try_mac(e->to, pkt))
1096                         continue;
1097
1098                 n = e->to;
1099                 break;
1100         }
1101
1102         if(hard)
1103                 last_hard_try = now.tv_sec;
1104
1105         last_hard_try = now.tv_sec;
1106         return n;
1107 }
1108
1109 void handle_incoming_vpn_data(void *data, int flags) {
1110         listen_socket_t *ls = data;
1111         vpn_packet_t pkt;
1112         char *hostname;
1113         node_id_t nullid = {};
1114         sockaddr_t addr = {};
1115         socklen_t addrlen = sizeof addr;
1116         node_t *from, *to;
1117         bool direct = false;
1118
1119         pkt.offset = 0;
1120         int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1121
1122         if(len <= 0 || len > MAXSIZE) {
1123                 if(!sockwouldblock(sockerrno))
1124                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1125                 return;
1126         }
1127
1128         pkt.len = len;
1129
1130         sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1131
1132         // Try to figure out who sent this packet.
1133
1134         node_t *n = lookup_node_udp(&addr);
1135
1136         if(!n) {
1137                 // It might be from a 1.1 node, which might have a source ID in the packet.
1138                 pkt.offset = 2 * sizeof(node_id_t);
1139                 from = lookup_node_id(SRCID(&pkt));
1140                 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1141                         if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1142                                 n = from;
1143                         else
1144                                 goto skip_harder;
1145                 }
1146         }
1147
1148         if(!n) {
1149                 pkt.offset = 0;
1150                 n = try_harder(&addr, &pkt);
1151         }
1152
1153 skip_harder:
1154         if(!n) {
1155                 if(debug_level >= DEBUG_PROTOCOL) {
1156                         hostname = sockaddr2hostname(&addr);
1157                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1158                         free(hostname);
1159                 }
1160                 return;
1161         }
1162
1163         if(n->status.sptps) {
1164                 pkt.offset = 2 * sizeof(node_id_t);
1165
1166                 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1167                         direct = true;
1168                         from = n;
1169                         to = myself;
1170                 } else {
1171                         from = lookup_node_id(SRCID(&pkt));
1172                         to = lookup_node_id(DSTID(&pkt));
1173                 }
1174                 if(!from || !to) {
1175                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1176                         return;
1177                 }
1178
1179                 if(to != myself) {
1180                         send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1181                         return;
1182                 }
1183         } else {
1184                 direct = true;
1185                 from = n;
1186         }
1187
1188         pkt.offset = 0;
1189         if(!receive_udppacket(from, &pkt))
1190                 return;
1191
1192         n->sock = ls - listen_socket;
1193         if(direct && sockaddrcmp(&addr, &n->address))
1194                 update_node_udp(n, &addr);
1195 }
1196
1197 void handle_device_data(void *data, int flags) {
1198         vpn_packet_t packet;
1199         packet.offset = DEFAULT_PACKET_OFFSET;
1200         packet.priority = 0;
1201
1202         if(devops.read(&packet)) {
1203                 myself->in_packets++;
1204                 myself->in_bytes += packet.len;
1205                 route(myself, &packet);
1206         }
1207 }