Fix compiler warnings.
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2013 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
49 int keylifetime = 0;
50 #ifdef HAVE_LZO
51 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
52 #endif
53
54 static void send_udppacket(node_t *, vpn_packet_t *);
55
56 unsigned replaywin = 16;
57 bool localdiscovery = true;
58
59 #define MAX_SEQNO 1073741824
60
61 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
62    mtuprobes ==    31: sleep pinginterval seconds
63    mtuprobes ==    32: send 1 burst, sleep pingtimeout seconds
64    mtuprobes ==    33: no response from other side, restart PMTU discovery process
65
66    Probes are sent in batches of at least three, with random sizes between the
67    lower and upper boundaries for the MTU thus far discovered.
68
69    After the initial discovery, a fourth packet is added to each batch with a
70    size larger than the currently known PMTU, to test if the PMTU has increased.
71
72    In case local discovery is enabled, another packet is added to each batch,
73    which will be broadcast to the local network.
74
75 */
76
77 static void send_mtu_probe_handler(void *data) {
78         node_t *n = data;
79         int timeout = 1;
80
81         n->mtuprobes++;
82
83         if(!n->status.reachable || !n->status.validkey) {
84                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
85                 n->mtuprobes = 0;
86                 return;
87         }
88
89         if(n->mtuprobes > 32) {
90                 if(!n->minmtu) {
91                         n->mtuprobes = 31;
92                         timeout = pinginterval;
93                         goto end;
94                 }
95
96                 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
97                 n->status.udp_confirmed = false;
98                 n->mtuprobes = 1;
99                 n->minmtu = 0;
100                 n->maxmtu = MTU;
101         }
102
103         if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
104                 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
105                 n->mtuprobes = 31;
106         }
107
108         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
109                 if(n->minmtu > n->maxmtu)
110                         n->minmtu = n->maxmtu;
111                 else
112                         n->maxmtu = n->minmtu;
113                 n->mtu = n->minmtu;
114                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
115                 n->mtuprobes = 31;
116         }
117
118         if(n->mtuprobes == 31) {
119                 timeout = pinginterval;
120                 goto end;
121         } else if(n->mtuprobes == 32) {
122                 timeout = pingtimeout;
123         }
124
125         for(int i = 0; i < 4 + localdiscovery; i++) {
126                 int len;
127
128                 if(i == 0) {
129                         if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
130                                 continue;
131                         len = n->maxmtu + 8;
132                 } else if(n->maxmtu <= n->minmtu) {
133                         len = n->maxmtu;
134                 } else {
135                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
136                 }
137
138                 if(len < 64)
139                         len = 64;
140
141                 vpn_packet_t packet;
142                 memset(packet.data, 0, 14);
143                 randomize(packet.data + 14, len - 14);
144                 packet.len = len;
145                 packet.priority = 0;
146                 n->status.send_locally = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
147
148                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
149
150                 send_udppacket(n, &packet);
151         }
152
153         n->status.send_locally = false;
154         n->probe_counter = 0;
155         gettimeofday(&n->probe_time, NULL);
156
157         /* Calculate the packet loss of incoming traffic by comparing the rate of
158            packets received to the rate with which the sequence number has increased.
159          */
160
161         if(n->received > n->prev_received)
162                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
163         else
164                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
165
166         n->prev_received_seqno = n->received_seqno;
167         n->prev_received = n->received;
168
169 end:
170         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
171 }
172
173 void send_mtu_probe(node_t *n) {
174         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
175         send_mtu_probe_handler(n);
176 }
177
178 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
179         if(!packet->data[0]) {
180                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);
181
182                 /* It's a probe request, send back a reply */
183
184                 /* Type 2 probe replies were introduced in protocol 17.3 */
185                 if ((n->options >> 24) >= 3) {
186                         uint8_t* data = packet->data;
187                         *data++ = 2;
188                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
189                         struct timeval now;
190                         gettimeofday(&now, NULL);
191                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
192                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
193                         packet->len = data - packet->data;
194                 } else {
195                         /* Legacy protocol: n won't understand type 2 probe replies. */
196                         packet->data[0] = 1;
197                 }
198
199                 /* Temporarily set udp_confirmed, so that the reply is sent
200                    back exactly the way it came in. */
201
202                 bool udp_confirmed = n->status.udp_confirmed;
203                 n->status.udp_confirmed = true;
204                 send_udppacket(n, packet);
205                 n->status.udp_confirmed = udp_confirmed;
206         } else {
207                 length_t probelen = len;
208                 if (packet->data[0] == 2) {
209                         if (len < 3)
210                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
211                         else {
212                                 uint16_t probelen16; memcpy(&probelen16, packet->data + 1, 2); probelen = ntohs(probelen16);
213                         }
214                 }
215                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", packet->data[0], probelen, n->name, n->hostname);
216
217                 /* It's a valid reply: now we know bidirectional communication
218                    is possible using the address and socket that the reply
219                    packet used. */
220
221                 n->status.udp_confirmed = true;
222
223                 /* If we haven't established the PMTU yet, restart the discovery process. */
224
225                 if(n->mtuprobes > 30) {
226                         if (probelen == n->maxmtu + 8) {
227                                 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
228                                 n->maxmtu = MTU;
229                                 n->mtuprobes = 10;
230                                 return;
231                         }
232
233                         if(n->minmtu)
234                                 n->mtuprobes = 30;
235                         else
236                                 n->mtuprobes = 1;
237                 }
238
239                 /* If applicable, raise the minimum supported MTU */
240
241                 if(probelen > n->maxmtu)
242                         probelen = n->maxmtu;
243                 if(n->minmtu < probelen)
244                         n->minmtu = probelen;
245
246                 /* Calculate RTT and bandwidth.
247                    The RTT is the time between the MTU probe burst was sent and the first
248                    reply is received. The bandwidth is measured using the time between the
249                    arrival of the first and third probe reply (or type 2 probe requests).
250                  */
251
252                 struct timeval now, diff;
253                 gettimeofday(&now, NULL);
254                 timersub(&now, &n->probe_time, &diff);
255
256                 struct timeval probe_timestamp = now;
257                 if (packet->data[0] == 2 && packet->len >= 11) {
258                         uint32_t sec; memcpy(&sec, packet->data + 3, 4);
259                         uint32_t usec; memcpy(&usec, packet->data + 7, 4);
260                         probe_timestamp.tv_sec = ntohl(sec);
261                         probe_timestamp.tv_usec = ntohl(usec);
262                 }
263                 
264                 n->probe_counter++;
265
266                 if(n->probe_counter == 1) {
267                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
268                         n->probe_time = probe_timestamp;
269                 } else if(n->probe_counter == 3) {
270                         struct timeval probe_timestamp_diff;
271                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
272                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
273                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
274                 }
275         }
276 }
277
278 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
279         if(level == 0) {
280                 memcpy(dest, source, len);
281                 return len;
282         } else if(level == 10) {
283 #ifdef HAVE_LZO
284                 lzo_uint lzolen = MAXSIZE;
285                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
286                 return lzolen;
287 #else
288                 return -1;
289 #endif
290         } else if(level < 10) {
291 #ifdef HAVE_ZLIB
292                 unsigned long destlen = MAXSIZE;
293                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
294                         return destlen;
295                 else
296 #endif
297                         return -1;
298         } else {
299 #ifdef HAVE_LZO
300                 lzo_uint lzolen = MAXSIZE;
301                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
302                 return lzolen;
303 #else
304                 return -1;
305 #endif
306         }
307
308         return -1;
309 }
310
311 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
312         if(level == 0) {
313                 memcpy(dest, source, len);
314                 return len;
315         } else if(level > 9) {
316 #ifdef HAVE_LZO
317                 lzo_uint lzolen = MAXSIZE;
318                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
319                         return lzolen;
320                 else
321 #endif
322                         return -1;
323         }
324 #ifdef HAVE_ZLIB
325         else {
326                 unsigned long destlen = MAXSIZE;
327                 if(uncompress(dest, &destlen, source, len) == Z_OK)
328                         return destlen;
329                 else
330                         return -1;
331         }
332 #endif
333
334         return -1;
335 }
336
337 /* VPN packet I/O */
338
339 static void receive_packet(node_t *n, vpn_packet_t *packet) {
340         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
341                            packet->len, n->name, n->hostname);
342
343         n->in_packets++;
344         n->in_bytes += packet->len;
345
346         route(n, packet);
347 }
348
349 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
350         if(n->status.sptps)
351                 return sptps_verify_datagram(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
352
353         if(!digest_active(n->indigest) || inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest))
354                 return false;
355
356         return digest_verify(n->indigest, &inpkt->seqno, inpkt->len - digest_length(n->indigest), (const char *)&inpkt->seqno + inpkt->len - digest_length(n->indigest));
357 }
358
359 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
360         vpn_packet_t pkt1, pkt2;
361         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
362         int nextpkt = 0;
363         size_t outlen;
364
365         if(n->status.sptps) {
366                 if(!n->sptps.state) {
367                         if(!n->status.waitingforkey) {
368                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
369                                 send_req_key(n);
370                         } else {
371                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
372                         }
373                         return false;
374                 }
375                 return sptps_receive_data(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
376         }
377
378         if(!n->status.validkey) {
379                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
380                 return false;
381         }
382
383         /* Check packet length */
384
385         if(inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest)) {
386                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
387                                         n->name, n->hostname);
388                 return false;
389         }
390
391         /* Check the message authentication code */
392
393         if(digest_active(n->indigest)) {
394                 inpkt->len -= digest_length(n->indigest);
395                 if(!digest_verify(n->indigest, &inpkt->seqno, inpkt->len, (const char *)&inpkt->seqno + inpkt->len)) {
396                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
397                         return false;
398                 }
399         }
400         /* Decrypt the packet */
401
402         if(cipher_active(n->incipher)) {
403                 vpn_packet_t *outpkt = pkt[nextpkt++];
404                 outlen = MAXSIZE;
405
406                 if(!cipher_decrypt(n->incipher, &inpkt->seqno, inpkt->len, &outpkt->seqno, &outlen, true)) {
407                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
408                         return false;
409                 }
410
411                 outpkt->len = outlen;
412                 inpkt = outpkt;
413         }
414
415         /* Check the sequence number */
416
417         inpkt->len -= sizeof inpkt->seqno;
418         uint32_t seqno;
419         memcpy(&seqno, inpkt->seqno, sizeof seqno);
420         seqno = ntohl(seqno);
421
422         if(replaywin) {
423                 if(seqno != n->received_seqno + 1) {
424                         if(seqno >= n->received_seqno + replaywin * 8) {
425                                 if(n->farfuture++ < replaywin >> 2) {
426                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
427                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
428                                         return false;
429                                 }
430                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
431                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
432                                 memset(n->late, 0, replaywin);
433                         } else if (seqno <= n->received_seqno) {
434                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
435                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
436                                                 n->name, n->hostname, seqno, n->received_seqno);
437                                         return false;
438                                 }
439                         } else {
440                                 for(int i = n->received_seqno + 1; i < seqno; i++)
441                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
442                         }
443                 }
444
445                 n->farfuture = 0;
446                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
447         }
448
449         if(seqno > n->received_seqno)
450                 n->received_seqno = seqno;
451
452         n->received++;
453
454         if(n->received_seqno > MAX_SEQNO)
455                 regenerate_key();
456
457         /* Decompress the packet */
458
459         length_t origlen = inpkt->len;
460
461         if(n->incompression) {
462                 vpn_packet_t *outpkt = pkt[nextpkt++];
463
464                 if((outpkt->len = uncompress_packet(outpkt->data, inpkt->data, inpkt->len, n->incompression)) < 0) {
465                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
466                                                  n->name, n->hostname);
467                         return false;
468                 }
469
470                 inpkt = outpkt;
471
472                 origlen -= MTU/64 + 20;
473         }
474
475         inpkt->priority = 0;
476
477         if(!inpkt->data[12] && !inpkt->data[13])
478                 mtu_probe_h(n, inpkt, origlen);
479         else
480                 receive_packet(n, inpkt);
481         return true;
482 }
483
484 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
485         vpn_packet_t outpkt;
486
487         if(len > sizeof outpkt.data)
488                 return;
489
490         outpkt.len = len;
491         if(c->options & OPTION_TCPONLY)
492                 outpkt.priority = 0;
493         else
494                 outpkt.priority = -1;
495         memcpy(outpkt.data, buffer, len);
496
497         receive_packet(c->node, &outpkt);
498 }
499
500 static bool try_sptps(node_t *n) {
501         if(n->status.validkey)
502                 return true;
503
504         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
505
506         if(!n->status.waitingforkey)
507                 send_req_key(n);
508         else if(n->last_req_key + 10 < now.tv_sec) {
509                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
510                 sptps_stop(&n->sptps);
511                 n->status.waitingforkey = false;
512                 send_req_key(n);
513         }
514
515         return false;
516 }
517
518 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
519         if (!try_sptps(n))
520                 return;
521
522         uint8_t type = 0;
523         int offset = 0;
524
525         if(!(origpkt->data[12] | origpkt->data[13])) {
526                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)origpkt->data, origpkt->len);
527                 return;
528         }
529
530         if(routing_mode == RMODE_ROUTER)
531                 offset = 14;
532         else
533                 type = PKT_MAC;
534
535         if(origpkt->len < offset)
536                 return;
537
538         vpn_packet_t outpkt;
539
540         if(n->outcompression) {
541                 int len = compress_packet(outpkt.data + offset, origpkt->data + offset, origpkt->len - offset, n->outcompression);
542                 if(len < 0) {
543                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
544                 } else if(len < origpkt->len - offset) {
545                         outpkt.len = len + offset;
546                         origpkt = &outpkt;
547                         type |= PKT_COMPRESSED;
548                 }
549         }
550
551         sptps_send_record(&n->sptps, type, (char *)origpkt->data + offset, origpkt->len - offset);
552         return;
553 }
554
555 static void adapt_socket(const sockaddr_t *sa, int *sock) {
556         /* Make sure we have a suitable socket for the chosen address */
557         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
558                 for(int i = 0; i < listen_sockets; i++) {
559                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
560                                 *sock = i;
561                                 break;
562                         }
563                 }
564         }
565 }
566
567 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
568         /* Latest guess */
569         *sa = &n->address;
570         *sock = n->sock;
571
572         /* If the UDP address is confirmed, use it. */
573         if(n->status.udp_confirmed)
574                 return;
575
576         /* Send every third packet to n->address; that could be set
577            to the node's reflexive UDP address discovered during key
578            exchange. */
579
580         static int x = 0;
581         if(++x >= 3) {
582                 x = 0;
583                 return;
584         }
585
586         /* Otherwise, address are found in edges to this node.
587            So we pick a random edge and a random socket. */
588
589         int i = 0;
590         int j = rand() % n->edge_tree->count;
591         edge_t *candidate = NULL;
592
593         for splay_each(edge_t, e, n->edge_tree) {
594                 if(i++ == j) {
595                         candidate = e->reverse;
596                         break;
597                 }
598         }
599
600         if(candidate) {
601                 *sa = &candidate->address;
602                 *sock = rand() % listen_sockets;
603         }
604
605         adapt_socket(*sa, sock);
606 }
607
608 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
609         *sa = NULL;
610
611         /* Pick one of the edges from this node at random, then use its local address. */
612
613         int i = 0;
614         int j = rand() % n->edge_tree->count;
615         edge_t *candidate = NULL;
616
617         for splay_each(edge_t, e, n->edge_tree) {
618                 if(i++ == j) {
619                         candidate = e;
620                         break;
621                 }
622         }
623
624         if (candidate && candidate->local_address.sa.sa_family) {
625                 *sa = &candidate->local_address;
626                 *sock = rand() % listen_sockets;
627                 adapt_socket(*sa, sock);
628         }
629 }
630
631 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
632         vpn_packet_t pkt1, pkt2;
633         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
634         vpn_packet_t *inpkt = origpkt;
635         int nextpkt = 0;
636         vpn_packet_t *outpkt;
637         int origlen = origpkt->len;
638         size_t outlen;
639 #if defined(SOL_IP) && defined(IP_TOS)
640         static int priority = 0;
641         int origpriority = origpkt->priority;
642 #endif
643
644         if(!n->status.reachable) {
645                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
646                 return;
647         }
648
649         if(n->status.sptps)
650                 return send_sptps_packet(n, origpkt);
651
652         /* Make sure we have a valid key */
653
654         if(!n->status.validkey) {
655                 logger(DEBUG_TRAFFIC, LOG_INFO,
656                                    "No valid key known yet for %s (%s), forwarding via TCP",
657                                    n->name, n->hostname);
658
659                 if(n->last_req_key + 10 <= now.tv_sec) {
660                         send_req_key(n);
661                         n->last_req_key = now.tv_sec;
662                 }
663
664                 send_tcppacket(n->nexthop->connection, origpkt);
665
666                 return;
667         }
668
669         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (inpkt->data[12] | inpkt->data[13])) {
670                 logger(DEBUG_TRAFFIC, LOG_INFO,
671                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
672                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
673
674                 if(n != n->nexthop)
675                         send_packet(n->nexthop, origpkt);
676                 else
677                         send_tcppacket(n->nexthop->connection, origpkt);
678
679                 return;
680         }
681
682         /* Compress the packet */
683
684         if(n->outcompression) {
685                 outpkt = pkt[nextpkt++];
686
687                 if((outpkt->len = compress_packet(outpkt->data, inpkt->data, inpkt->len, n->outcompression)) < 0) {
688                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
689                                    n->name, n->hostname);
690                         return;
691                 }
692
693                 inpkt = outpkt;
694         }
695
696         /* Add sequence number */
697
698         uint32_t seqno = htonl(++(n->sent_seqno));
699         memcpy(inpkt->seqno, &seqno, sizeof inpkt->seqno);
700         inpkt->len += sizeof inpkt->seqno;
701
702         /* Encrypt the packet */
703
704         if(cipher_active(n->outcipher)) {
705                 outpkt = pkt[nextpkt++];
706                 outlen = MAXSIZE;
707
708                 if(!cipher_encrypt(n->outcipher, inpkt->seqno, inpkt->len, outpkt->seqno, &outlen, true)) {
709                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
710                         goto end;
711                 }
712
713                 outpkt->len = outlen;
714                 inpkt = outpkt;
715         }
716
717         /* Add the message authentication code */
718
719         if(digest_active(n->outdigest)) {
720                 if(!digest_create(n->outdigest, inpkt->seqno, inpkt->len, inpkt->seqno + inpkt->len)) {
721                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
722                         goto end;
723                 }
724
725                 inpkt->len += digest_length(n->outdigest);
726         }
727
728         /* Send the packet */
729
730         const sockaddr_t *sa = NULL;
731         int sock;
732
733         if(n->status.send_locally)
734                 choose_local_address(n, &sa, &sock);
735         if(!sa)
736                 choose_udp_address(n, &sa, &sock);
737
738 #if defined(SOL_IP) && defined(IP_TOS)
739         if(priorityinheritance && origpriority != priority
740            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
741                 priority = origpriority;
742                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
743                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
744                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
745         }
746 #endif
747
748         if(sendto(listen_socket[sock].udp.fd, inpkt->seqno, inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
749                 if(sockmsgsize(sockerrno)) {
750                         if(n->maxmtu >= origlen)
751                                 n->maxmtu = origlen - 1;
752                         if(n->mtu >= origlen)
753                                 n->mtu = origlen - 1;
754                 } else
755                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
756         }
757
758 end:
759         origpkt->len = origlen;
760 }
761
762 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
763         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
764         bool direct = from == myself && to == relay;
765         bool relay_supported = (relay->options >> 24) >= 4;
766         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
767
768         /* We don't really need the relay's key, but we need to establish a UDP tunnel with it and discover its MTU. */
769         if (!direct && relay_supported && !tcponly)
770                 try_sptps(relay);
771
772         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
773            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
774                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
775
776         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
777                 char buf[len * 4 / 3 + 5];
778                 b64encode(data, buf, len);
779                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
780                    to ensure we get to learn the reflexive UDP address. */
781                 if(from == myself && !to->status.validkey) {
782                         to->incompression = myself->incompression;
783                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
784                 } else {
785                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
786                 }
787         }
788
789         size_t overhead = 0;
790         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
791         char buf[len + overhead]; char* buf_ptr = buf;
792         if(relay_supported) {
793                 if(direct) {
794                         /* Inform the recipient that this packet was sent directly. */
795                         node_id_t nullid = {};
796                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
797                 } else {
798                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
799                 }
800                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
801
802         }
803         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
804         memcpy(buf_ptr, data, len); buf_ptr += len;
805
806         const sockaddr_t *sa = NULL;
807         int sock;
808         if(relay->status.send_locally)
809                 choose_local_address(relay, &sa, &sock);
810         if(!sa)
811                 choose_udp_address(relay, &sa, &sock);
812         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
813         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
814                 if(sockmsgsize(sockerrno)) {
815                         // Compensate for SPTPS overhead
816                         len -= SPTPS_DATAGRAM_OVERHEAD;
817                         if(relay->maxmtu >= len)
818                                 relay->maxmtu = len - 1;
819                         if(relay->mtu >= len)
820                                 relay->mtu = len - 1;
821                 } else {
822                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
823                         return false;
824                 }
825         }
826
827         return true;
828 }
829
830 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
831         return send_sptps_data_priv(handle, myself, type, data, len);
832 }
833
834 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
835         node_t *from = handle;
836
837         if(type == SPTPS_HANDSHAKE) {
838                 if(!from->status.validkey) {
839                         from->status.validkey = true;
840                         from->status.waitingforkey = false;
841                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
842                 }
843                 return true;
844         }
845
846         if(len > MTU) {
847                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
848                 return false;
849         }
850
851         vpn_packet_t inpkt;
852
853         if(type == PKT_PROBE) {
854                 inpkt.len = len;
855                 memcpy(inpkt.data, data, len);
856                 mtu_probe_h(from, &inpkt, len);
857                 return true;
858         }
859
860         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
861                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
862                 return false;
863         }
864
865         /* Check if we have the headers we need */
866         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
867                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
868                 return false;
869         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
870                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
871         }
872
873         int offset = (type & PKT_MAC) ? 0 : 14;
874         if(type & PKT_COMPRESSED) {
875                 length_t ulen = uncompress_packet(inpkt.data + offset, (const uint8_t *)data, len, from->incompression);
876                 if(ulen < 0) {
877                         return false;
878                 } else {
879                         inpkt.len = ulen + offset;
880                 }
881                 if(inpkt.len > MAXSIZE)
882                         abort();
883         } else {
884                 memcpy(inpkt.data + offset, data, len);
885                 inpkt.len = len + offset;
886         }
887
888         /* Generate the Ethernet packet type if necessary */
889         if(offset) {
890                 switch(inpkt.data[14] >> 4) {
891                         case 4:
892                                 inpkt.data[12] = 0x08;
893                                 inpkt.data[13] = 0x00;
894                                 break;
895                         case 6:
896                                 inpkt.data[12] = 0x86;
897                                 inpkt.data[13] = 0xDD;
898                                 break;
899                         default:
900                                 logger(DEBUG_TRAFFIC, LOG_ERR,
901                                                    "Unknown IP version %d while reading packet from %s (%s)",
902                                                    inpkt.data[14] >> 4, from->name, from->hostname);
903                                 return false;
904                 }
905         }
906
907         receive_packet(from, &inpkt);
908         return true;
909 }
910
911 /*
912   send a packet to the given vpn ip.
913 */
914 void send_packet(node_t *n, vpn_packet_t *packet) {
915         node_t *via;
916
917         if(n == myself) {
918                 if(overwrite_mac)
919                          memcpy(packet->data, mymac.x, ETH_ALEN);
920                 n->out_packets++;
921                 n->out_bytes += packet->len;
922                 devops.write(packet);
923                 return;
924         }
925
926         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
927                            packet->len, n->name, n->hostname);
928
929         if(!n->status.reachable) {
930                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
931                                    n->name, n->hostname);
932                 return;
933         }
934
935         n->out_packets++;
936         n->out_bytes += packet->len;
937
938         if(n->status.sptps) {
939                 send_sptps_packet(n, packet);
940                 return;
941         }
942
943         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
944
945         if(via != n)
946                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
947                            n->name, via->name, n->via->hostname);
948
949         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
950                 if(!send_tcppacket(via->connection, packet))
951                         terminate_connection(via->connection, true);
952         } else
953                 send_udppacket(via, packet);
954 }
955
956 /* Broadcast a packet using the minimum spanning tree */
957
958 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
959         // Always give ourself a copy of the packet.
960         if(from != myself)
961                 send_packet(myself, packet);
962
963         // In TunnelServer mode, do not forward broadcast packets.
964         // The MST might not be valid and create loops.
965         if(tunnelserver || broadcast_mode == BMODE_NONE)
966                 return;
967
968         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
969                            packet->len, from->name, from->hostname);
970
971         switch(broadcast_mode) {
972                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
973                 // This guarantees all nodes receive the broadcast packet, and
974                 // usually distributes the sending of broadcast packets over all nodes.
975                 case BMODE_MST:
976                         for list_each(connection_t, c, connection_list)
977                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
978                                         send_packet(c->node, packet);
979                         break;
980
981                 // In direct mode, we send copies to each node we know of.
982                 // However, this only reaches nodes that can be reached in a single hop.
983                 // We don't have enough information to forward broadcast packets in this case.
984                 case BMODE_DIRECT:
985                         if(from != myself)
986                                 break;
987
988                         for splay_each(node_t, n, node_tree)
989                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
990                                         send_packet(n, packet);
991                         break;
992
993                 default:
994                         break;
995         }
996 }
997
998 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
999         node_t *n = NULL;
1000         bool hard = false;
1001         static time_t last_hard_try = 0;
1002
1003         for splay_each(edge_t, e, edge_weight_tree) {
1004                 if(!e->to->status.reachable || e->to == myself)
1005                         continue;
1006
1007                 if(sockaddrcmp_noport(from, &e->address)) {
1008                         if(last_hard_try == now.tv_sec)
1009                                 continue;
1010                         hard = true;
1011                 }
1012
1013                 if(!try_mac(e->to, pkt))
1014                         continue;
1015
1016                 n = e->to;
1017                 break;
1018         }
1019
1020         if(hard)
1021                 last_hard_try = now.tv_sec;
1022
1023         last_hard_try = now.tv_sec;
1024         return n;
1025 }
1026
1027 void handle_incoming_vpn_data(void *data, int flags) {
1028         listen_socket_t *ls = data;
1029         vpn_packet_t pkt;
1030         char *hostname;
1031         sockaddr_t from = {{0}};
1032         socklen_t fromlen = sizeof from;
1033         node_t *n = NULL;
1034         node_t *to = myself;
1035         int len;
1036
1037         len = recvfrom(ls->udp.fd, &pkt.dstid, MAXSIZE, 0, &from.sa, &fromlen);
1038
1039         if(len <= 0 || len > MAXSIZE) {
1040                 if(!sockwouldblock(sockerrno))
1041                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1042                 return;
1043         }
1044
1045         pkt.len = len;
1046
1047         sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
1048
1049         bool direct = false;
1050         if(len >= sizeof pkt.dstid + sizeof pkt.srcid) {
1051                 n = lookup_node_id(&pkt.srcid);
1052                 if(n) {
1053                         node_id_t nullid = {};
1054                         if(memcmp(&pkt.dstid, &nullid, sizeof nullid) == 0) {
1055                                 /* A zero dstid is used to indicate a direct, non-relayed packet. */
1056                                 direct = true;
1057                         } else {
1058                                 to = lookup_node_id(&pkt.dstid);
1059                                 if(!to) {
1060                                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet presumably sent by %s (%s) but with unknown destination ID", n->name, n->hostname);
1061                                         return;
1062                                 }
1063                         }
1064                         pkt.len -= sizeof pkt.dstid + sizeof pkt.srcid;
1065                 }
1066         }
1067
1068         if(to != myself) {
1069                 /* We are being asked to relay this packet. */
1070
1071                 /* Don't allow random strangers to relay through us. Note that we check for *any* known address since we are not necessarily the first relay. */
1072                 if (!lookup_node_udp(&from)) {
1073                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Refusing to relay packet from (presumably) %s (%s) to (presumably) %s (%s) because the packet comes from an unknown address", n->name, n->hostname, to->name, to->hostname);
1074                         return;
1075                 }
1076
1077                 send_sptps_data_priv(to, n, 0, pkt.seqno, pkt.len);
1078                 return;
1079         }
1080
1081         if(!n) {
1082                 /* Most likely an old-style packet without node IDs. */
1083                 direct = true;
1084                 memmove(pkt.seqno, &pkt.dstid, sizeof pkt - offsetof(vpn_packet_t, seqno));
1085                 n = lookup_node_udp(&from);
1086         }
1087
1088         if(!n)
1089                 n = try_harder(&from, &pkt);
1090
1091         if(!n) {
1092                 if(debug_level >= DEBUG_PROTOCOL) {
1093                         hostname = sockaddr2hostname(&from);
1094                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1095                         free(hostname);
1096                 }
1097                 return;
1098         }
1099
1100         if(!receive_udppacket(n, &pkt))
1101                 return;
1102
1103         n->sock = ls - listen_socket;
1104         if(direct && sockaddrcmp(&from, &n->address))
1105                 update_node_udp(n, &from);
1106 }
1107
1108 void handle_device_data(void *data, int flags) {
1109         vpn_packet_t packet;
1110
1111         packet.priority = 0;
1112
1113         if(devops.read(&packet)) {
1114                 myself->in_packets++;
1115                 myself->in_bytes += packet.len;
1116                 route(myself, &packet);
1117         }
1118 }