Add a variable offset to vpn_packet_t, drop sptps_packet_t.
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
49 int keylifetime = 0;
50 #ifdef HAVE_LZO
51 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
52 #endif
53
54 static void send_udppacket(node_t *, vpn_packet_t *);
55
56 unsigned replaywin = 16;
57 bool localdiscovery = true;
58
59 #define MAX_SEQNO 1073741824
60
61 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
62    mtuprobes ==    31: sleep pinginterval seconds
63    mtuprobes ==    32: send 1 burst, sleep pingtimeout second
64    mtuprobes ==    33: no response from other side, restart PMTU discovery process
65
66    Probes are sent in batches of at least three, with random sizes between the
67    lower and upper boundaries for the MTU thus far discovered.
68
69    After the initial discovery, a fourth packet is added to each batch with a
70    size larger than the currently known PMTU, to test if the PMTU has increased.
71
72    In case local discovery is enabled, another packet is added to each batch,
73    which will be broadcast to the local network.
74
75 */
76
77 static void send_mtu_probe_handler(void *data) {
78         node_t *n = data;
79         int timeout = 1;
80
81         n->mtuprobes++;
82
83         if(!n->status.reachable || !n->status.validkey) {
84                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
85                 n->mtuprobes = 0;
86                 return;
87         }
88
89         if(n->mtuprobes > 32) {
90                 if(!n->minmtu) {
91                         n->mtuprobes = 31;
92                         timeout = pinginterval;
93                         goto end;
94                 }
95
96                 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
97                 n->status.udp_confirmed = false;
98                 n->mtuprobes = 1;
99                 n->minmtu = 0;
100                 n->maxmtu = MTU;
101         }
102
103         if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
104                 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
105                 n->mtuprobes = 31;
106         }
107
108         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
109                 if(n->minmtu > n->maxmtu)
110                         n->minmtu = n->maxmtu;
111                 else
112                         n->maxmtu = n->minmtu;
113                 n->mtu = n->minmtu;
114                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
115                 n->mtuprobes = 31;
116         }
117
118         if(n->mtuprobes == 31) {
119                 timeout = pinginterval;
120                 goto end;
121         } else if(n->mtuprobes == 32) {
122                 timeout = pingtimeout;
123         }
124
125         for(int i = 0; i < 4 + localdiscovery; i++) {
126                 int len;
127
128                 if(i == 0) {
129                         if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
130                                 continue;
131                         len = n->maxmtu + 8;
132                 } else if(n->maxmtu <= n->minmtu) {
133                         len = n->maxmtu;
134                 } else {
135                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
136                 }
137
138                 if(len < 64)
139                         len = 64;
140
141                 vpn_packet_t packet;
142                 packet.offset = DEFAULT_PACKET_OFFSET;
143                 memset(DATA(&packet), 0, 14);
144                 randomize(DATA(&packet) + 14, len - 14);
145                 packet.len = len;
146                 packet.priority = 0;
147                 n->status.send_locally = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
148
149                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
150
151                 send_udppacket(n, &packet);
152         }
153
154         n->status.send_locally = false;
155         n->probe_counter = 0;
156         gettimeofday(&n->probe_time, NULL);
157
158         /* Calculate the packet loss of incoming traffic by comparing the rate of
159            packets received to the rate with which the sequence number has increased.
160          */
161
162         if(n->received > n->prev_received)
163                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
164         else
165                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
166
167         n->prev_received_seqno = n->received_seqno;
168         n->prev_received = n->received;
169
170 end:
171         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
172 }
173
174 void send_mtu_probe(node_t *n) {
175         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
176         send_mtu_probe_handler(n);
177 }
178
179 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
180         if(!DATA(packet)[0]) {
181                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);
182
183                 /* It's a probe request, send back a reply */
184
185                 /* Type 2 probe replies were introduced in protocol 17.3 */
186                 if ((n->options >> 24) >= 3) {
187                         uint8_t *data = DATA(packet);
188                         *data++ = 2;
189                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
190                         struct timeval now;
191                         gettimeofday(&now, NULL);
192                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
193                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
194                         packet->len -= 10;
195                 } else {
196                         /* Legacy protocol: n won't understand type 2 probe replies. */
197                         DATA(packet)[0] = 1;
198                 }
199
200                 /* Temporarily set udp_confirmed, so that the reply is sent
201                    back exactly the way it came in. */
202
203                 bool udp_confirmed = n->status.udp_confirmed;
204                 n->status.udp_confirmed = true;
205                 send_udppacket(n, packet);
206                 n->status.udp_confirmed = udp_confirmed;
207         } else {
208                 length_t probelen = len;
209                 if (DATA(packet)[0] == 2) {
210                         if (len < 3)
211                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
212                         else {
213                                 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
214                         }
215                 }
216                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
217
218                 /* It's a valid reply: now we know bidirectional communication
219                    is possible using the address and socket that the reply
220                    packet used. */
221
222                 n->status.udp_confirmed = true;
223
224                 /* If we haven't established the PMTU yet, restart the discovery process. */
225
226                 if(n->mtuprobes > 30) {
227                         if (probelen == n->maxmtu + 8) {
228                                 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
229                                 n->maxmtu = MTU;
230                                 n->mtuprobes = 10;
231                                 return;
232                         }
233
234                         if(n->minmtu)
235                                 n->mtuprobes = 30;
236                         else
237                                 n->mtuprobes = 1;
238                 }
239
240                 /* If applicable, raise the minimum supported MTU */
241
242                 if(probelen > n->maxmtu)
243                         probelen = n->maxmtu;
244                 if(n->minmtu < probelen)
245                         n->minmtu = probelen;
246
247                 /* Calculate RTT and bandwidth.
248                    The RTT is the time between the MTU probe burst was sent and the first
249                    reply is received. The bandwidth is measured using the time between the
250                    arrival of the first and third probe reply (or type 2 probe requests).
251                  */
252
253                 struct timeval now, diff;
254                 gettimeofday(&now, NULL);
255                 timersub(&now, &n->probe_time, &diff);
256
257                 struct timeval probe_timestamp = now;
258                 if (DATA(packet)[0] == 2 && packet->len >= 11) {
259                         uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
260                         uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
261                         probe_timestamp.tv_sec = ntohl(sec);
262                         probe_timestamp.tv_usec = ntohl(usec);
263                 }
264                 
265                 n->probe_counter++;
266
267                 if(n->probe_counter == 1) {
268                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
269                         n->probe_time = probe_timestamp;
270                 } else if(n->probe_counter == 3) {
271                         struct timeval probe_timestamp_diff;
272                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
273                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
274                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
275                 }
276         }
277 }
278
279 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
280         if(level == 0) {
281                 memcpy(dest, source, len);
282                 return len;
283         } else if(level == 10) {
284 #ifdef HAVE_LZO
285                 lzo_uint lzolen = MAXSIZE;
286                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
287                 return lzolen;
288 #else
289                 return -1;
290 #endif
291         } else if(level < 10) {
292 #ifdef HAVE_ZLIB
293                 unsigned long destlen = MAXSIZE;
294                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
295                         return destlen;
296                 else
297 #endif
298                         return -1;
299         } else {
300 #ifdef HAVE_LZO
301                 lzo_uint lzolen = MAXSIZE;
302                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
303                 return lzolen;
304 #else
305                 return -1;
306 #endif
307         }
308
309         return -1;
310 }
311
312 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
313         if(level == 0) {
314                 memcpy(dest, source, len);
315                 return len;
316         } else if(level > 9) {
317 #ifdef HAVE_LZO
318                 lzo_uint lzolen = MAXSIZE;
319                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
320                         return lzolen;
321                 else
322 #endif
323                         return -1;
324         }
325 #ifdef HAVE_ZLIB
326         else {
327                 unsigned long destlen = MAXSIZE;
328                 if(uncompress(dest, &destlen, source, len) == Z_OK)
329                         return destlen;
330                 else
331                         return -1;
332         }
333 #endif
334
335         return -1;
336 }
337
338 /* VPN packet I/O */
339
340 static void receive_packet(node_t *n, vpn_packet_t *packet) {
341         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
342                            packet->len, n->name, n->hostname);
343
344         n->in_packets++;
345         n->in_bytes += packet->len;
346
347         route(n, packet);
348 }
349
350 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
351         if(n->status.sptps)
352                 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
353
354         if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
355                 return false;
356
357         return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
358 }
359
360 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
361         vpn_packet_t pkt1, pkt2;
362         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
363         int nextpkt = 0;
364         size_t outlen;
365         pkt1.offset = DEFAULT_PACKET_OFFSET;
366         pkt2.offset = DEFAULT_PACKET_OFFSET;
367
368         if(n->status.sptps) {
369                 if(!n->sptps.state) {
370                         if(!n->status.waitingforkey) {
371                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
372                                 send_req_key(n);
373                         } else {
374                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
375                         }
376                         return false;
377                 }
378                 inpkt->offset += 2 * sizeof(node_id_t);
379                 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
380                         logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
381                         return false;
382                 }
383                 return true;
384         }
385
386         if(!n->status.validkey) {
387                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
388                 return false;
389         }
390
391         /* Check packet length */
392
393         if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
394                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
395                                         n->name, n->hostname);
396                 return false;
397         }
398
399         /* It's a legacy UDP packet, the data starts after the seqno */
400
401         inpkt->offset += sizeof(seqno_t);
402
403         /* Check the message authentication code */
404
405         if(digest_active(n->indigest)) {
406                 inpkt->len -= digest_length(n->indigest);
407                 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
408                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
409                         return false;
410                 }
411         }
412         /* Decrypt the packet */
413
414         if(cipher_active(n->incipher)) {
415                 vpn_packet_t *outpkt = pkt[nextpkt++];
416                 outlen = MAXSIZE;
417
418                 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
419                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
420                         return false;
421                 }
422
423                 outpkt->len = outlen;
424                 inpkt = outpkt;
425         }
426
427         /* Check the sequence number */
428
429         seqno_t seqno;
430         memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
431         seqno = ntohl(seqno);
432         inpkt->len -= sizeof seqno;
433
434         if(replaywin) {
435                 if(seqno != n->received_seqno + 1) {
436                         if(seqno >= n->received_seqno + replaywin * 8) {
437                                 if(n->farfuture++ < replaywin >> 2) {
438                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
439                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
440                                         return false;
441                                 }
442                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
443                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
444                                 memset(n->late, 0, replaywin);
445                         } else if (seqno <= n->received_seqno) {
446                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
447                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
448                                                 n->name, n->hostname, seqno, n->received_seqno);
449                                         return false;
450                                 }
451                         } else {
452                                 for(int i = n->received_seqno + 1; i < seqno; i++)
453                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
454                         }
455                 }
456
457                 n->farfuture = 0;
458                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
459         }
460
461         if(seqno > n->received_seqno)
462                 n->received_seqno = seqno;
463
464         n->received++;
465
466         if(n->received_seqno > MAX_SEQNO)
467                 regenerate_key();
468
469         /* Decompress the packet */
470
471         length_t origlen = inpkt->len;
472
473         if(n->incompression) {
474                 vpn_packet_t *outpkt = pkt[nextpkt++];
475
476                 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
477                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
478                                                  n->name, n->hostname);
479                         return false;
480                 }
481
482                 inpkt = outpkt;
483
484                 origlen -= MTU/64 + 20;
485         }
486
487         inpkt->priority = 0;
488
489         if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
490                 mtu_probe_h(n, inpkt, origlen);
491         else
492                 receive_packet(n, inpkt);
493         return true;
494 }
495
496 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
497         vpn_packet_t outpkt;
498         outpkt.offset = DEFAULT_PACKET_OFFSET;
499
500         if(len > sizeof outpkt.data - outpkt.offset)
501                 return;
502
503         outpkt.len = len;
504         if(c->options & OPTION_TCPONLY)
505                 outpkt.priority = 0;
506         else
507                 outpkt.priority = -1;
508         memcpy(DATA(&outpkt), buffer, len);
509
510         receive_packet(c->node, &outpkt);
511 }
512
513 static bool try_sptps(node_t *n) {
514         if(n->status.validkey)
515                 return true;
516
517         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
518
519         if(!n->status.waitingforkey)
520                 send_req_key(n);
521         else if(n->last_req_key + 10 < now.tv_sec) {
522                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
523                 sptps_stop(&n->sptps);
524                 n->status.waitingforkey = false;
525                 send_req_key(n);
526         }
527
528         return false;
529 }
530
531 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
532         if (!try_sptps(n))
533                 return;
534
535         uint8_t type = 0;
536         int offset = 0;
537
538         if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
539                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
540                 return;
541         }
542
543         if(routing_mode == RMODE_ROUTER)
544                 offset = 14;
545         else
546                 type = PKT_MAC;
547
548         if(origpkt->len < offset)
549                 return;
550
551         vpn_packet_t outpkt;
552
553         if(n->outcompression) {
554                 outpkt.offset = 0;
555                 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
556                 if(len < 0) {
557                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
558                 } else if(len < origpkt->len - offset) {
559                         outpkt.len = len + offset;
560                         origpkt = &outpkt;
561                         type |= PKT_COMPRESSED;
562                 }
563         }
564
565         sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
566         return;
567 }
568
569 static void adapt_socket(const sockaddr_t *sa, int *sock) {
570         /* Make sure we have a suitable socket for the chosen address */
571         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
572                 for(int i = 0; i < listen_sockets; i++) {
573                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
574                                 *sock = i;
575                                 break;
576                         }
577                 }
578         }
579 }
580
581 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
582         /* Latest guess */
583         *sa = &n->address;
584         *sock = n->sock;
585
586         /* If the UDP address is confirmed, use it. */
587         if(n->status.udp_confirmed)
588                 return;
589
590         /* Send every third packet to n->address; that could be set
591            to the node's reflexive UDP address discovered during key
592            exchange. */
593
594         static int x = 0;
595         if(++x >= 3) {
596                 x = 0;
597                 return;
598         }
599
600         /* Otherwise, address are found in edges to this node.
601            So we pick a random edge and a random socket. */
602
603         int i = 0;
604         int j = rand() % n->edge_tree->count;
605         edge_t *candidate = NULL;
606
607         for splay_each(edge_t, e, n->edge_tree) {
608                 if(i++ == j) {
609                         candidate = e->reverse;
610                         break;
611                 }
612         }
613
614         if(candidate) {
615                 *sa = &candidate->address;
616                 *sock = rand() % listen_sockets;
617         }
618
619         adapt_socket(*sa, sock);
620 }
621
622 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
623         *sa = NULL;
624
625         /* Pick one of the edges from this node at random, then use its local address. */
626
627         int i = 0;
628         int j = rand() % n->edge_tree->count;
629         edge_t *candidate = NULL;
630
631         for splay_each(edge_t, e, n->edge_tree) {
632                 if(i++ == j) {
633                         candidate = e;
634                         break;
635                 }
636         }
637
638         if (candidate && candidate->local_address.sa.sa_family) {
639                 *sa = &candidate->local_address;
640                 *sock = rand() % listen_sockets;
641                 adapt_socket(*sa, sock);
642         }
643 }
644
645 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
646         vpn_packet_t pkt1, pkt2;
647         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
648         vpn_packet_t *inpkt = origpkt;
649         int nextpkt = 0;
650         vpn_packet_t *outpkt;
651         int origlen = origpkt->len;
652         size_t outlen;
653 #if defined(SOL_IP) && defined(IP_TOS)
654         static int priority = 0;
655         int origpriority = origpkt->priority;
656 #endif
657
658         pkt1.offset = DEFAULT_PACKET_OFFSET;
659         pkt2.offset = DEFAULT_PACKET_OFFSET;
660
661         if(!n->status.reachable) {
662                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
663                 return;
664         }
665
666         if(n->status.sptps)
667                 return send_sptps_packet(n, origpkt);
668
669         /* Make sure we have a valid key */
670
671         if(!n->status.validkey) {
672                 logger(DEBUG_TRAFFIC, LOG_INFO,
673                                    "No valid key known yet for %s (%s), forwarding via TCP",
674                                    n->name, n->hostname);
675
676                 if(n->last_req_key + 10 <= now.tv_sec) {
677                         send_req_key(n);
678                         n->last_req_key = now.tv_sec;
679                 }
680
681                 send_tcppacket(n->nexthop->connection, origpkt);
682
683                 return;
684         }
685
686         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
687                 logger(DEBUG_TRAFFIC, LOG_INFO,
688                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
689                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
690
691                 if(n != n->nexthop)
692                         send_packet(n->nexthop, origpkt);
693                 else
694                         send_tcppacket(n->nexthop->connection, origpkt);
695
696                 return;
697         }
698
699         /* Compress the packet */
700
701         if(n->outcompression) {
702                 outpkt = pkt[nextpkt++];
703
704                 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
705                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
706                                    n->name, n->hostname);
707                         return;
708                 }
709
710                 inpkt = outpkt;
711         }
712
713         /* Add sequence number */
714
715         seqno_t seqno = htonl(++(n->sent_seqno));
716         memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
717         inpkt->len += sizeof seqno;
718
719         /* Encrypt the packet */
720
721         if(cipher_active(n->outcipher)) {
722                 outpkt = pkt[nextpkt++];
723                 outlen = MAXSIZE;
724
725                 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
726                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
727                         goto end;
728                 }
729
730                 outpkt->len = outlen;
731                 inpkt = outpkt;
732         }
733
734         /* Add the message authentication code */
735
736         if(digest_active(n->outdigest)) {
737                 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
738                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
739                         goto end;
740                 }
741
742                 inpkt->len += digest_length(n->outdigest);
743         }
744
745         /* Send the packet */
746
747         const sockaddr_t *sa = NULL;
748         int sock;
749
750         if(n->status.send_locally)
751                 choose_local_address(n, &sa, &sock);
752         if(!sa)
753                 choose_udp_address(n, &sa, &sock);
754
755 #if defined(SOL_IP) && defined(IP_TOS)
756         if(priorityinheritance && origpriority != priority
757            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
758                 priority = origpriority;
759                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
760                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
761                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
762         }
763 #endif
764
765         if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
766                 if(sockmsgsize(sockerrno)) {
767                         if(n->maxmtu >= origlen)
768                                 n->maxmtu = origlen - 1;
769                         if(n->mtu >= origlen)
770                                 n->mtu = origlen - 1;
771                 } else
772                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
773         }
774
775 end:
776         origpkt->len = origlen;
777 }
778
779 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
780         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
781         bool direct = from == myself && to == relay;
782         bool relay_supported = (relay->options >> 24) >= 4;
783         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
784
785         /* We don't really need the relay's key, but we need to establish a UDP tunnel with it and discover its MTU. */
786         if (!direct && relay_supported && !tcponly)
787                 try_sptps(relay);
788
789         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
790            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
791                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
792
793         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
794                 char buf[len * 4 / 3 + 5];
795                 b64encode(data, buf, len);
796                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
797                    to ensure we get to learn the reflexive UDP address. */
798                 if(from == myself && !to->status.validkey) {
799                         to->incompression = myself->incompression;
800                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
801                 } else {
802                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
803                 }
804         }
805
806         size_t overhead = 0;
807         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
808         char buf[len + overhead]; char* buf_ptr = buf;
809         if(relay_supported) {
810                 if(direct) {
811                         /* Inform the recipient that this packet was sent directly. */
812                         node_id_t nullid = {};
813                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
814                 } else {
815                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
816                 }
817                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
818
819         }
820         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
821         memcpy(buf_ptr, data, len); buf_ptr += len;
822
823         const sockaddr_t *sa = NULL;
824         int sock;
825         if(relay->status.send_locally)
826                 choose_local_address(relay, &sa, &sock);
827         if(!sa)
828                 choose_udp_address(relay, &sa, &sock);
829         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
830         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
831                 if(sockmsgsize(sockerrno)) {
832                         // Compensate for SPTPS overhead
833                         len -= SPTPS_DATAGRAM_OVERHEAD;
834                         if(relay->maxmtu >= len)
835                                 relay->maxmtu = len - 1;
836                         if(relay->mtu >= len)
837                                 relay->mtu = len - 1;
838                 } else {
839                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
840                         return false;
841                 }
842         }
843
844         return true;
845 }
846
847 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
848         return send_sptps_data_priv(handle, myself, type, data, len);
849 }
850
851 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
852         node_t *from = handle;
853
854         if(type == SPTPS_HANDSHAKE) {
855                 if(!from->status.validkey) {
856                         from->status.validkey = true;
857                         from->status.waitingforkey = false;
858                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
859                 }
860                 return true;
861         }
862
863         if(len > MTU) {
864                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
865                 return false;
866         }
867
868         vpn_packet_t inpkt;
869         inpkt.offset = DEFAULT_PACKET_OFFSET;
870
871         if(type == PKT_PROBE) {
872                 inpkt.len = len;
873                 memcpy(DATA(&inpkt), data, len);
874                 mtu_probe_h(from, &inpkt, len);
875                 return true;
876         }
877
878         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
879                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
880                 return false;
881         }
882
883         /* Check if we have the headers we need */
884         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
885                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
886                 return false;
887         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
888                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
889         }
890
891         int offset = (type & PKT_MAC) ? 0 : 14;
892         if(type & PKT_COMPRESSED) {
893                 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
894                 if(ulen < 0) {
895                         return false;
896                 } else {
897                         inpkt.len = ulen + offset;
898                 }
899                 if(inpkt.len > MAXSIZE)
900                         abort();
901         } else {
902                 memcpy(DATA(&inpkt) + offset, data, len);
903                 inpkt.len = len + offset;
904         }
905
906         /* Generate the Ethernet packet type if necessary */
907         if(offset) {
908                 switch(DATA(&inpkt)[14] >> 4) {
909                         case 4:
910                                 DATA(&inpkt)[12] = 0x08;
911                                 DATA(&inpkt)[13] = 0x00;
912                                 break;
913                         case 6:
914                                 DATA(&inpkt)[12] = 0x86;
915                                 DATA(&inpkt)[13] = 0xDD;
916                                 break;
917                         default:
918                                 logger(DEBUG_TRAFFIC, LOG_ERR,
919                                                    "Unknown IP version %d while reading packet from %s (%s)",
920                                                    DATA(&inpkt)[14] >> 4, from->name, from->hostname);
921                                 return false;
922                 }
923         }
924
925         receive_packet(from, &inpkt);
926         return true;
927 }
928
929 /*
930   send a packet to the given vpn ip.
931 */
932 void send_packet(node_t *n, vpn_packet_t *packet) {
933         node_t *via;
934
935         if(n == myself) {
936                 if(overwrite_mac)
937                          memcpy(DATA(packet), mymac.x, ETH_ALEN);
938                 n->out_packets++;
939                 n->out_bytes += packet->len;
940                 devops.write(packet);
941                 return;
942         }
943
944         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
945                            packet->len, n->name, n->hostname);
946
947         if(!n->status.reachable) {
948                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
949                                    n->name, n->hostname);
950                 return;
951         }
952
953         n->out_packets++;
954         n->out_bytes += packet->len;
955
956         if(n->status.sptps) {
957                 send_sptps_packet(n, packet);
958                 return;
959         }
960
961         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
962
963         if(via != n)
964                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
965                            n->name, via->name, n->via->hostname);
966
967         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
968                 if(!send_tcppacket(via->connection, packet))
969                         terminate_connection(via->connection, true);
970         } else
971                 send_udppacket(via, packet);
972 }
973
974 /* Broadcast a packet using the minimum spanning tree */
975
976 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
977         // Always give ourself a copy of the packet.
978         if(from != myself)
979                 send_packet(myself, packet);
980
981         // In TunnelServer mode, do not forward broadcast packets.
982         // The MST might not be valid and create loops.
983         if(tunnelserver || broadcast_mode == BMODE_NONE)
984                 return;
985
986         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
987                            packet->len, from->name, from->hostname);
988
989         switch(broadcast_mode) {
990                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
991                 // This guarantees all nodes receive the broadcast packet, and
992                 // usually distributes the sending of broadcast packets over all nodes.
993                 case BMODE_MST:
994                         for list_each(connection_t, c, connection_list)
995                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
996                                         send_packet(c->node, packet);
997                         break;
998
999                 // In direct mode, we send copies to each node we know of.
1000                 // However, this only reaches nodes that can be reached in a single hop.
1001                 // We don't have enough information to forward broadcast packets in this case.
1002                 case BMODE_DIRECT:
1003                         if(from != myself)
1004                                 break;
1005
1006                         for splay_each(node_t, n, node_tree)
1007                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1008                                         send_packet(n, packet);
1009                         break;
1010
1011                 default:
1012                         break;
1013         }
1014 }
1015
1016 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1017         node_t *n = NULL;
1018         bool hard = false;
1019         static time_t last_hard_try = 0;
1020
1021         for splay_each(edge_t, e, edge_weight_tree) {
1022                 if(!e->to->status.reachable || e->to == myself)
1023                         continue;
1024
1025                 if(sockaddrcmp_noport(from, &e->address)) {
1026                         if(last_hard_try == now.tv_sec)
1027                                 continue;
1028                         hard = true;
1029                 }
1030
1031                 if(!try_mac(e->to, pkt))
1032                         continue;
1033
1034                 n = e->to;
1035                 break;
1036         }
1037
1038         if(hard)
1039                 last_hard_try = now.tv_sec;
1040
1041         last_hard_try = now.tv_sec;
1042         return n;
1043 }
1044
1045 void handle_incoming_vpn_data(void *data, int flags) {
1046         listen_socket_t *ls = data;
1047         vpn_packet_t pkt;
1048         char *hostname;
1049         node_id_t nullid = {};
1050         sockaddr_t addr = {};
1051         socklen_t addrlen = sizeof addr;
1052         node_t *from, *to;
1053         bool direct = false;
1054
1055         pkt.offset = 0;
1056         int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1057
1058         if(len <= 0 || len > MAXSIZE) {
1059                 if(!sockwouldblock(sockerrno))
1060                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1061                 return;
1062         }
1063
1064         pkt.len = len;
1065
1066         sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1067
1068         // Try to figure out who sent this packet.
1069
1070         node_t *n = lookup_node_udp(&addr);
1071
1072         if(!n) {
1073                 // It might be from a 1.1 node, which might have a source ID in the packet.
1074                 pkt.offset = 2 * sizeof(node_id_t);
1075                 from = lookup_node_id(SRCID(&pkt));
1076                 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1077                         if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1078                                 n = from;
1079                         else
1080                                 goto skip_harder;
1081                 }
1082         }
1083
1084         if(!n) {
1085                 pkt.offset = 0;
1086                 n = try_harder(&addr, &pkt);
1087         }
1088
1089 skip_harder:
1090         if(!n) {
1091                 if(debug_level >= DEBUG_PROTOCOL) {
1092                         hostname = sockaddr2hostname(&addr);
1093                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1094                         free(hostname);
1095                 }
1096                 return;
1097         }
1098
1099         if(n->status.sptps) {
1100                 pkt.offset = 2 * sizeof(node_id_t);
1101
1102                 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1103                         direct = true;
1104                         from = n;
1105                         to = myself;
1106                 } else {
1107                         from = lookup_node_id(SRCID(&pkt));
1108                         to = lookup_node_id(DSTID(&pkt));
1109                 }
1110                 if(!from || !to) {
1111                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1112                         return;
1113                 }
1114
1115                 if(to != myself) {
1116                         send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1117                         return;
1118                 }
1119         } else {
1120                 direct = true;
1121                 from = n;
1122         }
1123
1124         pkt.offset = 0;
1125         if(!receive_udppacket(from, &pkt))
1126                 return;
1127
1128         n->sock = ls - listen_socket;
1129         if(direct && sockaddrcmp(&addr, &n->address))
1130                 update_node_udp(n, &addr);
1131 }
1132
1133 void handle_device_data(void *data, int flags) {
1134         vpn_packet_t packet;
1135         packet.offset = DEFAULT_PACKET_OFFSET;
1136         packet.priority = 0;
1137
1138         if(devops.read(&packet)) {
1139                 myself->in_packets++;
1140                 myself->in_bytes += packet.len;
1141                 route(myself, &packet);
1142         }
1143 }