Move try_sptps() closer to try_tx().
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
/* Larger of two values. Each argument may be evaluated twice, so do not
   pass expressions with side effects. */
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

/* Not referenced in this part of the file. */
int keylifetime = 0;
#ifdef HAVE_LZO
/* Scratch memory handed to the LZO compressors in compress_packet();
   sized for whichever of the two algorithms used there needs more. */
static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
#endif

/* Forward declaration: the MTU probe code below calls send_udppacket()
   before its definition. */
static void send_udppacket(node_t *, vpn_packet_t *);

/* Size in bytes of the per-node late-packet bitmap (n->late) used by
   receive_udppacket(); each byte tracks 8 sequence numbers. A value of 0
   disables the replay check. */
unsigned replaywin = 16;
/* When true, the first few MTU probe bursts also include a probe sent to
   a local address of the peer (see send_mtu_probe_handler()). */
bool localdiscovery = true;

/* Once a node's highest received sequence number exceeds this value,
   receive_udppacket() calls regenerate_key(). */
#define MAX_SEQNO 1073741824
64
65 static void send_mtu_probe_packet(node_t *n, int len) {
66         vpn_packet_t packet;
67         packet.offset = DEFAULT_PACKET_OFFSET;
68         memset(DATA(&packet), 0, 14);
69         randomize(DATA(&packet) + 14, len - 14);
70         packet.len = len;
71         packet.priority = 0;
72
73         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
74
75         send_udppacket(n, &packet);
76 }
77
/* Timer callback driving path MTU discovery for one node.
   `data` is the node_t the timer was registered for. Each invocation
   advances n->mtuprobes, possibly sends a burst of probe packets, updates
   the packet loss estimate and re-arms n->mtutimeout. */
static void send_mtu_probe_handler(void *data) {
        node_t *n = data;

        /* Without reachability or a valid key no probe can be sent. Note
           that this path returns without re-arming the timer. */
        if(!n->status.reachable || !n->status.validkey) {
                logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
                n->mtuprobes = 0;
                return;
        }

        /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
           mtuprobes ==    31: sleep pinginterval seconds
           mtuprobes ==    32: send 1 burst, sleep pingtimeout seconds
           mtuprobes ==    33: no response from other side, restart PMTU discovery process */

        n->mtuprobes++;
        int timeout = 1;

        if(n->mtuprobes > 32) {
                /* No MTU was ever established: just go back to sleeping
                   instead of restarting discovery. */
                if(!n->minmtu) {
                        n->mtuprobes = 31;
                        timeout = pinginterval;
                        goto end;
                }

                logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
                n->status.udp_confirmed = false;
                n->mtuprobes = 1;
                n->minmtu = 0;
                n->maxmtu = MTU;
        }

        /* Ten bursts without any reply: give up the initial discovery. */
        if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
                logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
                n->mtuprobes = 31;
        }

        /* Discovery finished, either because all 30 bursts were used or
           because the lower and upper bounds met: settle on minmtu. */
        if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
                if(n->minmtu > n->maxmtu)
                        n->minmtu = n->maxmtu;
                else
                        n->maxmtu = n->minmtu;
                n->mtu = n->minmtu;
                logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
                n->mtuprobes = 31;
        }

        if(n->mtuprobes == 31) {
                /* Sleep state: send nothing this round. */
                timeout = pinginterval;
                goto end;
        } else if(n->mtuprobes == 32) {
                timeout = pingtimeout;
        }

        /* After the initial discovery, a fourth packet is added to each batch with a
           size larger than the currently known PMTU, to test if the PMTU has increased. */
        if (n->mtuprobes >= 30 && n->maxmtu + 8 < MTU)
                send_mtu_probe_packet(n, n->maxmtu + 8);

        /* Probes are sent in batches of three, with random sizes between the
           lower and upper boundaries for the MTU thus far discovered. */
        for (int i = 0; i < 3; i++) {
                int len = n->maxmtu;
                if(n->minmtu < n->maxmtu)
                        len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);

                /* Probes are never smaller than 64 bytes. */
                send_mtu_probe_packet(n, MAX(len, 64));
        }

        /* In case local discovery is enabled, another packet is added to each batch,
           which will be broadcast to the local network. */
        if(localdiscovery && n->mtuprobes <= 10 && n->prevedge) {
                /* send_locally only needs to be set for the duration of this one send. */
                n->status.send_locally = true;
                send_mtu_probe_packet(n, 16);
                n->status.send_locally = false;
        }

        /* Start of a new burst: mtu_probe_h() measures RTT and bandwidth
           relative to these values. */
        n->probe_counter = 0;
        gettimeofday(&n->probe_time, NULL);

        /* Calculate the packet loss of incoming traffic by comparing the rate of
           packets received to the rate with which the sequence number has increased.
         */

        /* NOTE(review): if packets were received but received_seqno did not
           advance since the previous burst, the divisor below is zero --
           confirm whether that can happen. */
        if(n->received > n->prev_received)
                n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
        else
                n->packetloss = n->received_seqno <= n->prev_received_seqno;

        n->prev_received_seqno = n->received_seqno;
        n->prev_received = n->received;

end:
        /* Re-arm the timer, with up to 0.1 s of random jitter. */
        timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
}
172
173 void send_mtu_probe(node_t *n) {
174         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
175         send_mtu_probe_handler(n);
176 }
177
/* Handle an MTU probe packet received from node n.
   `packet` is the decoded probe and `len` the length to report for it
   (receive_udppacket() passes the packet's length before decompression,
   adjusted when compression is in use).
   The first data byte selects the kind of probe:
     0 - probe request: a reply is sent back;
     1 - legacy probe reply;
     2 - type 2 probe reply carrying the probe length and a timestamp.
   Replies update the UDP confirmation state, the MTU bounds and the
   RTT/bandwidth estimates of n. */
static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
        if(!DATA(packet)[0]) {
                logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);

                /* It's a probe request, send back a reply */

                /* Type 2 probe replies were introduced in protocol 17.3 */
                if ((n->options >> 24) >= 3) {
                        /* Reply layout: type (1 byte), probe length (2 bytes,
                           network order), seconds and microseconds of the
                           current time (4 bytes each, network order). */
                        uint8_t *data = DATA(packet);
                        *data++ = 2;
                        uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
                        struct timeval now;
                        gettimeofday(&now, NULL);
                        uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
                        uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
                        /* NOTE(review): the reply is sent 10 bytes shorter than the
                           request; the reason is not visible in this file, and a
                           request shorter than 10 bytes would make this wrap if
                           the length field is unsigned -- confirm. */
                        packet->len -= 10;
                } else {
                        /* Legacy protocol: n won't understand type 2 probe replies. */
                        DATA(packet)[0] = 1;
                }

                /* Temporarily set udp_confirmed, so that the reply is sent
                   back exactly the way it came in. */

                bool udp_confirmed = n->status.udp_confirmed;
                n->status.udp_confirmed = true;
                send_udppacket(n, packet);
                n->status.udp_confirmed = udp_confirmed;
        } else {
                /* For legacy replies the probe length is the length of the
                   reply itself; type 2 replies carry it explicitly. */
                length_t probelen = len;
                if (DATA(packet)[0] == 2) {
                        /* A too-short type 2 reply is only logged; processing
                           continues with probelen == len. */
                        if (len < 3)
                                logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
                        else {
                                uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
                        }
                }
                logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);

                /* It's a valid reply: now we know bidirectional communication
                   is possible using the address and socket that the reply
                   packet used. */

                n->status.udp_confirmed = true;

                /* If we haven't established the PMTU yet, restart the discovery process. */

                if(n->mtuprobes > 30) {
                        /* A reply to the oversized (maxmtu + 8) probe means the
                           path MTU has grown. */
                        if (probelen == n->maxmtu + 8) {
                                logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
                                n->maxmtu = MTU;
                                n->mtuprobes = 10;
                                return;
                        }

                        if(n->minmtu)
                                n->mtuprobes = 30;
                        else
                                n->mtuprobes = 1;
                }

                /* If applicable, raise the minimum supported MTU */

                if(probelen > n->maxmtu)
                        probelen = n->maxmtu;
                if(n->minmtu < probelen)
                        n->minmtu = probelen;

                /* Calculate RTT and bandwidth.
                   The RTT is the time between the MTU probe burst was sent and the first
                   reply is received. The bandwidth is measured using the time between the
                   arrival of the first and third probe reply (or type 2 probe requests).
                 */

                struct timeval now, diff;
                gettimeofday(&now, NULL);
                timersub(&now, &n->probe_time, &diff);

                /* Prefer the timestamp embedded in a type 2 reply over the
                   local arrival time, when the reply is long enough to hold one. */
                struct timeval probe_timestamp = now;
                if (DATA(packet)[0] == 2 && packet->len >= 11) {
                        uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
                        uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
                        probe_timestamp.tv_sec = ntohl(sec);
                        probe_timestamp.tv_usec = ntohl(usec);
                }
                
                n->probe_counter++;

                if(n->probe_counter == 1) {
                        n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
                        /* From here on probe_time holds the timestamp of the
                           first reply, the reference for the bandwidth estimate. */
                        n->probe_time = probe_timestamp;
                } else if(n->probe_counter == 3) {
                        struct timeval probe_timestamp_diff;
                        timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
                        /* NOTE(review): identical timestamps for the first and
                           third reply make the divisor zero -- confirm this is
                           acceptable. */
                        n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
                        logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
                }
        }
}
277
278 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
279         if(level == 0) {
280                 memcpy(dest, source, len);
281                 return len;
282         } else if(level == 10) {
283 #ifdef HAVE_LZO
284                 lzo_uint lzolen = MAXSIZE;
285                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
286                 return lzolen;
287 #else
288                 return -1;
289 #endif
290         } else if(level < 10) {
291 #ifdef HAVE_ZLIB
292                 unsigned long destlen = MAXSIZE;
293                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
294                         return destlen;
295                 else
296 #endif
297                         return -1;
298         } else {
299 #ifdef HAVE_LZO
300                 lzo_uint lzolen = MAXSIZE;
301                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
302                 return lzolen;
303 #else
304                 return -1;
305 #endif
306         }
307
308         return -1;
309 }
310
311 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
312         if(level == 0) {
313                 memcpy(dest, source, len);
314                 return len;
315         } else if(level > 9) {
316 #ifdef HAVE_LZO
317                 lzo_uint lzolen = MAXSIZE;
318                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
319                         return lzolen;
320                 else
321 #endif
322                         return -1;
323         }
324 #ifdef HAVE_ZLIB
325         else {
326                 unsigned long destlen = MAXSIZE;
327                 if(uncompress(dest, &destlen, source, len) == Z_OK)
328                         return destlen;
329                 else
330                         return -1;
331         }
332 #endif
333
334         return -1;
335 }
336
337 /* VPN packet I/O */
338
339 static void receive_packet(node_t *n, vpn_packet_t *packet) {
340         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
341                            packet->len, n->name, n->hostname);
342
343         n->in_packets++;
344         n->in_bytes += packet->len;
345
346         route(n, packet);
347 }
348
349 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
350         if(n->status.sptps)
351                 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
352
353 #ifdef DISABLE_LEGACY
354         return false;
355 #else
356         if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
357                 return false;
358
359         return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
360 #endif
361 }
362
363 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
364         vpn_packet_t pkt1, pkt2;
365         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
366         int nextpkt = 0;
367         size_t outlen;
368         pkt1.offset = DEFAULT_PACKET_OFFSET;
369         pkt2.offset = DEFAULT_PACKET_OFFSET;
370
371         if(n->status.sptps) {
372                 if(!n->sptps.state) {
373                         if(!n->status.waitingforkey) {
374                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
375                                 send_req_key(n);
376                         } else {
377                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
378                         }
379                         return false;
380                 }
381                 inpkt->offset += 2 * sizeof(node_id_t);
382                 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
383                         logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
384                         return false;
385                 }
386                 return true;
387         }
388
389 #ifdef DISABLE_LEGACY
390         return false;
391 #else
392         if(!n->status.validkey) {
393                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
394                 return false;
395         }
396
397         /* Check packet length */
398
399         if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
400                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
401                                         n->name, n->hostname);
402                 return false;
403         }
404
405         /* It's a legacy UDP packet, the data starts after the seqno */
406
407         inpkt->offset += sizeof(seqno_t);
408
409         /* Check the message authentication code */
410
411         if(digest_active(n->indigest)) {
412                 inpkt->len -= digest_length(n->indigest);
413                 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
414                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
415                         return false;
416                 }
417         }
418         /* Decrypt the packet */
419
420         if(cipher_active(n->incipher)) {
421                 vpn_packet_t *outpkt = pkt[nextpkt++];
422                 outlen = MAXSIZE;
423
424                 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
425                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
426                         return false;
427                 }
428
429                 outpkt->len = outlen;
430                 inpkt = outpkt;
431         }
432
433         /* Check the sequence number */
434
435         seqno_t seqno;
436         memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
437         seqno = ntohl(seqno);
438         inpkt->len -= sizeof seqno;
439
440         if(replaywin) {
441                 if(seqno != n->received_seqno + 1) {
442                         if(seqno >= n->received_seqno + replaywin * 8) {
443                                 if(n->farfuture++ < replaywin >> 2) {
444                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
445                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
446                                         return false;
447                                 }
448                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
449                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
450                                 memset(n->late, 0, replaywin);
451                         } else if (seqno <= n->received_seqno) {
452                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
453                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
454                                                 n->name, n->hostname, seqno, n->received_seqno);
455                                         return false;
456                                 }
457                         } else {
458                                 for(int i = n->received_seqno + 1; i < seqno; i++)
459                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
460                         }
461                 }
462
463                 n->farfuture = 0;
464                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
465         }
466
467         if(seqno > n->received_seqno)
468                 n->received_seqno = seqno;
469
470         n->received++;
471
472         if(n->received_seqno > MAX_SEQNO)
473                 regenerate_key();
474
475         /* Decompress the packet */
476
477         length_t origlen = inpkt->len;
478
479         if(n->incompression) {
480                 vpn_packet_t *outpkt = pkt[nextpkt++];
481
482                 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
483                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
484                                                  n->name, n->hostname);
485                         return false;
486                 }
487
488                 inpkt = outpkt;
489
490                 origlen -= MTU/64 + 20;
491         }
492
493         inpkt->priority = 0;
494
495         if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
496                 mtu_probe_h(n, inpkt, origlen);
497         else
498                 receive_packet(n, inpkt);
499         return true;
500 #endif
501 }
502
503 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
504         vpn_packet_t outpkt;
505         outpkt.offset = DEFAULT_PACKET_OFFSET;
506
507         if(len > sizeof outpkt.data - outpkt.offset)
508                 return;
509
510         outpkt.len = len;
511         if(c->options & OPTION_TCPONLY)
512                 outpkt.priority = 0;
513         else
514                 outpkt.priority = -1;
515         memcpy(DATA(&outpkt), buffer, len);
516
517         receive_packet(c->node, &outpkt);
518 }
519
520 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
521         if(!n->status.validkey && !n->connection)
522                 return;
523
524         uint8_t type = 0;
525         int offset = 0;
526
527         if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
528                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
529                 return;
530         }
531
532         if(routing_mode == RMODE_ROUTER)
533                 offset = 14;
534         else
535                 type = PKT_MAC;
536
537         if(origpkt->len < offset)
538                 return;
539
540         vpn_packet_t outpkt;
541
542         if(n->outcompression) {
543                 outpkt.offset = 0;
544                 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
545                 if(len < 0) {
546                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
547                 } else if(len < origpkt->len - offset) {
548                         outpkt.len = len + offset;
549                         origpkt = &outpkt;
550                         type |= PKT_COMPRESSED;
551                 }
552         }
553
554         /* If we have a direct metaconnection to n, and we can't use UDP, then
555            don't bother with SPTPS and just use a "plaintext" PACKET message.
556            We don't really care about end-to-end security since we're not
557            sending the message through any intermediate nodes. */
558         if(n->connection && origpkt->len > n->minmtu)
559                 send_tcppacket(n->connection, origpkt);
560         else
561                 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
562         return;
563 }
564
565 static void adapt_socket(const sockaddr_t *sa, int *sock) {
566         /* Make sure we have a suitable socket for the chosen address */
567         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
568                 for(int i = 0; i < listen_sockets; i++) {
569                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
570                                 *sock = i;
571                                 break;
572                         }
573                 }
574         }
575 }
576
577 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
578         /* Latest guess */
579         *sa = &n->address;
580         *sock = n->sock;
581
582         /* If the UDP address is confirmed, use it. */
583         if(n->status.udp_confirmed)
584                 return;
585
586         /* Send every third packet to n->address; that could be set
587            to the node's reflexive UDP address discovered during key
588            exchange. */
589
590         static int x = 0;
591         if(++x >= 3) {
592                 x = 0;
593                 return;
594         }
595
596         /* Otherwise, address are found in edges to this node.
597            So we pick a random edge and a random socket. */
598
599         int i = 0;
600         int j = rand() % n->edge_tree->count;
601         edge_t *candidate = NULL;
602
603         for splay_each(edge_t, e, n->edge_tree) {
604                 if(i++ == j) {
605                         candidate = e->reverse;
606                         break;
607                 }
608         }
609
610         if(candidate) {
611                 *sa = &candidate->address;
612                 *sock = rand() % listen_sockets;
613         }
614
615         adapt_socket(*sa, sock);
616 }
617
618 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
619         *sa = NULL;
620
621         /* Pick one of the edges from this node at random, then use its local address. */
622
623         int i = 0;
624         int j = rand() % n->edge_tree->count;
625         edge_t *candidate = NULL;
626
627         for splay_each(edge_t, e, n->edge_tree) {
628                 if(i++ == j) {
629                         candidate = e;
630                         break;
631                 }
632         }
633
634         if (candidate && candidate->local_address.sa.sa_family) {
635                 *sa = &candidate->local_address;
636                 *sock = rand() % listen_sockets;
637                 adapt_socket(*sa, sock);
638         }
639 }
640
641 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
642         vpn_packet_t pkt1, pkt2;
643         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
644         vpn_packet_t *inpkt = origpkt;
645         int nextpkt = 0;
646         vpn_packet_t *outpkt;
647         int origlen = origpkt->len;
648         size_t outlen;
649 #if defined(SOL_IP) && defined(IP_TOS)
650         static int priority = 0;
651         int origpriority = origpkt->priority;
652 #endif
653
654         pkt1.offset = DEFAULT_PACKET_OFFSET;
655         pkt2.offset = DEFAULT_PACKET_OFFSET;
656
657         if(!n->status.reachable) {
658                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
659                 return;
660         }
661
662         if(n->status.sptps)
663                 return send_sptps_packet(n, origpkt);
664
665 #ifdef DISABLE_LEGACY
666         return;
667 #else
668         /* Make sure we have a valid key */
669
670         if(!n->status.validkey) {
671                 logger(DEBUG_TRAFFIC, LOG_INFO,
672                                    "No valid key known yet for %s (%s), forwarding via TCP",
673                                    n->name, n->hostname);
674                 send_tcppacket(n->nexthop->connection, origpkt);
675                 return;
676         }
677
678         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
679                 logger(DEBUG_TRAFFIC, LOG_INFO,
680                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
681                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
682
683                 if(n != n->nexthop)
684                         send_packet(n->nexthop, origpkt);
685                 else
686                         send_tcppacket(n->nexthop->connection, origpkt);
687
688                 return;
689         }
690
691         /* Compress the packet */
692
693         if(n->outcompression) {
694                 outpkt = pkt[nextpkt++];
695
696                 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
697                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
698                                    n->name, n->hostname);
699                         return;
700                 }
701
702                 inpkt = outpkt;
703         }
704
705         /* Add sequence number */
706
707         seqno_t seqno = htonl(++(n->sent_seqno));
708         memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
709         inpkt->len += sizeof seqno;
710
711         /* Encrypt the packet */
712
713         if(cipher_active(n->outcipher)) {
714                 outpkt = pkt[nextpkt++];
715                 outlen = MAXSIZE;
716
717                 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
718                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
719                         goto end;
720                 }
721
722                 outpkt->len = outlen;
723                 inpkt = outpkt;
724         }
725
726         /* Add the message authentication code */
727
728         if(digest_active(n->outdigest)) {
729                 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
730                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
731                         goto end;
732                 }
733
734                 inpkt->len += digest_length(n->outdigest);
735         }
736
737         /* Send the packet */
738
739         const sockaddr_t *sa = NULL;
740         int sock;
741
742         if(n->status.send_locally)
743                 choose_local_address(n, &sa, &sock);
744         if(!sa)
745                 choose_udp_address(n, &sa, &sock);
746
747 #if defined(SOL_IP) && defined(IP_TOS)
748         if(priorityinheritance && origpriority != priority
749            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
750                 priority = origpriority;
751                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
752                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
753                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
754         }
755 #endif
756
757         if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
758                 if(sockmsgsize(sockerrno)) {
759                         if(n->maxmtu >= origlen)
760                                 n->maxmtu = origlen - 1;
761                         if(n->mtu >= origlen)
762                                 n->mtu = origlen - 1;
763                 } else
764                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
765         }
766
767 end:
768         origpkt->len = origlen;
769 #endif
770 }
771
772 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
773         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
774         bool direct = from == myself && to == relay;
775         bool relay_supported = (relay->options >> 24) >= 4;
776         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
777
778         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
779            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
780                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
781
782         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
783                 char buf[len * 4 / 3 + 5];
784                 b64encode(data, buf, len);
785                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
786                    to ensure we get to learn the reflexive UDP address. */
787                 if(from == myself && !to->status.validkey) {
788                         to->incompression = myself->incompression;
789                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
790                 } else {
791                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
792                 }
793         }
794
795         size_t overhead = 0;
796         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
797         char buf[len + overhead]; char* buf_ptr = buf;
798         if(relay_supported) {
799                 if(direct) {
800                         /* Inform the recipient that this packet was sent directly. */
801                         node_id_t nullid = {};
802                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
803                 } else {
804                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
805                 }
806                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
807
808         }
809         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
810         memcpy(buf_ptr, data, len); buf_ptr += len;
811
812         const sockaddr_t *sa = NULL;
813         int sock;
814         if(relay->status.send_locally)
815                 choose_local_address(relay, &sa, &sock);
816         if(!sa)
817                 choose_udp_address(relay, &sa, &sock);
818         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
819         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
820                 if(sockmsgsize(sockerrno)) {
821                         // Compensate for SPTPS overhead
822                         len -= SPTPS_DATAGRAM_OVERHEAD;
823                         if(relay->maxmtu >= len)
824                                 relay->maxmtu = len - 1;
825                         if(relay->mtu >= len)
826                                 relay->mtu = len - 1;
827                 } else {
828                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
829                         return false;
830                 }
831         }
832
833         return true;
834 }
835
836 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
837         return send_sptps_data_priv(handle, myself, type, data, len);
838 }
839
840 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
841         node_t *from = handle;
842
843         if(type == SPTPS_HANDSHAKE) {
844                 if(!from->status.validkey) {
845                         from->status.validkey = true;
846                         from->status.waitingforkey = false;
847                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
848                 }
849                 return true;
850         }
851
852         if(len > MTU) {
853                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
854                 return false;
855         }
856
857         vpn_packet_t inpkt;
858         inpkt.offset = DEFAULT_PACKET_OFFSET;
859
860         if(type == PKT_PROBE) {
861                 inpkt.len = len;
862                 memcpy(DATA(&inpkt), data, len);
863                 mtu_probe_h(from, &inpkt, len);
864                 return true;
865         }
866
867         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
868                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
869                 return false;
870         }
871
872         /* Check if we have the headers we need */
873         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
874                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
875                 return false;
876         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
877                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
878         }
879
880         int offset = (type & PKT_MAC) ? 0 : 14;
881         if(type & PKT_COMPRESSED) {
882                 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
883                 if(ulen < 0) {
884                         return false;
885                 } else {
886                         inpkt.len = ulen + offset;
887                 }
888                 if(inpkt.len > MAXSIZE)
889                         abort();
890         } else {
891                 memcpy(DATA(&inpkt) + offset, data, len);
892                 inpkt.len = len + offset;
893         }
894
895         /* Generate the Ethernet packet type if necessary */
896         if(offset) {
897                 switch(DATA(&inpkt)[14] >> 4) {
898                         case 4:
899                                 DATA(&inpkt)[12] = 0x08;
900                                 DATA(&inpkt)[13] = 0x00;
901                                 break;
902                         case 6:
903                                 DATA(&inpkt)[12] = 0x86;
904                                 DATA(&inpkt)[13] = 0xDD;
905                                 break;
906                         default:
907                                 logger(DEBUG_TRAFFIC, LOG_ERR,
908                                                    "Unknown IP version %d while reading packet from %s (%s)",
909                                                    DATA(&inpkt)[14] >> 4, from->name, from->hostname);
910                                 return false;
911                 }
912         }
913
914         receive_packet(from, &inpkt);
915         return true;
916 }
917
918 // This function tries to get SPTPS keys, if they aren't already known.
919 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
920 static void try_sptps(node_t *n) {
921         if(n->status.validkey)
922                 return;
923
924         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
925
926         if(!n->status.waitingforkey)
927                 send_req_key(n);
928         else if(n->last_req_key + 10 < now.tv_sec) {
929                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
930                 sptps_stop(&n->sptps);
931                 n->status.waitingforkey = false;
932                 send_req_key(n);
933         }
934
935         return;
936 }
937
938 // This function tries to establish a tunnel to a node (or its relay) so that packets can be sent (e.g. get SPTPS keys).
939 // If a tunnel is already established, it tries to improve it (e.g. by trying to establish a UDP tunnel instead of TCP).
940 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if TCP and/or UDP is usable.
941 // By calling this function repeatedly, the tunnel is gradually improved until we hit the wall imposed by the underlying network environment.
942 // It is recommended to call this function every time a packet is sent (or intended to be sent) to a node,
943 // so that the tunnel keeps improving as packets flow, and then gracefully downgrades itself as it goes idle.
944 static void try_tx(node_t *n) {
945         /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
946            messages anyway, so there's no need for SPTPS at all. Otherwise, get the keys. */
947         if(n->status.sptps && !(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))) {
948                 try_sptps(n);
949                 if (!n->status.validkey)
950                         return;
951         }
952
953         node_t *via = (n->via == myself) ? n->nexthop : n->via;
954         
955         if((myself->options | via->options) & OPTION_TCPONLY)
956                 return;
957
958         if(!n->status.sptps && !via->status.validkey && via->last_req_key + 10 <= now.tv_sec) {
959                 send_req_key(via);
960                 via->last_req_key = now.tv_sec;
961         }
962
963         /* If we don't know how to reach "via" yet, then try to reach it through a relay. */
964         if(n->status.sptps && !via->status.udp_confirmed && via->nexthop != via && (via->nexthop->options >> 24) >= 4)
965                 try_tx(via->nexthop);
966 }
967
968 /*
969   send a packet to the given vpn ip.
970 */
971 void send_packet(node_t *n, vpn_packet_t *packet) {
972         node_t *via;
973
974         if(n == myself) {
975                 if(overwrite_mac)
976                          memcpy(DATA(packet), mymac.x, ETH_ALEN);
977                 n->out_packets++;
978                 n->out_bytes += packet->len;
979                 devops.write(packet);
980                 return;
981         }
982
983         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
984                            packet->len, n->name, n->hostname);
985
986         if(!n->status.reachable) {
987                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
988                                    n->name, n->hostname);
989                 return;
990         }
991
992         n->out_packets++;
993         n->out_bytes += packet->len;
994
995         if(n->status.sptps) {
996                 send_sptps_packet(n, packet);
997                 goto end;
998         }
999
1000         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
1001
1002         if(via != n)
1003                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
1004                            n->name, via->name, n->via->hostname);
1005
1006         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
1007                 if(!send_tcppacket(via->connection, packet))
1008                         terminate_connection(via->connection, true);
1009         } else
1010                 send_udppacket(via, packet);
1011
1012 end:
1013         /* Try to improve the tunnel.
1014            Note that we do this *after* we send the packet because sending actual packets take priority
1015            with regard to the send buffer space and latency. */
1016         try_tx(n);
1017 }
1018
1019 /* Broadcast a packet using the minimum spanning tree */
1020
1021 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1022         // Always give ourself a copy of the packet.
1023         if(from != myself)
1024                 send_packet(myself, packet);
1025
1026         // In TunnelServer mode, do not forward broadcast packets.
1027         // The MST might not be valid and create loops.
1028         if(tunnelserver || broadcast_mode == BMODE_NONE)
1029                 return;
1030
1031         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1032                            packet->len, from->name, from->hostname);
1033
1034         switch(broadcast_mode) {
1035                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1036                 // This guarantees all nodes receive the broadcast packet, and
1037                 // usually distributes the sending of broadcast packets over all nodes.
1038                 case BMODE_MST:
1039                         for list_each(connection_t, c, connection_list)
1040                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
1041                                         send_packet(c->node, packet);
1042                         break;
1043
1044                 // In direct mode, we send copies to each node we know of.
1045                 // However, this only reaches nodes that can be reached in a single hop.
1046                 // We don't have enough information to forward broadcast packets in this case.
1047                 case BMODE_DIRECT:
1048                         if(from != myself)
1049                                 break;
1050
1051                         for splay_each(node_t, n, node_tree)
1052                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1053                                         send_packet(n, packet);
1054                         break;
1055
1056                 default:
1057                         break;
1058         }
1059 }
1060
1061 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1062         node_t *n = NULL;
1063         bool hard = false;
1064         static time_t last_hard_try = 0;
1065
1066         for splay_each(edge_t, e, edge_weight_tree) {
1067                 if(!e->to->status.reachable || e->to == myself)
1068                         continue;
1069
1070                 if(sockaddrcmp_noport(from, &e->address)) {
1071                         if(last_hard_try == now.tv_sec)
1072                                 continue;
1073                         hard = true;
1074                 }
1075
1076                 if(!try_mac(e->to, pkt))
1077                         continue;
1078
1079                 n = e->to;
1080                 break;
1081         }
1082
1083         if(hard)
1084                 last_hard_try = now.tv_sec;
1085
1086         last_hard_try = now.tv_sec;
1087         return n;
1088 }
1089
1090 void handle_incoming_vpn_data(void *data, int flags) {
1091         listen_socket_t *ls = data;
1092         vpn_packet_t pkt;
1093         char *hostname;
1094         node_id_t nullid = {};
1095         sockaddr_t addr = {};
1096         socklen_t addrlen = sizeof addr;
1097         node_t *from, *to;
1098         bool direct = false;
1099
1100         pkt.offset = 0;
1101         int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1102
1103         if(len <= 0 || len > MAXSIZE) {
1104                 if(!sockwouldblock(sockerrno))
1105                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1106                 return;
1107         }
1108
1109         pkt.len = len;
1110
1111         sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1112
1113         // Try to figure out who sent this packet.
1114
1115         node_t *n = lookup_node_udp(&addr);
1116
1117         if(!n) {
1118                 // It might be from a 1.1 node, which might have a source ID in the packet.
1119                 pkt.offset = 2 * sizeof(node_id_t);
1120                 from = lookup_node_id(SRCID(&pkt));
1121                 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1122                         if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1123                                 n = from;
1124                         else
1125                                 goto skip_harder;
1126                 }
1127         }
1128
1129         if(!n) {
1130                 pkt.offset = 0;
1131                 n = try_harder(&addr, &pkt);
1132         }
1133
1134 skip_harder:
1135         if(!n) {
1136                 if(debug_level >= DEBUG_PROTOCOL) {
1137                         hostname = sockaddr2hostname(&addr);
1138                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1139                         free(hostname);
1140                 }
1141                 return;
1142         }
1143
1144         if(n->status.sptps) {
1145                 pkt.offset = 2 * sizeof(node_id_t);
1146
1147                 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1148                         direct = true;
1149                         from = n;
1150                         to = myself;
1151                 } else {
1152                         from = lookup_node_id(SRCID(&pkt));
1153                         to = lookup_node_id(DSTID(&pkt));
1154                 }
1155                 if(!from || !to) {
1156                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1157                         return;
1158                 }
1159
1160                 if(to != myself) {
1161                         send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1162                         return;
1163                 }
1164         } else {
1165                 direct = true;
1166                 from = n;
1167         }
1168
1169         pkt.offset = 0;
1170         if(!receive_udppacket(from, &pkt))
1171                 return;
1172
1173         n->sock = ls - listen_socket;
1174         if(direct && sockaddrcmp(&addr, &n->address))
1175                 update_node_udp(n, &addr);
1176 }
1177
1178 void handle_device_data(void *data, int flags) {
1179         vpn_packet_t packet;
1180         packet.offset = DEFAULT_PACKET_OFFSET;
1181         packet.priority = 0;
1182
1183         if(devops.read(&packet)) {
1184                 myself->in_packets++;
1185                 myself->in_bytes += packet.len;
1186                 route(myself, &packet);
1187         }
1188 }