Clarify the send_mtu_probe() function.
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
49 #ifndef MAX
50 #define MAX(a, b) ((a) > (b) ? (a) : (b))
51 #endif
52
53 int keylifetime = 0;
54 #ifdef HAVE_LZO
/* Work memory passed to the LZO compressors in compress_packet(); sized for
   whichever of the two LZO variants used there needs more. */
55 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
56 #endif
57
/* Forward declaration: the MTU probe code below calls send_udppacket()
   before its definition. */
58 static void send_udppacket(node_t *, vpn_packet_t *);
59
/* Size in bytes of the per-node n->late bitmap used by the replay check in
   receive_udppacket(), so replaywin * 8 sequence numbers are tracked.
   When it is 0 the replay check is skipped. */
60 unsigned replaywin = 16;
/* When true, send_mtu_probe_handler() adds a probe that is sent to a local
   address of the peer during the first bursts. */
61 bool localdiscovery = true;
62
/* 2^30: once a node's received_seqno exceeds this, receive_udppacket()
   calls regenerate_key(). */
63 #define MAX_SEQNO 1073741824
64
65 static void send_mtu_probe_packet(node_t *n, int len) {
66         vpn_packet_t packet;
67         packet.offset = DEFAULT_PACKET_OFFSET;
68         memset(DATA(&packet), 0, 14);
69         randomize(DATA(&packet) + 14, len - 14);
70         packet.len = len;
71         packet.priority = 0;
72
73         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
74
75         send_udppacket(n, &packet);
76 }
77
/* Timer callback that drives path MTU discovery for one node.

   data is the node_t being probed. Each call advances n->mtuprobes, decides
   from that counter whether to send a burst of probes, refreshes the
   incoming packet loss estimate, and re-arms n->mtutimeout for the next call.
   If the node is unreachable or has no valid key, the counter is reset and
   the function returns WITHOUT re-arming the timer. */
78 static void send_mtu_probe_handler(void *data) {
79         node_t *n = data;
80
81         if(!n->status.reachable || !n->status.validkey) {
82                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
83                 n->mtuprobes = 0;
84                 return;
85         }
86
87         /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
88            mtuprobes ==    31: sleep pinginterval seconds
89            mtuprobes ==    32: send 1 burst, sleep pingtimeout seconds
90            mtuprobes ==    33: no response from other side, restart PMTU discovery process */
91
92         n->mtuprobes++;
93         int timeout = 1;
94
        /* Counter went past 32: the burst sent in state 32 was not answered
           (mtu_probe_h() lowers the counter again when a reply arrives). */
95         if(n->mtuprobes > 32) {
                /* No probe has ever been answered (minmtu is still 0), so there
                   is nothing to restart: fall back to the idle state. */
96                 if(!n->minmtu) {
97                         n->mtuprobes = 31;
98                         timeout = pinginterval;
99                         goto end;
100                 }
101
102                 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
103                 n->status.udp_confirmed = false;
104                 n->mtuprobes = 1;
105                 n->minmtu = 0;
106                 n->maxmtu = MTU;
107         }
108
        /* Ten bursts without a single reply: stop the initial discovery early. */
109         if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
110                 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
111                 n->mtuprobes = 31;
112         }
113
        /* Initial discovery ends either after 30 bursts or as soon as the lower
           and upper bound have met; collapse both bounds to one value and use
           it as the MTU. */
114         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
115                 if(n->minmtu > n->maxmtu)
116                         n->minmtu = n->maxmtu;
117                 else
118                         n->maxmtu = n->minmtu;
119                 n->mtu = n->minmtu;
120                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
121                 n->mtuprobes = 31;
122         }
123
        /* State 31 sends nothing; it only waits. State 32 sends a burst below
           and then waits pingtimeout for a reply. */
124         if(n->mtuprobes == 31) {
125                 timeout = pinginterval;
126                 goto end;
127         } else if(n->mtuprobes == 32) {
128                 timeout = pingtimeout;
129         }
130
131         /* After the initial discovery, a fourth packet is added to each batch with a
132            size larger than the currently known PMTU, to test if the PMTU has increased. */
        /* Values 30 and 31 have already jumped to `end` above, so in practice
           this branch is only taken when mtuprobes == 32. */
133         if (n->mtuprobes >= 30 && n->maxmtu + 8 < MTU)
134                 send_mtu_probe_packet(n, n->maxmtu + 8);
135
136         /* Probes are sent in batches of three, with random sizes between the
137            lower and upper boundaries for the MTU thus far discovered. */
138         for (int i = 0; i < 3; i++) {
139                 int len = n->maxmtu;
140                 if(n->minmtu < n->maxmtu)
141                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
142
                /* Never send a probe shorter than 64 bytes. */
143                 send_mtu_probe_packet(n, MAX(len, 64));
144         }
145
146         /* In case local discovery is enabled, another packet is added to each batch,
147            which will be broadcast to the local network. */
148         if(localdiscovery && n->mtuprobes <= 10 && n->prevedge) {
                /* send_locally is only set for the duration of this one call; it
                   makes send_udppacket() try choose_local_address() first. */
149                 n->status.send_locally = true;
150                 send_mtu_probe_packet(n, 16);
151                 n->status.send_locally = false;
152         }
153
        /* Start a new measurement window: mtu_probe_h() counts the replies to
           this burst and measures RTT relative to probe_time. */
154         n->probe_counter = 0;
155         gettimeofday(&n->probe_time, NULL);
156
157         /* Calculate the packet loss of incoming traffic by comparing the rate of
158            packets received to the rate with which the sequence number has increased.
159          */
160
        /* NOTE(review): if packets were received but received_seqno did not
           advance since the previous call, the divisor below is zero - confirm
           whether that can happen and what packetloss should be then. */
161         if(n->received > n->prev_received)
162                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
163         else
                /* Nothing received: the comparison yields 1 when the sequence
                   number did not advance either, 0 otherwise. */
164                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
165
166         n->prev_received_seqno = n->received_seqno;
167         n->prev_received = n->received;
168
169 end:
        /* Re-arm the timer: `timeout` seconds plus 0..99999 random microseconds. */
170         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
171 }
172
173 void send_mtu_probe(node_t *n) {
174         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
175         send_mtu_probe_handler(n);
176 }
177
178 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
179         if(!DATA(packet)[0]) {
180                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);
181
182                 /* It's a probe request, send back a reply */
183
184                 /* Type 2 probe replies were introduced in protocol 17.3 */
185                 if ((n->options >> 24) >= 3) {
186                         uint8_t *data = DATA(packet);
187                         *data++ = 2;
188                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
189                         struct timeval now;
190                         gettimeofday(&now, NULL);
191                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
192                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
193                         packet->len -= 10;
194                 } else {
195                         /* Legacy protocol: n won't understand type 2 probe replies. */
196                         DATA(packet)[0] = 1;
197                 }
198
199                 /* Temporarily set udp_confirmed, so that the reply is sent
200                    back exactly the way it came in. */
201
202                 bool udp_confirmed = n->status.udp_confirmed;
203                 n->status.udp_confirmed = true;
204                 send_udppacket(n, packet);
205                 n->status.udp_confirmed = udp_confirmed;
206         } else {
207                 length_t probelen = len;
208                 if (DATA(packet)[0] == 2) {
209                         if (len < 3)
210                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
211                         else {
212                                 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
213                         }
214                 }
215                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
216
217                 /* It's a valid reply: now we know bidirectional communication
218                    is possible using the address and socket that the reply
219                    packet used. */
220
221                 n->status.udp_confirmed = true;
222
223                 /* If we haven't established the PMTU yet, restart the discovery process. */
224
225                 if(n->mtuprobes > 30) {
226                         if (probelen == n->maxmtu + 8) {
227                                 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
228                                 n->maxmtu = MTU;
229                                 n->mtuprobes = 10;
230                                 return;
231                         }
232
233                         if(n->minmtu)
234                                 n->mtuprobes = 30;
235                         else
236                                 n->mtuprobes = 1;
237                 }
238
239                 /* If applicable, raise the minimum supported MTU */
240
241                 if(probelen > n->maxmtu)
242                         probelen = n->maxmtu;
243                 if(n->minmtu < probelen)
244                         n->minmtu = probelen;
245
246                 /* Calculate RTT and bandwidth.
247                    The RTT is the time between the MTU probe burst was sent and the first
248                    reply is received. The bandwidth is measured using the time between the
249                    arrival of the first and third probe reply (or type 2 probe requests).
250                  */
251
252                 struct timeval now, diff;
253                 gettimeofday(&now, NULL);
254                 timersub(&now, &n->probe_time, &diff);
255
256                 struct timeval probe_timestamp = now;
257                 if (DATA(packet)[0] == 2 && packet->len >= 11) {
258                         uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
259                         uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
260                         probe_timestamp.tv_sec = ntohl(sec);
261                         probe_timestamp.tv_usec = ntohl(usec);
262                 }
263                 
264                 n->probe_counter++;
265
266                 if(n->probe_counter == 1) {
267                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
268                         n->probe_time = probe_timestamp;
269                 } else if(n->probe_counter == 3) {
270                         struct timeval probe_timestamp_diff;
271                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
272                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
273                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
274                 }
275         }
276 }
277
278 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
279         if(level == 0) {
280                 memcpy(dest, source, len);
281                 return len;
282         } else if(level == 10) {
283 #ifdef HAVE_LZO
284                 lzo_uint lzolen = MAXSIZE;
285                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
286                 return lzolen;
287 #else
288                 return -1;
289 #endif
290         } else if(level < 10) {
291 #ifdef HAVE_ZLIB
292                 unsigned long destlen = MAXSIZE;
293                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
294                         return destlen;
295                 else
296 #endif
297                         return -1;
298         } else {
299 #ifdef HAVE_LZO
300                 lzo_uint lzolen = MAXSIZE;
301                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
302                 return lzolen;
303 #else
304                 return -1;
305 #endif
306         }
307
308         return -1;
309 }
310
311 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
312         if(level == 0) {
313                 memcpy(dest, source, len);
314                 return len;
315         } else if(level > 9) {
316 #ifdef HAVE_LZO
317                 lzo_uint lzolen = MAXSIZE;
318                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
319                         return lzolen;
320                 else
321 #endif
322                         return -1;
323         }
324 #ifdef HAVE_ZLIB
325         else {
326                 unsigned long destlen = MAXSIZE;
327                 if(uncompress(dest, &destlen, source, len) == Z_OK)
328                         return destlen;
329                 else
330                         return -1;
331         }
332 #endif
333
334         return -1;
335 }
336
337 /* VPN packet I/O */
338
339 static void receive_packet(node_t *n, vpn_packet_t *packet) {
340         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
341                            packet->len, n->name, n->hostname);
342
343         n->in_packets++;
344         n->in_bytes += packet->len;
345
346         route(n, packet);
347 }
348
349 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
350         if(n->status.sptps)
351                 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
352
353 #ifdef DISABLE_LEGACY
354         return false;
355 #else
356         if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
357                 return false;
358
359         return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
360 #endif
361 }
362
/* Decode a UDP packet received from node n and dispatch its payload.

   For SPTPS nodes the data behind the two node ids is handed to
   sptps_receive_data(). For legacy nodes the packet is authenticated,
   decrypted, replay-checked and decompressed here, and then passed to
   mtu_probe_h() (bytes 12 and 13 both zero) or receive_packet().

   Returns true when the packet was accepted, false when it was dropped.
   Always returns false for non-SPTPS nodes if DISABLE_LEGACY is defined. */
363 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
        /* Two scratch packets, used alternately as the output of the decrypt
           and decompress steps. */
364         vpn_packet_t pkt1, pkt2;
365         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
366         int nextpkt = 0;
367         size_t outlen;
368         pkt1.offset = DEFAULT_PACKET_OFFSET;
369         pkt2.offset = DEFAULT_PACKET_OFFSET;
370
371         if(n->status.sptps) {
372                 if(!n->sptps.state) {
373                         if(!n->status.waitingforkey) {
374                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
375                                 send_req_key(n);
376                         } else {
377                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
378                         }
379                         return false;
380                 }
                /* Skip the two node ids in front of the SPTPS datagram. */
381                 inpkt->offset += 2 * sizeof(node_id_t);
382                 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
383                         logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
384                         return false;
385                 }
386                 return true;
387         }
388
389 #ifdef DISABLE_LEGACY
390         return false;
391 #else
392         if(!n->status.validkey) {
393                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
394                 return false;
395         }
396
397         /* Check packet length */
398
399         if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
400                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
401                                         n->name, n->hostname);
402                 return false;
403         }
404
405         /* It's a legacy UDP packet, the data starts after the seqno */
406
407         inpkt->offset += sizeof(seqno_t);
408
409         /* Check the message authentication code */
410
411         if(digest_active(n->indigest)) {
                /* The digest sits at the end of the packet; strip it from the
                   length before verifying what precedes it. */
412                 inpkt->len -= digest_length(n->indigest);
413                 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
414                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
415                         return false;
416                 }
417         }
418         /* Decrypt the packet */
419
420         if(cipher_active(n->incipher)) {
421                 vpn_packet_t *outpkt = pkt[nextpkt++];
422                 outlen = MAXSIZE;
423
424                 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
425                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
426                         return false;
427                 }
428
429                 outpkt->len = outlen;
430                 inpkt = outpkt;
431         }
432
433         /* Check the sequence number */
434
435         seqno_t seqno;
436         memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
437         seqno = ntohl(seqno);
438         inpkt->len -= sizeof seqno;
439
        /* Replay protection. n->late is a bitmap of replaywin bytes, indexed
           by sequence number modulo replaywin * 8. A set bit marks a sequence
           number that was skipped and may still arrive late. */
440         if(replaywin) {
441                 if(seqno != n->received_seqno + 1) {
442                         if(seqno >= n->received_seqno + replaywin * 8) {
                                /* Far ahead of the window: drop the first
                                   replaywin / 4 such packets in a row, after
                                   that accept the jump and clear the bitmap. */
443                                 if(n->farfuture++ < replaywin >> 2) {
444                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
445                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
446                                         return false;
447                                 }
448                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
449                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
450                                 memset(n->late, 0, replaywin);
451                         } else if (seqno <= n->received_seqno) {
                                /* Old sequence number: only accept it when it
                                   is still inside the window and its bit says
                                   it has not been seen yet. */
452                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
453                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
454                                                 n->name, n->hostname, seqno, n->received_seqno);
455                                         return false;
456                                 }
457                         } else {
                                /* Slightly ahead: mark every skipped sequence
                                   number as outstanding. */
                                /* NOTE(review): i is a signed int while the
                                   sequence numbers look unsigned - confirm the
                                   comparison behaves as intended for large
                                   values. */
458                                 for(int i = n->received_seqno + 1; i < seqno; i++)
459                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
460                         }
461                 }
462
463                 n->farfuture = 0;
                /* This sequence number has now been seen. */
464                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
465         }
466
467         if(seqno > n->received_seqno)
468                 n->received_seqno = seqno;
469
470         n->received++;
471
472         if(n->received_seqno > MAX_SEQNO)
473                 regenerate_key();
474
475         /* Decompress the packet */
476
477         length_t origlen = inpkt->len;
478
479         if(n->incompression) {
480                 vpn_packet_t *outpkt = pkt[nextpkt++];
481
                /* NOTE(review): uncompress_packet() returns length_t; if that
                   type is unsigned this `< 0` check can never be true and a
                   failed decompression is not detected - confirm. */
482                 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
483                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
484                                                  n->name, n->hostname);
485                         return false;
486                 }
487
488                 inpkt = outpkt;
489
                /* origlen is only used as the probe length given to
                   mtu_probe_h(); presumably this subtracts an allowance for
                   compression overhead - TODO confirm. */
490                 origlen -= MTU/64 + 20;
491         }
492
493         inpkt->priority = 0;
494
        /* Bytes 12 and 13 both zero mark an MTU probe rather than real traffic. */
495         if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
496                 mtu_probe_h(n, inpkt, origlen);
497         else
498                 receive_packet(n, inpkt);
499         return true;
500 #endif
501 }
502
503 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
504         vpn_packet_t outpkt;
505         outpkt.offset = DEFAULT_PACKET_OFFSET;
506
507         if(len > sizeof outpkt.data - outpkt.offset)
508                 return;
509
510         outpkt.len = len;
511         if(c->options & OPTION_TCPONLY)
512                 outpkt.priority = 0;
513         else
514                 outpkt.priority = -1;
515         memcpy(DATA(&outpkt), buffer, len);
516
517         receive_packet(c->node, &outpkt);
518 }
519
520 static bool try_sptps(node_t *n) {
521         if(n->status.validkey)
522                 return true;
523
524         /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
525            messages anyway, so there's no need for SPTPS at all. */
526         if(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))
527                 return false;
528
529         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
530
531         if(!n->status.waitingforkey)
532                 send_req_key(n);
533         else if(n->last_req_key + 10 < now.tv_sec) {
534                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
535                 sptps_stop(&n->sptps);
536                 n->status.waitingforkey = false;
537                 send_req_key(n);
538         }
539
540         return false;
541 }
542
543 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
544         /* Note: condition order is as intended - even if we have a direct
545            metaconnection, we want to try SPTPS anyway as it's the only way to
546            get UDP going */
547         if(!try_sptps(n) && !n->connection)
548                 return;
549
550         uint8_t type = 0;
551         int offset = 0;
552
553         if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
554                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
555                 return;
556         }
557
558         if(routing_mode == RMODE_ROUTER)
559                 offset = 14;
560         else
561                 type = PKT_MAC;
562
563         if(origpkt->len < offset)
564                 return;
565
566         vpn_packet_t outpkt;
567
568         if(n->outcompression) {
569                 outpkt.offset = 0;
570                 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
571                 if(len < 0) {
572                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
573                 } else if(len < origpkt->len - offset) {
574                         outpkt.len = len + offset;
575                         origpkt = &outpkt;
576                         type |= PKT_COMPRESSED;
577                 }
578         }
579
580         /* If we have a direct metaconnection to n, and we can't use UDP, then
581            don't bother with SPTPS and just use a "plaintext" PACKET message.
582            We don't really care about end-to-end security since we're not
583            sending the message through any intermediate nodes. */
584         if(n->connection && origpkt->len > n->minmtu)
585                 send_tcppacket(n->connection, origpkt);
586         else
587                 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
588         return;
589 }
590
591 static void adapt_socket(const sockaddr_t *sa, int *sock) {
592         /* Make sure we have a suitable socket for the chosen address */
593         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
594                 for(int i = 0; i < listen_sockets; i++) {
595                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
596                                 *sock = i;
597                                 break;
598                         }
599                 }
600         }
601 }
602
603 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
604         /* Latest guess */
605         *sa = &n->address;
606         *sock = n->sock;
607
608         /* If the UDP address is confirmed, use it. */
609         if(n->status.udp_confirmed)
610                 return;
611
612         /* Send every third packet to n->address; that could be set
613            to the node's reflexive UDP address discovered during key
614            exchange. */
615
616         static int x = 0;
617         if(++x >= 3) {
618                 x = 0;
619                 return;
620         }
621
622         /* Otherwise, address are found in edges to this node.
623            So we pick a random edge and a random socket. */
624
625         int i = 0;
626         int j = rand() % n->edge_tree->count;
627         edge_t *candidate = NULL;
628
629         for splay_each(edge_t, e, n->edge_tree) {
630                 if(i++ == j) {
631                         candidate = e->reverse;
632                         break;
633                 }
634         }
635
636         if(candidate) {
637                 *sa = &candidate->address;
638                 *sock = rand() % listen_sockets;
639         }
640
641         adapt_socket(*sa, sock);
642 }
643
644 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
645         *sa = NULL;
646
647         /* Pick one of the edges from this node at random, then use its local address. */
648
649         int i = 0;
650         int j = rand() % n->edge_tree->count;
651         edge_t *candidate = NULL;
652
653         for splay_each(edge_t, e, n->edge_tree) {
654                 if(i++ == j) {
655                         candidate = e;
656                         break;
657                 }
658         }
659
660         if (candidate && candidate->local_address.sa.sa_family) {
661                 *sa = &candidate->local_address;
662                 *sock = rand() % listen_sockets;
663                 adapt_socket(*sa, sock);
664         }
665 }
666
/* Send origpkt to node n over UDP.

   SPTPS nodes are handled by send_sptps_packet(). For legacy nodes the
   packet is compressed, prefixed with a sequence number, encrypted and
   authenticated as configured for n, and then sent with sendto(). Packets
   are forwarded via TCP or the next hop instead when no valid key is known
   yet, or when PMTU discovery is enabled and a non-probe packet is larger
   than n->minmtu.

   origpkt->len is restored to its original value before returning on the
   legacy path. Nothing is sent when n is unreachable, or for non-SPTPS nodes
   if DISABLE_LEGACY is defined. */
667 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
        /* Two scratch packets, used alternately as the output of the compress
           and encrypt steps. */
668         vpn_packet_t pkt1, pkt2;
669         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
670         vpn_packet_t *inpkt = origpkt;
671         int nextpkt = 0;
672         vpn_packet_t *outpkt;
673         int origlen = origpkt->len;
674         size_t outlen;
675 #if defined(SOL_IP) && defined(IP_TOS)
        /* Remembers the TOS value last set with setsockopt(), across calls. */
676         static int priority = 0;
677         int origpriority = origpkt->priority;
678 #endif
679
680         pkt1.offset = DEFAULT_PACKET_OFFSET;
681         pkt2.offset = DEFAULT_PACKET_OFFSET;
682
683         if(!n->status.reachable) {
684                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
685                 return;
686         }
687
688         if(n->status.sptps)
689                 return send_sptps_packet(n, origpkt);
690
691 #ifdef DISABLE_LEGACY
692         return;
693 #else
694         /* Make sure we have a valid key */
695
696         if(!n->status.validkey) {
697                 logger(DEBUG_TRAFFIC, LOG_INFO,
698                                    "No valid key known yet for %s (%s), forwarding via TCP",
699                                    n->name, n->hostname);
700
                /* Ask for a key at most once every 10 seconds. */
701                 if(n->last_req_key + 10 <= now.tv_sec) {
702                         send_req_key(n);
703                         n->last_req_key = now.tv_sec;
704                 }
705
706                 send_tcppacket(n->nexthop->connection, origpkt);
707
708                 return;
709         }
710
        /* Non-probe packets (bytes 12/13 not both zero) that are larger than
           the confirmed minimum MTU are not sent via UDP. */
711         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
712                 logger(DEBUG_TRAFFIC, LOG_INFO,
713                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
714                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
715
716                 if(n != n->nexthop)
717                         send_packet(n->nexthop, origpkt);
718                 else
719                         send_tcppacket(n->nexthop->connection, origpkt);
720
721                 return;
722         }
723
724         /* Compress the packet */
725
726         if(n->outcompression) {
727                 outpkt = pkt[nextpkt++];
728
                /* NOTE(review): compress_packet() returns length_t; if that
                   type is unsigned this `< 0` check can never be true -
                   confirm. */
729                 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
730                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
731                                    n->name, n->hostname);
732                         return;
733                 }
734
735                 inpkt = outpkt;
736         }
737
738         /* Add sequence number */
739
        /* Without compression inpkt still is origpkt here, so origpkt->len is
           modified; it is restored at `end`. */
740         seqno_t seqno = htonl(++(n->sent_seqno));
741         memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
742         inpkt->len += sizeof seqno;
743
744         /* Encrypt the packet */
745
746         if(cipher_active(n->outcipher)) {
747                 outpkt = pkt[nextpkt++];
748                 outlen = MAXSIZE;
749
750                 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
751                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
752                         goto end;
753                 }
754
755                 outpkt->len = outlen;
756                 inpkt = outpkt;
757         }
758
759         /* Add the message authentication code */
760
761         if(digest_active(n->outdigest)) {
                /* The digest is appended directly behind the data it covers. */
762                 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
                        /* NOTE(review): this is a digest failure, but the
                           message says "encrypting" - confirm whether that is
                           intended. */
763                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
764                         goto end;
765                 }
766
767                 inpkt->len += digest_length(n->outdigest);
768         }
769
770         /* Send the packet */
771
772         const sockaddr_t *sa = NULL;
773         int sock;
774
        /* Try a local address first when requested; fall back to the normal
           address selection when none is available. */
775         if(n->status.send_locally)
776                 choose_local_address(n, &sa, &sock);
777         if(!sa)
778                 choose_udp_address(n, &sa, &sock);
779
780 #if defined(SOL_IP) && defined(IP_TOS)
        /* NOTE(review): the TOS option is set on listen_socket[n->sock], but
           the packet is sent on listen_socket[sock] below, which may be a
           different socket - confirm. */
781         if(priorityinheritance && origpriority != priority
782            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
783                 priority = origpriority;
784                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
785                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
786                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
787         }
788 #endif
789
790         if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
                /* The packet was too big for the socket: lower the MTU
                   estimates to just below the size of this packet. */
791                 if(sockmsgsize(sockerrno)) {
792                         if(n->maxmtu >= origlen)
793                                 n->maxmtu = origlen - 1;
794                         if(n->mtu >= origlen)
795                                 n->mtu = origlen - 1;
796                 } else
797                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
798         }
799
800 end:
        /* Undo any length changes made to the caller's packet above. */
801         origpkt->len = origlen;
802 #endif
803 }
804
805 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
806         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
807         bool direct = from == myself && to == relay;
808         bool relay_supported = (relay->options >> 24) >= 4;
809         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
810
811         /* We don't really need the relay's key, but we need to establish a UDP tunnel with it and discover its MTU. */
812         if (!direct && relay_supported && !tcponly)
813                 try_sptps(relay);
814
815         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
816            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
817                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
818
819         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
820                 char buf[len * 4 / 3 + 5];
821                 b64encode(data, buf, len);
822                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
823                    to ensure we get to learn the reflexive UDP address. */
824                 if(from == myself && !to->status.validkey) {
825                         to->incompression = myself->incompression;
826                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
827                 } else {
828                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
829                 }
830         }
831
832         size_t overhead = 0;
833         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
834         char buf[len + overhead]; char* buf_ptr = buf;
835         if(relay_supported) {
836                 if(direct) {
837                         /* Inform the recipient that this packet was sent directly. */
838                         node_id_t nullid = {};
839                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
840                 } else {
841                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
842                 }
843                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
844
845         }
846         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
847         memcpy(buf_ptr, data, len); buf_ptr += len;
848
849         const sockaddr_t *sa = NULL;
850         int sock;
851         if(relay->status.send_locally)
852                 choose_local_address(relay, &sa, &sock);
853         if(!sa)
854                 choose_udp_address(relay, &sa, &sock);
855         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
856         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
857                 if(sockmsgsize(sockerrno)) {
858                         // Compensate for SPTPS overhead
859                         len -= SPTPS_DATAGRAM_OVERHEAD;
860                         if(relay->maxmtu >= len)
861                                 relay->maxmtu = len - 1;
862                         if(relay->mtu >= len)
863                                 relay->mtu = len - 1;
864                 } else {
865                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
866                         return false;
867                 }
868         }
869
870         return true;
871 }
872
873 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
874         return send_sptps_data_priv(handle, myself, type, data, len);
875 }
876
877 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
878         node_t *from = handle;
879
880         if(type == SPTPS_HANDSHAKE) {
881                 if(!from->status.validkey) {
882                         from->status.validkey = true;
883                         from->status.waitingforkey = false;
884                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
885                 }
886                 return true;
887         }
888
889         if(len > MTU) {
890                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
891                 return false;
892         }
893
894         vpn_packet_t inpkt;
895         inpkt.offset = DEFAULT_PACKET_OFFSET;
896
897         if(type == PKT_PROBE) {
898                 inpkt.len = len;
899                 memcpy(DATA(&inpkt), data, len);
900                 mtu_probe_h(from, &inpkt, len);
901                 return true;
902         }
903
904         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
905                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
906                 return false;
907         }
908
909         /* Check if we have the headers we need */
910         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
911                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
912                 return false;
913         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
914                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
915         }
916
917         int offset = (type & PKT_MAC) ? 0 : 14;
918         if(type & PKT_COMPRESSED) {
919                 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
920                 if(ulen < 0) {
921                         return false;
922                 } else {
923                         inpkt.len = ulen + offset;
924                 }
925                 if(inpkt.len > MAXSIZE)
926                         abort();
927         } else {
928                 memcpy(DATA(&inpkt) + offset, data, len);
929                 inpkt.len = len + offset;
930         }
931
932         /* Generate the Ethernet packet type if necessary */
933         if(offset) {
934                 switch(DATA(&inpkt)[14] >> 4) {
935                         case 4:
936                                 DATA(&inpkt)[12] = 0x08;
937                                 DATA(&inpkt)[13] = 0x00;
938                                 break;
939                         case 6:
940                                 DATA(&inpkt)[12] = 0x86;
941                                 DATA(&inpkt)[13] = 0xDD;
942                                 break;
943                         default:
944                                 logger(DEBUG_TRAFFIC, LOG_ERR,
945                                                    "Unknown IP version %d while reading packet from %s (%s)",
946                                                    DATA(&inpkt)[14] >> 4, from->name, from->hostname);
947                                 return false;
948                 }
949         }
950
951         receive_packet(from, &inpkt);
952         return true;
953 }
954
955 /*
956   send a packet to the given vpn ip.
957 */
958 void send_packet(node_t *n, vpn_packet_t *packet) {
959         node_t *via;
960
961         if(n == myself) {
962                 if(overwrite_mac)
963                          memcpy(DATA(packet), mymac.x, ETH_ALEN);
964                 n->out_packets++;
965                 n->out_bytes += packet->len;
966                 devops.write(packet);
967                 return;
968         }
969
970         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
971                            packet->len, n->name, n->hostname);
972
973         if(!n->status.reachable) {
974                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
975                                    n->name, n->hostname);
976                 return;
977         }
978
979         n->out_packets++;
980         n->out_bytes += packet->len;
981
982         if(n->status.sptps) {
983                 send_sptps_packet(n, packet);
984                 return;
985         }
986
987         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
988
989         if(via != n)
990                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
991                            n->name, via->name, n->via->hostname);
992
993         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
994                 if(!send_tcppacket(via->connection, packet))
995                         terminate_connection(via->connection, true);
996         } else
997                 send_udppacket(via, packet);
998 }
999
1000 /* Broadcast a packet using the minimum spanning tree */
1001
1002 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1003         // Always give ourself a copy of the packet.
1004         if(from != myself)
1005                 send_packet(myself, packet);
1006
1007         // In TunnelServer mode, do not forward broadcast packets.
1008         // The MST might not be valid and create loops.
1009         if(tunnelserver || broadcast_mode == BMODE_NONE)
1010                 return;
1011
1012         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1013                            packet->len, from->name, from->hostname);
1014
1015         switch(broadcast_mode) {
1016                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1017                 // This guarantees all nodes receive the broadcast packet, and
1018                 // usually distributes the sending of broadcast packets over all nodes.
1019                 case BMODE_MST:
1020                         for list_each(connection_t, c, connection_list)
1021                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
1022                                         send_packet(c->node, packet);
1023                         break;
1024
1025                 // In direct mode, we send copies to each node we know of.
1026                 // However, this only reaches nodes that can be reached in a single hop.
1027                 // We don't have enough information to forward broadcast packets in this case.
1028                 case BMODE_DIRECT:
1029                         if(from != myself)
1030                                 break;
1031
1032                         for splay_each(node_t, n, node_tree)
1033                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1034                                         send_packet(n, packet);
1035                         break;
1036
1037                 default:
1038                         break;
1039         }
1040 }
1041
1042 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1043         node_t *n = NULL;
1044         bool hard = false;
1045         static time_t last_hard_try = 0;
1046
1047         for splay_each(edge_t, e, edge_weight_tree) {
1048                 if(!e->to->status.reachable || e->to == myself)
1049                         continue;
1050
1051                 if(sockaddrcmp_noport(from, &e->address)) {
1052                         if(last_hard_try == now.tv_sec)
1053                                 continue;
1054                         hard = true;
1055                 }
1056
1057                 if(!try_mac(e->to, pkt))
1058                         continue;
1059
1060                 n = e->to;
1061                 break;
1062         }
1063
1064         if(hard)
1065                 last_hard_try = now.tv_sec;
1066
1067         last_hard_try = now.tv_sec;
1068         return n;
1069 }
1070
1071 void handle_incoming_vpn_data(void *data, int flags) {
1072         listen_socket_t *ls = data;
1073         vpn_packet_t pkt;
1074         char *hostname;
1075         node_id_t nullid = {};
1076         sockaddr_t addr = {};
1077         socklen_t addrlen = sizeof addr;
1078         node_t *from, *to;
1079         bool direct = false;
1080
1081         pkt.offset = 0;
1082         int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1083
1084         if(len <= 0 || len > MAXSIZE) {
1085                 if(!sockwouldblock(sockerrno))
1086                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1087                 return;
1088         }
1089
1090         pkt.len = len;
1091
1092         sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1093
1094         // Try to figure out who sent this packet.
1095
1096         node_t *n = lookup_node_udp(&addr);
1097
1098         if(!n) {
1099                 // It might be from a 1.1 node, which might have a source ID in the packet.
1100                 pkt.offset = 2 * sizeof(node_id_t);
1101                 from = lookup_node_id(SRCID(&pkt));
1102                 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1103                         if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1104                                 n = from;
1105                         else
1106                                 goto skip_harder;
1107                 }
1108         }
1109
1110         if(!n) {
1111                 pkt.offset = 0;
1112                 n = try_harder(&addr, &pkt);
1113         }
1114
1115 skip_harder:
1116         if(!n) {
1117                 if(debug_level >= DEBUG_PROTOCOL) {
1118                         hostname = sockaddr2hostname(&addr);
1119                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1120                         free(hostname);
1121                 }
1122                 return;
1123         }
1124
1125         if(n->status.sptps) {
1126                 pkt.offset = 2 * sizeof(node_id_t);
1127
1128                 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1129                         direct = true;
1130                         from = n;
1131                         to = myself;
1132                 } else {
1133                         from = lookup_node_id(SRCID(&pkt));
1134                         to = lookup_node_id(DSTID(&pkt));
1135                 }
1136                 if(!from || !to) {
1137                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1138                         return;
1139                 }
1140
1141                 if(to != myself) {
1142                         send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1143                         return;
1144                 }
1145         } else {
1146                 direct = true;
1147                 from = n;
1148         }
1149
1150         pkt.offset = 0;
1151         if(!receive_udppacket(from, &pkt))
1152                 return;
1153
1154         n->sock = ls - listen_socket;
1155         if(direct && sockaddrcmp(&addr, &n->address))
1156                 update_node_udp(n, &addr);
1157 }
1158
1159 void handle_device_data(void *data, int flags) {
1160         vpn_packet_t packet;
1161         packet.offset = DEFAULT_PACKET_OFFSET;
1162         packet.priority = 0;
1163
1164         if(devops.read(&packet)) {
1165                 myself->in_packets++;
1166                 myself->in_bytes += packet.len;
1167                 route(myself, &packet);
1168         }
1169 }