Allow tinc to be compiled without OpenSSL.
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
/* Key lifetime setting; it is only defined here, not used in this part of the file. */
49 int keylifetime = 0;
50 #ifdef HAVE_LZO
/* Scratch memory handed to lzo1x_1_compress() and lzo1x_999_compress();
   sized for whichever of the two needs more. */
51 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
52 #endif
53
/* Forward declaration: the MTU probe code below sends probes through
   send_udppacket(), which is defined further down. */
54 static void send_udppacket(node_t *, vpn_packet_t *);
55
/* Size in bytes of the per-node `late` bitmap used for replay detection
   (so replaywin * 8 sequence numbers are tracked); 0 disables the check. */
56 unsigned replaywin = 16;
/* When true, one extra MTU probe per burst is added for local discovery. */
57 bool localdiscovery = true;
58
/* Once a node's received sequence number exceeds this, regenerate_key() is called. */
59 #define MAX_SEQNO 1073741824
60
61 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
62    mtuprobes ==    31: sleep pinginterval seconds
63    mtuprobes ==    32: send 1 burst, sleep pingtimeout seconds
64    mtuprobes ==    33: no response from other side, restart PMTU discovery process
65
66    Probes are sent in batches of at least three, with random sizes between the
67    lower and upper boundaries for the MTU thus far discovered.
68
69    After the initial discovery, a fourth packet is added to each batch with a
70    size larger than the currently known PMTU, to test if the PMTU has increased.
71
72    In case local discovery is enabled, another packet is added to each batch,
73    which will be broadcast to the local network.
74
75 */
76
/* Timer callback that drives path MTU discovery towards one node.

   data is the node_t being probed. Each invocation advances n->mtuprobes,
   decides which phase of the discovery the node is in (see the table above),
   optionally sends one burst of probe packets through send_udppacket(),
   refreshes the incoming packet loss estimate, and finally re-arms
   n->mtutimeout with the delay chosen for the current phase. */
77 static void send_mtu_probe_handler(void *data) {
78         node_t *n = data;
79         int timeout = 1;
80
81         n->mtuprobes++;
82
           /* Probing is pointless without a reachable node and a valid key.
              Note that this return bypasses the timeout_set() at `end`. */
83         if(!n->status.reachable || !n->status.validkey) {
84                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
85                 n->mtuprobes = 0;
86                 return;
87         }
88
           /* Past state 32: the burst sent in state 32 went unanswered. */
89         if(n->mtuprobes > 32) {
                   /* No MTU was ever established: just go back to sleeping. */
90                 if(!n->minmtu) {
91                         n->mtuprobes = 31;
92                         timeout = pinginterval;
93                         goto end;
94                 }
95
                   /* An MTU was known before, so UDP stopped working: start over. */
96                 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
97                 n->status.udp_confirmed = false;
98                 n->mtuprobes = 1;
99                 n->minmtu = 0;
100                 n->maxmtu = MTU;
101         }
102
            /* Ten or more bursts without a single reply: give up for now. */
103         if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
104                 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
105                 n->mtuprobes = 31;
106         }
107
            /* Discovery is finished after 30 bursts, or earlier once the lower
               and upper bound have met; collapse both bounds and fix the MTU. */
108         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
109                 if(n->minmtu > n->maxmtu)
110                         n->minmtu = n->maxmtu;
111                 else
112                         n->maxmtu = n->minmtu;
113                 n->mtu = n->minmtu;
114                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
115                 n->mtuprobes = 31;
116         }
117
            /* State 31 only sleeps; state 32 sends a burst and waits pingtimeout. */
118         if(n->mtuprobes == 31) {
119                 timeout = pinginterval;
120                 goto end;
121         } else if(n->mtuprobes == 32) {
122                 timeout = pingtimeout;
123         }
124
            /* Send one burst. Slot 0 is the "has the PMTU grown?" probe, slots 1-3
               are regular probes, slot 4 exists only when localdiscovery is set. */
125         for(int i = 0; i < 4 + localdiscovery; i++) {
126                 int len;
127
128                 if(i == 0) {
                            /* Only probe above maxmtu after the initial discovery, and
                               only if the larger size stays below MTU. */
129                         if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
130                                 continue;
131                         len = n->maxmtu + 8;
132                 } else if(n->maxmtu <= n->minmtu) {
133                         len = n->maxmtu;
134                 } else {
                            /* Random size in the range minmtu + 1 .. maxmtu. */
135                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
136                 }
137
138                 if(len < 64)
139                         len = 64;
140
                    /* A probe has an all-zero 14 byte header followed by random data.
                       The zero first byte marks it as a request (see mtu_probe_h()). */
141                 vpn_packet_t packet;
142                 packet.offset = DEFAULT_PACKET_OFFSET;
143                 memset(DATA(&packet), 0, 14);
144                 randomize(DATA(&packet) + 14, len - 14);
145                 packet.len = len;
146                 packet.priority = 0;
                    /* Only the extra fifth probe of the first ten bursts is flagged
                       send_locally, and only if n->prevedge is set. */
147                 n->status.send_locally = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
148
149                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
150
151                 send_udppacket(n, &packet);
152         }
153
            /* Reset the per-burst bookkeeping that mtu_probe_h() uses for its
               RTT and bandwidth measurements. */
154         n->status.send_locally = false;
155         n->probe_counter = 0;
156         gettimeofday(&n->probe_time, NULL);
157
158         /* Calculate the packet loss of incoming traffic by comparing the rate of
159            packets received to the rate with which the sequence number has increased.
160          */
161
            /* NOTE(review): the divisor is not checked; it is zero if packets were
               counted while received_seqno did not advance - confirm that is benign. */
162         if(n->received > n->prev_received)
163                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
164         else
165                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
166
167         n->prev_received_seqno = n->received_seqno;
168         n->prev_received = n->received;
169
170 end:
            /* Re-arm the timer; the microsecond part is randomized. */
171         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
172 }
173
174 void send_mtu_probe(node_t *n) {
175         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
176         send_mtu_probe_handler(n);
177 }
178
/* Handle an MTU probe packet received from node n.

   packet is the probe itself and len is the length the caller wants reported
   (receive_udppacket() passes the packet length before decompression, minus a
   correction when compression is in use).

   The first data byte selects the meaning:
     0  probe request: it is rewritten into a reply and sent straight back;
     1  legacy probe reply: the reply length is taken from len;
     2  type 2 probe reply: carries the original probe length and the
        replier's timestamp in the packet body.
   Replies confirm UDP connectivity, raise n->minmtu and feed the RTT and
   bandwidth estimates. */
179 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
180         if(!DATA(packet)[0]) {
181                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);
182
183                 /* It's a probe request, send back a reply */
184
185                 /* Type 2 probe replies were introduced in protocol 17.3 */
186                 if ((n->options >> 24) >= 3) {
                            /* Reply layout: byte 0 = 2, bytes 1-2 = len, bytes 3-6 = seconds,
                               bytes 7-10 = microseconds, all in network byte order. */
187                         uint8_t *data = DATA(packet);
188                         *data++ = 2;
189                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
190                         struct timeval now;
191                         gettimeofday(&now, NULL);
192                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
193                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
                            /* NOTE(review): the reply is shortened by 10 bytes although 11
                               header bytes were written; the reason is not visible here. */
194                         packet->len -= 10;
195                 } else {
196                         /* Legacy protocol: n won't understand type 2 probe replies. */
197                         DATA(packet)[0] = 1;
198                 }
199
200                 /* Temporarily set udp_confirmed, so that the reply is sent
201                    back exactly the way it came in. */
202
203                 bool udp_confirmed = n->status.udp_confirmed;
204                 n->status.udp_confirmed = true;
205                 send_udppacket(n, packet);
206                 n->status.udp_confirmed = udp_confirmed;
207         } else {
                    /* For type 2 replies the probed length comes from the packet body,
                       otherwise the received length is used. A too short type 2 reply
                       is only logged; processing continues with probelen = len. */
208                 length_t probelen = len;
209                 if (DATA(packet)[0] == 2) {
210                         if (len < 3)
211                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
212                         else {
213                                 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
214                         }
215                 }
216                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
217
218                 /* It's a valid reply: now we know bidirectional communication
219                    is possible using the address and socket that the reply
220                    packet used. */
221
222                 n->status.udp_confirmed = true;
223
224                 /* If we haven't established the PMTU yet, restart the discovery process. */
225
226                 if(n->mtuprobes > 30) {
                            /* A reply of exactly maxmtu + 8 bytes answers the oversized
                               probe from send_mtu_probe_handler(): the path MTU has grown. */
227                         if (probelen == n->maxmtu + 8) {
228                                 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
229                                 n->maxmtu = MTU;
230                                 n->mtuprobes = 10;
231                                 return;
232                         }
233
234                         if(n->minmtu)
235                                 n->mtuprobes = 30;
236                         else
237                                 n->mtuprobes = 1;
238                 }
239
240                 /* If applicable, raise the minimum supported MTU */
241
242                 if(probelen > n->maxmtu)
243                         probelen = n->maxmtu;
244                 if(n->minmtu < probelen)
245                         n->minmtu = probelen;
246
247                 /* Calculate RTT and bandwidth.
248                    The RTT is the time between the MTU probe burst was sent and the first
249                    reply is received. The bandwidth is measured using the time between the
250                    arrival of the first and third probe reply (or type 2 probe requests).
251                  */
252
253                 struct timeval now, diff;
254                 gettimeofday(&now, NULL);
255                 timersub(&now, &n->probe_time, &diff);
256
                    /* Use the sender's timestamp when the reply carries one (type 2 and
                       at least 11 bytes long), otherwise fall back to the local time. */
257                 struct timeval probe_timestamp = now;
258                 if (DATA(packet)[0] == 2 && packet->len >= 11) {
259                         uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
260                         uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
261                         probe_timestamp.tv_sec = ntohl(sec);
262                         probe_timestamp.tv_usec = ntohl(usec);
263                 }
264                 
265                 n->probe_counter++;
266
                    /* First reply of the burst: record the RTT and remember its timestamp.
                       Third reply: derive the bandwidth from the time since the first. */
267                 if(n->probe_counter == 1) {
268                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
269                         n->probe_time = probe_timestamp;
270                 } else if(n->probe_counter == 3) {
271                         struct timeval probe_timestamp_diff;
272                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
273                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
274                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
275                 }
276         }
277 }
278
279 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
280         if(level == 0) {
281                 memcpy(dest, source, len);
282                 return len;
283         } else if(level == 10) {
284 #ifdef HAVE_LZO
285                 lzo_uint lzolen = MAXSIZE;
286                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
287                 return lzolen;
288 #else
289                 return -1;
290 #endif
291         } else if(level < 10) {
292 #ifdef HAVE_ZLIB
293                 unsigned long destlen = MAXSIZE;
294                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
295                         return destlen;
296                 else
297 #endif
298                         return -1;
299         } else {
300 #ifdef HAVE_LZO
301                 lzo_uint lzolen = MAXSIZE;
302                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
303                 return lzolen;
304 #else
305                 return -1;
306 #endif
307         }
308
309         return -1;
310 }
311
312 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
313         if(level == 0) {
314                 memcpy(dest, source, len);
315                 return len;
316         } else if(level > 9) {
317 #ifdef HAVE_LZO
318                 lzo_uint lzolen = MAXSIZE;
319                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
320                         return lzolen;
321                 else
322 #endif
323                         return -1;
324         }
325 #ifdef HAVE_ZLIB
326         else {
327                 unsigned long destlen = MAXSIZE;
328                 if(uncompress(dest, &destlen, source, len) == Z_OK)
329                         return destlen;
330                 else
331                         return -1;
332         }
333 #endif
334
335         return -1;
336 }
337
338 /* VPN packet I/O */
339
340 static void receive_packet(node_t *n, vpn_packet_t *packet) {
341         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
342                            packet->len, n->name, n->hostname);
343
344         n->in_packets++;
345         n->in_bytes += packet->len;
346
347         route(n, packet);
348 }
349
350 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
351         if(n->status.sptps)
352                 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
353
354 #ifdef DISABLE_LEGACY
355         return false;
356 #else
357         if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
358                 return false;
359
360         return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
361 #endif
362 }
363
/* Process one UDP packet that was attributed to node n.

   SPTPS nodes: the packet is handed to sptps_receive_data() after skipping
   the two node IDs at its start.
   Legacy nodes: the packet is authenticated, decrypted, checked against the
   replay window, decompressed and finally dispatched to mtu_probe_h() (MTU
   probes) or receive_packet() (regular traffic).

   Returns true if the packet was accepted and handled, false if it was
   dropped. Built with DISABLE_LEGACY, non-SPTPS packets are always dropped. */
364 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
            /* Two scratch packets used alternately as the output of the
               decryption and decompression stages. */
365         vpn_packet_t pkt1, pkt2;
366         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
367         int nextpkt = 0;
368         size_t outlen;
369         pkt1.offset = DEFAULT_PACKET_OFFSET;
370         pkt2.offset = DEFAULT_PACKET_OFFSET;
371
372         if(n->status.sptps) {
                    /* No SPTPS session yet: request a key unless one is already pending. */
373                 if(!n->sptps.state) {
374                         if(!n->status.waitingforkey) {
375                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
376                                 send_req_key(n);
377                         } else {
378                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
379                         }
380                         return false;
381                 }
                    /* Skip the two node IDs in front of the SPTPS datagram.
                       NOTE(review): inpkt->len is not checked to be at least
                       2 * sizeof(node_id_t) here - confirm the caller guarantees it. */
382                 inpkt->offset += 2 * sizeof(node_id_t);
383                 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
384                         logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
385                         return false;
386                 }
387                 return true;
388         }
389
390 #ifdef DISABLE_LEGACY
391         return false;
392 #else
393         if(!n->status.validkey) {
394                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
395                 return false;
396         }
397
398         /* Check packet length */
399
400         if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
401                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
402                                         n->name, n->hostname);
403                 return false;
404         }
405
406         /* It's a legacy UDP packet, the data starts after the seqno */
407
408         inpkt->offset += sizeof(seqno_t);
409
410         /* Check the message authentication code */
411
            /* The MAC trails the packet; strip it from the length before verifying. */
412         if(digest_active(n->indigest)) {
413                 inpkt->len -= digest_length(n->indigest);
414                 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
415                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
416                         return false;
417                 }
418         }
419         /* Decrypt the packet */
420
421         if(cipher_active(n->incipher)) {
422                 vpn_packet_t *outpkt = pkt[nextpkt++];
423                 outlen = MAXSIZE;
424
425                 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
426                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
427                         return false;
428                 }
429
430                 outpkt->len = outlen;
431                 inpkt = outpkt;
432         }
433
434         /* Check the sequence number */
435
436         seqno_t seqno;
437         memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
438         seqno = ntohl(seqno);
439         inpkt->len -= sizeof seqno;
440
            /* Replay protection: n->late is a bitmap of replaywin bytes in which a
               set bit marks a sequence number that was skipped and may still arrive. */
441         if(replaywin) {
442                 if(seqno != n->received_seqno + 1) {
443                         if(seqno >= n->received_seqno + replaywin * 8) {
                                    /* Beyond the window: drop the first few such packets in a
                                       row, then accept the jump and clear the bitmap. */
444                                 if(n->farfuture++ < replaywin >> 2) {
445                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
446                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
447                                         return false;
448                                 }
449                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
450                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
451                                 memset(n->late, 0, replaywin);
452                         } else if (seqno <= n->received_seqno) {
                                    /* Not newer than the last one: accept only if it lies inside
                                       the window and its late bit is still set. */
453                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
454                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
455                                                 n->name, n->hostname, seqno, n->received_seqno);
456                                         return false;
457                                 }
458                         } else {
                                    /* A gap inside the window: mark every skipped number as late. */
459                                 for(int i = n->received_seqno + 1; i < seqno; i++)
460                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
461                         }
462                 }
463
                    /* Packet accepted: reset the far-future counter and clear its late bit. */
464                 n->farfuture = 0;
465                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
466         }
467
468         if(seqno > n->received_seqno)
469                 n->received_seqno = seqno;
470
471         n->received++;
472
473         if(n->received_seqno > MAX_SEQNO)
474                 regenerate_key();
475
476         /* Decompress the packet */
477
            /* origlen is the length reported to mtu_probe_h() below. */
478         length_t origlen = inpkt->len;
479
480         if(n->incompression) {
481                 vpn_packet_t *outpkt = pkt[nextpkt++];
482
483                 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
484                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
485                                                  n->name, n->hostname);
486                         return false;
487                 }
488
489                 inpkt = outpkt;
490
                    /* NOTE(review): presumably compensates for compression overhead in
                       the reported probe length - the constant is not explained here. */
491                 origlen -= MTU/64 + 20;
492         }
493
494         inpkt->priority = 0;
495
            /* Bytes 12 and 13 both zero mark an MTU probe.
               NOTE(review): inpkt->len is not checked to be at least 14 before
               these bytes are read - confirm short packets cannot reach this point. */
496         if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
497                 mtu_probe_h(n, inpkt, origlen);
498         else
499                 receive_packet(n, inpkt);
500         return true;
501 #endif
502 }
503
504 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
505         vpn_packet_t outpkt;
506         outpkt.offset = DEFAULT_PACKET_OFFSET;
507
508         if(len > sizeof outpkt.data - outpkt.offset)
509                 return;
510
511         outpkt.len = len;
512         if(c->options & OPTION_TCPONLY)
513                 outpkt.priority = 0;
514         else
515                 outpkt.priority = -1;
516         memcpy(DATA(&outpkt), buffer, len);
517
518         receive_packet(c->node, &outpkt);
519 }
520
521 static bool try_sptps(node_t *n) {
522         if(n->status.validkey)
523                 return true;
524
525         /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
526            messages anyway, so there's no need for SPTPS at all. */
527         if(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))
528                 return false;
529
530         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
531
532         if(!n->status.waitingforkey)
533                 send_req_key(n);
534         else if(n->last_req_key + 10 < now.tv_sec) {
535                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
536                 sptps_stop(&n->sptps);
537                 n->status.waitingforkey = false;
538                 send_req_key(n);
539         }
540
541         return false;
542 }
543
544 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
545         /* Note: condition order is as intended - even if we have a direct
546            metaconnection, we want to try SPTPS anyway as it's the only way to
547            get UDP going */
548         if(!try_sptps(n) && !n->connection)
549                 return;
550
551         uint8_t type = 0;
552         int offset = 0;
553
554         if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
555                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
556                 return;
557         }
558
559         if(routing_mode == RMODE_ROUTER)
560                 offset = 14;
561         else
562                 type = PKT_MAC;
563
564         if(origpkt->len < offset)
565                 return;
566
567         vpn_packet_t outpkt;
568
569         if(n->outcompression) {
570                 outpkt.offset = 0;
571                 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
572                 if(len < 0) {
573                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
574                 } else if(len < origpkt->len - offset) {
575                         outpkt.len = len + offset;
576                         origpkt = &outpkt;
577                         type |= PKT_COMPRESSED;
578                 }
579         }
580
581         /* If we have a direct metaconnection to n, and we can't use UDP, then
582            don't bother with SPTPS and just use a "plaintext" PACKET message.
583            We don't really care about end-to-end security since we're not
584            sending the message through any intermediate nodes. */
585         if(n->connection && origpkt->len > n->minmtu)
586                 send_tcppacket(n->connection, origpkt);
587         else
588                 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
589         return;
590 }
591
592 static void adapt_socket(const sockaddr_t *sa, int *sock) {
593         /* Make sure we have a suitable socket for the chosen address */
594         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
595                 for(int i = 0; i < listen_sockets; i++) {
596                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
597                                 *sock = i;
598                                 break;
599                         }
600                 }
601         }
602 }
603
604 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
605         /* Latest guess */
606         *sa = &n->address;
607         *sock = n->sock;
608
609         /* If the UDP address is confirmed, use it. */
610         if(n->status.udp_confirmed)
611                 return;
612
613         /* Send every third packet to n->address; that could be set
614            to the node's reflexive UDP address discovered during key
615            exchange. */
616
617         static int x = 0;
618         if(++x >= 3) {
619                 x = 0;
620                 return;
621         }
622
623         /* Otherwise, address are found in edges to this node.
624            So we pick a random edge and a random socket. */
625
626         int i = 0;
627         int j = rand() % n->edge_tree->count;
628         edge_t *candidate = NULL;
629
630         for splay_each(edge_t, e, n->edge_tree) {
631                 if(i++ == j) {
632                         candidate = e->reverse;
633                         break;
634                 }
635         }
636
637         if(candidate) {
638                 *sa = &candidate->address;
639                 *sock = rand() % listen_sockets;
640         }
641
642         adapt_socket(*sa, sock);
643 }
644
645 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
646         *sa = NULL;
647
648         /* Pick one of the edges from this node at random, then use its local address. */
649
650         int i = 0;
651         int j = rand() % n->edge_tree->count;
652         edge_t *candidate = NULL;
653
654         for splay_each(edge_t, e, n->edge_tree) {
655                 if(i++ == j) {
656                         candidate = e;
657                         break;
658                 }
659         }
660
661         if (candidate && candidate->local_address.sa.sa_family) {
662                 *sa = &candidate->local_address;
663                 *sock = rand() % listen_sockets;
664                 adapt_socket(*sa, sock);
665         }
666 }
667
668 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
669         vpn_packet_t pkt1, pkt2;
670         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
671         vpn_packet_t *inpkt = origpkt;
672         int nextpkt = 0;
673         vpn_packet_t *outpkt;
674         int origlen = origpkt->len;
675         size_t outlen;
676 #if defined(SOL_IP) && defined(IP_TOS)
677         static int priority = 0;
678         int origpriority = origpkt->priority;
679 #endif
680
681         pkt1.offset = DEFAULT_PACKET_OFFSET;
682         pkt2.offset = DEFAULT_PACKET_OFFSET;
683
684         if(!n->status.reachable) {
685                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
686                 return;
687         }
688
689         if(n->status.sptps)
690                 return send_sptps_packet(n, origpkt);
691
692 #ifdef DISABLE_LEGACY
693         return;
694 #else
695         /* Make sure we have a valid key */
696
697         if(!n->status.validkey) {
698                 logger(DEBUG_TRAFFIC, LOG_INFO,
699                                    "No valid key known yet for %s (%s), forwarding via TCP",
700                                    n->name, n->hostname);
701
702                 if(n->last_req_key + 10 <= now.tv_sec) {
703                         send_req_key(n);
704                         n->last_req_key = now.tv_sec;
705                 }
706
707                 send_tcppacket(n->nexthop->connection, origpkt);
708
709                 return;
710         }
711
712         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
713                 logger(DEBUG_TRAFFIC, LOG_INFO,
714                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
715                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
716
717                 if(n != n->nexthop)
718                         send_packet(n->nexthop, origpkt);
719                 else
720                         send_tcppacket(n->nexthop->connection, origpkt);
721
722                 return;
723         }
724
725         /* Compress the packet */
726
727         if(n->outcompression) {
728                 outpkt = pkt[nextpkt++];
729
730                 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
731                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
732                                    n->name, n->hostname);
733                         return;
734                 }
735
736                 inpkt = outpkt;
737         }
738
739         /* Add sequence number */
740
741         seqno_t seqno = htonl(++(n->sent_seqno));
742         memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
743         inpkt->len += sizeof seqno;
744
745         /* Encrypt the packet */
746
747         if(cipher_active(n->outcipher)) {
748                 outpkt = pkt[nextpkt++];
749                 outlen = MAXSIZE;
750
751                 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
752                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
753                         goto end;
754                 }
755
756                 outpkt->len = outlen;
757                 inpkt = outpkt;
758         }
759
760         /* Add the message authentication code */
761
762         if(digest_active(n->outdigest)) {
763                 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
764                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
765                         goto end;
766                 }
767
768                 inpkt->len += digest_length(n->outdigest);
769         }
770
771         /* Send the packet */
772
773         const sockaddr_t *sa = NULL;
774         int sock;
775
776         if(n->status.send_locally)
777                 choose_local_address(n, &sa, &sock);
778         if(!sa)
779                 choose_udp_address(n, &sa, &sock);
780
781 #if defined(SOL_IP) && defined(IP_TOS)
782         if(priorityinheritance && origpriority != priority
783            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
784                 priority = origpriority;
785                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
786                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
787                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
788         }
789 #endif
790
791         if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
792                 if(sockmsgsize(sockerrno)) {
793                         if(n->maxmtu >= origlen)
794                                 n->maxmtu = origlen - 1;
795                         if(n->mtu >= origlen)
796                                 n->mtu = origlen - 1;
797                 } else
798                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
799         }
800
801 end:
802         origpkt->len = origlen;
803 #endif
804 }
805
806 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
807         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
808         bool direct = from == myself && to == relay;
809         bool relay_supported = (relay->options >> 24) >= 4;
810         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
811
812         /* We don't really need the relay's key, but we need to establish a UDP tunnel with it and discover its MTU. */
813         if (!direct && relay_supported && !tcponly)
814                 try_sptps(relay);
815
816         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
817            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
818                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
819
820         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
821                 char buf[len * 4 / 3 + 5];
822                 b64encode(data, buf, len);
823                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
824                    to ensure we get to learn the reflexive UDP address. */
825                 if(from == myself && !to->status.validkey) {
826                         to->incompression = myself->incompression;
827                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
828                 } else {
829                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
830                 }
831         }
832
833         size_t overhead = 0;
834         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
835         char buf[len + overhead]; char* buf_ptr = buf;
836         if(relay_supported) {
837                 if(direct) {
838                         /* Inform the recipient that this packet was sent directly. */
839                         node_id_t nullid = {};
840                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
841                 } else {
842                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
843                 }
844                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
845
846         }
847         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
848         memcpy(buf_ptr, data, len); buf_ptr += len;
849
850         const sockaddr_t *sa = NULL;
851         int sock;
852         if(relay->status.send_locally)
853                 choose_local_address(relay, &sa, &sock);
854         if(!sa)
855                 choose_udp_address(relay, &sa, &sock);
856         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
857         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
858                 if(sockmsgsize(sockerrno)) {
859                         // Compensate for SPTPS overhead
860                         len -= SPTPS_DATAGRAM_OVERHEAD;
861                         if(relay->maxmtu >= len)
862                                 relay->maxmtu = len - 1;
863                         if(relay->mtu >= len)
864                                 relay->mtu = len - 1;
865                 } else {
866                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
867                         return false;
868                 }
869         }
870
871         return true;
872 }
873
874 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
875         return send_sptps_data_priv(handle, myself, type, data, len);
876 }
877
878 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
879         node_t *from = handle;
880
881         if(type == SPTPS_HANDSHAKE) {
882                 if(!from->status.validkey) {
883                         from->status.validkey = true;
884                         from->status.waitingforkey = false;
885                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
886                 }
887                 return true;
888         }
889
890         if(len > MTU) {
891                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
892                 return false;
893         }
894
895         vpn_packet_t inpkt;
896         inpkt.offset = DEFAULT_PACKET_OFFSET;
897
898         if(type == PKT_PROBE) {
899                 inpkt.len = len;
900                 memcpy(DATA(&inpkt), data, len);
901                 mtu_probe_h(from, &inpkt, len);
902                 return true;
903         }
904
905         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
906                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
907                 return false;
908         }
909
910         /* Check if we have the headers we need */
911         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
912                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
913                 return false;
914         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
915                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
916         }
917
918         int offset = (type & PKT_MAC) ? 0 : 14;
919         if(type & PKT_COMPRESSED) {
920                 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
921                 if(ulen < 0) {
922                         return false;
923                 } else {
924                         inpkt.len = ulen + offset;
925                 }
926                 if(inpkt.len > MAXSIZE)
927                         abort();
928         } else {
929                 memcpy(DATA(&inpkt) + offset, data, len);
930                 inpkt.len = len + offset;
931         }
932
933         /* Generate the Ethernet packet type if necessary */
934         if(offset) {
935                 switch(DATA(&inpkt)[14] >> 4) {
936                         case 4:
937                                 DATA(&inpkt)[12] = 0x08;
938                                 DATA(&inpkt)[13] = 0x00;
939                                 break;
940                         case 6:
941                                 DATA(&inpkt)[12] = 0x86;
942                                 DATA(&inpkt)[13] = 0xDD;
943                                 break;
944                         default:
945                                 logger(DEBUG_TRAFFIC, LOG_ERR,
946                                                    "Unknown IP version %d while reading packet from %s (%s)",
947                                                    DATA(&inpkt)[14] >> 4, from->name, from->hostname);
948                                 return false;
949                 }
950         }
951
952         receive_packet(from, &inpkt);
953         return true;
954 }
955
956 /*
957   send a packet to the given vpn ip.
958 */
959 void send_packet(node_t *n, vpn_packet_t *packet) {
960         node_t *via;
961
962         if(n == myself) {
963                 if(overwrite_mac)
964                          memcpy(DATA(packet), mymac.x, ETH_ALEN);
965                 n->out_packets++;
966                 n->out_bytes += packet->len;
967                 devops.write(packet);
968                 return;
969         }
970
971         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
972                            packet->len, n->name, n->hostname);
973
974         if(!n->status.reachable) {
975                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
976                                    n->name, n->hostname);
977                 return;
978         }
979
980         n->out_packets++;
981         n->out_bytes += packet->len;
982
983         if(n->status.sptps) {
984                 send_sptps_packet(n, packet);
985                 return;
986         }
987
988         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
989
990         if(via != n)
991                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
992                            n->name, via->name, n->via->hostname);
993
994         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
995                 if(!send_tcppacket(via->connection, packet))
996                         terminate_connection(via->connection, true);
997         } else
998                 send_udppacket(via, packet);
999 }
1000
1001 /* Broadcast a packet using the minimum spanning tree */
1002
1003 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1004         // Always give ourself a copy of the packet.
1005         if(from != myself)
1006                 send_packet(myself, packet);
1007
1008         // In TunnelServer mode, do not forward broadcast packets.
1009         // The MST might not be valid and create loops.
1010         if(tunnelserver || broadcast_mode == BMODE_NONE)
1011                 return;
1012
1013         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1014                            packet->len, from->name, from->hostname);
1015
1016         switch(broadcast_mode) {
1017                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1018                 // This guarantees all nodes receive the broadcast packet, and
1019                 // usually distributes the sending of broadcast packets over all nodes.
1020                 case BMODE_MST:
1021                         for list_each(connection_t, c, connection_list)
1022                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
1023                                         send_packet(c->node, packet);
1024                         break;
1025
1026                 // In direct mode, we send copies to each node we know of.
1027                 // However, this only reaches nodes that can be reached in a single hop.
1028                 // We don't have enough information to forward broadcast packets in this case.
1029                 case BMODE_DIRECT:
1030                         if(from != myself)
1031                                 break;
1032
1033                         for splay_each(node_t, n, node_tree)
1034                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1035                                         send_packet(n, packet);
1036                         break;
1037
1038                 default:
1039                         break;
1040         }
1041 }
1042
1043 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1044         node_t *n = NULL;
1045         bool hard = false;
1046         static time_t last_hard_try = 0;
1047
1048         for splay_each(edge_t, e, edge_weight_tree) {
1049                 if(!e->to->status.reachable || e->to == myself)
1050                         continue;
1051
1052                 if(sockaddrcmp_noport(from, &e->address)) {
1053                         if(last_hard_try == now.tv_sec)
1054                                 continue;
1055                         hard = true;
1056                 }
1057
1058                 if(!try_mac(e->to, pkt))
1059                         continue;
1060
1061                 n = e->to;
1062                 break;
1063         }
1064
1065         if(hard)
1066                 last_hard_try = now.tv_sec;
1067
1068         last_hard_try = now.tv_sec;
1069         return n;
1070 }
1071
1072 void handle_incoming_vpn_data(void *data, int flags) {
1073         listen_socket_t *ls = data;
1074         vpn_packet_t pkt;
1075         char *hostname;
1076         node_id_t nullid = {};
1077         sockaddr_t addr = {};
1078         socklen_t addrlen = sizeof addr;
1079         node_t *from, *to;
1080         bool direct = false;
1081
1082         pkt.offset = 0;
1083         int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1084
1085         if(len <= 0 || len > MAXSIZE) {
1086                 if(!sockwouldblock(sockerrno))
1087                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1088                 return;
1089         }
1090
1091         pkt.len = len;
1092
1093         sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1094
1095         // Try to figure out who sent this packet.
1096
1097         node_t *n = lookup_node_udp(&addr);
1098
1099         if(!n) {
1100                 // It might be from a 1.1 node, which might have a source ID in the packet.
1101                 pkt.offset = 2 * sizeof(node_id_t);
1102                 from = lookup_node_id(SRCID(&pkt));
1103                 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1104                         if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1105                                 n = from;
1106                         else
1107                                 goto skip_harder;
1108                 }
1109         }
1110
1111         if(!n) {
1112                 pkt.offset = 0;
1113                 n = try_harder(&addr, &pkt);
1114         }
1115
1116 skip_harder:
1117         if(!n) {
1118                 if(debug_level >= DEBUG_PROTOCOL) {
1119                         hostname = sockaddr2hostname(&addr);
1120                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1121                         free(hostname);
1122                 }
1123                 return;
1124         }
1125
1126         if(n->status.sptps) {
1127                 pkt.offset = 2 * sizeof(node_id_t);
1128
1129                 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1130                         direct = true;
1131                         from = n;
1132                         to = myself;
1133                 } else {
1134                         from = lookup_node_id(SRCID(&pkt));
1135                         to = lookup_node_id(DSTID(&pkt));
1136                 }
1137                 if(!from || !to) {
1138                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1139                         return;
1140                 }
1141
1142                 if(to != myself) {
1143                         send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1144                         return;
1145                 }
1146         } else {
1147                 direct = true;
1148                 from = n;
1149         }
1150
1151         pkt.offset = 0;
1152         if(!receive_udppacket(from, &pkt))
1153                 return;
1154
1155         n->sock = ls - listen_socket;
1156         if(direct && sockaddrcmp(&addr, &n->address))
1157                 update_node_udp(n, &addr);
1158 }
1159
1160 void handle_device_data(void *data, int flags) {
1161         vpn_packet_t packet;
1162         packet.offset = DEFAULT_PACKET_OFFSET;
1163         packet.priority = 0;
1164
1165         if(devops.read(&packet)) {
1166                 myself->in_packets++;
1167                 myself->in_bytes += packet.len;
1168                 route(myself, &packet);
1169         }
1170 }