Use plain old PACKET for TCP packets sent directly to a neighbor.
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
49 int keylifetime = 0;
50 #ifdef HAVE_LZO
51 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
52 #endif
53
54 static void send_udppacket(node_t *, vpn_packet_t *);
55
56 unsigned replaywin = 16;
57 bool localdiscovery = true;
58
59 #define MAX_SEQNO 1073741824
60
61 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
62    mtuprobes ==    31: sleep pinginterval seconds
63    mtuprobes ==    32: send 1 burst, sleep pingtimeout seconds
64    mtuprobes ==    33: no response from other side, restart PMTU discovery process
65
66    Probes are sent in batches of at least three, with random sizes between the
67    lower and upper boundaries for the MTU thus far discovered.
68
69    After the initial discovery, a fourth packet is added to each batch with a
70    size larger than the currently known PMTU, to test if the PMTU has increased.
71
72    In case local discovery is enabled, another packet is added to each batch,
73    which will be broadcast to the local network.
74
75 */
76
77 static void send_mtu_probe_handler(void *data) {
78         node_t *n = data;
79         int timeout = 1;
80
81         n->mtuprobes++;
82
83         if(!n->status.reachable || !n->status.validkey) {
84                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
85                 n->mtuprobes = 0;
86                 return;
87         }
88
89         if(n->mtuprobes > 32) {
90                 if(!n->minmtu) {
91                         n->mtuprobes = 31;
92                         timeout = pinginterval;
93                         goto end;
94                 }
95
96                 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
97                 n->status.udp_confirmed = false;
98                 n->mtuprobes = 1;
99                 n->minmtu = 0;
100                 n->maxmtu = MTU;
101         }
102
103         if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
104                 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
105                 n->mtuprobes = 31;
106         }
107
108         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
109                 if(n->minmtu > n->maxmtu)
110                         n->minmtu = n->maxmtu;
111                 else
112                         n->maxmtu = n->minmtu;
113                 n->mtu = n->minmtu;
114                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
115                 n->mtuprobes = 31;
116         }
117
118         if(n->mtuprobes == 31) {
119                 timeout = pinginterval;
120                 goto end;
121         } else if(n->mtuprobes == 32) {
122                 timeout = pingtimeout;
123         }
124
125         for(int i = 0; i < 4 + localdiscovery; i++) {
126                 int len;
127
128                 if(i == 0) {
129                         if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
130                                 continue;
131                         len = n->maxmtu + 8;
132                 } else if(n->maxmtu <= n->minmtu) {
133                         len = n->maxmtu;
134                 } else {
135                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
136                 }
137
138                 if(len < 64)
139                         len = 64;
140
141                 vpn_packet_t packet;
142                 packet.offset = DEFAULT_PACKET_OFFSET;
143                 memset(DATA(&packet), 0, 14);
144                 randomize(DATA(&packet) + 14, len - 14);
145                 packet.len = len;
146                 packet.priority = 0;
147                 n->status.send_locally = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
148
149                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
150
151                 send_udppacket(n, &packet);
152         }
153
154         n->status.send_locally = false;
155         n->probe_counter = 0;
156         gettimeofday(&n->probe_time, NULL);
157
158         /* Calculate the packet loss of incoming traffic by comparing the rate of
159            packets received to the rate with which the sequence number has increased.
160          */
161
162         if(n->received > n->prev_received)
163                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
164         else
165                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
166
167         n->prev_received_seqno = n->received_seqno;
168         n->prev_received = n->received;
169
170 end:
171         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
172 }
173
174 void send_mtu_probe(node_t *n) {
175         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
176         send_mtu_probe_handler(n);
177 }
178
179 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
180         if(!DATA(packet)[0]) {
181                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);
182
183                 /* It's a probe request, send back a reply */
184
185                 /* Type 2 probe replies were introduced in protocol 17.3 */
186                 if ((n->options >> 24) >= 3) {
187                         uint8_t *data = DATA(packet);
188                         *data++ = 2;
189                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
190                         struct timeval now;
191                         gettimeofday(&now, NULL);
192                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
193                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
194                         packet->len -= 10;
195                 } else {
196                         /* Legacy protocol: n won't understand type 2 probe replies. */
197                         DATA(packet)[0] = 1;
198                 }
199
200                 /* Temporarily set udp_confirmed, so that the reply is sent
201                    back exactly the way it came in. */
202
203                 bool udp_confirmed = n->status.udp_confirmed;
204                 n->status.udp_confirmed = true;
205                 send_udppacket(n, packet);
206                 n->status.udp_confirmed = udp_confirmed;
207         } else {
208                 length_t probelen = len;
209                 if (DATA(packet)[0] == 2) {
210                         if (len < 3)
211                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
212                         else {
213                                 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
214                         }
215                 }
216                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
217
218                 /* It's a valid reply: now we know bidirectional communication
219                    is possible using the address and socket that the reply
220                    packet used. */
221
222                 n->status.udp_confirmed = true;
223
224                 /* If we haven't established the PMTU yet, restart the discovery process. */
225
226                 if(n->mtuprobes > 30) {
227                         if (probelen == n->maxmtu + 8) {
228                                 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
229                                 n->maxmtu = MTU;
230                                 n->mtuprobes = 10;
231                                 return;
232                         }
233
234                         if(n->minmtu)
235                                 n->mtuprobes = 30;
236                         else
237                                 n->mtuprobes = 1;
238                 }
239
240                 /* If applicable, raise the minimum supported MTU */
241
242                 if(probelen > n->maxmtu)
243                         probelen = n->maxmtu;
244                 if(n->minmtu < probelen)
245                         n->minmtu = probelen;
246
247                 /* Calculate RTT and bandwidth.
248                    The RTT is the time between the MTU probe burst was sent and the first
249                    reply is received. The bandwidth is measured using the time between the
250                    arrival of the first and third probe reply (or type 2 probe requests).
251                  */
252
253                 struct timeval now, diff;
254                 gettimeofday(&now, NULL);
255                 timersub(&now, &n->probe_time, &diff);
256
257                 struct timeval probe_timestamp = now;
258                 if (DATA(packet)[0] == 2 && packet->len >= 11) {
259                         uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
260                         uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
261                         probe_timestamp.tv_sec = ntohl(sec);
262                         probe_timestamp.tv_usec = ntohl(usec);
263                 }
264                 
265                 n->probe_counter++;
266
267                 if(n->probe_counter == 1) {
268                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
269                         n->probe_time = probe_timestamp;
270                 } else if(n->probe_counter == 3) {
271                         struct timeval probe_timestamp_diff;
272                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
273                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
274                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
275                 }
276         }
277 }
278
279 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
280         if(level == 0) {
281                 memcpy(dest, source, len);
282                 return len;
283         } else if(level == 10) {
284 #ifdef HAVE_LZO
285                 lzo_uint lzolen = MAXSIZE;
286                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
287                 return lzolen;
288 #else
289                 return -1;
290 #endif
291         } else if(level < 10) {
292 #ifdef HAVE_ZLIB
293                 unsigned long destlen = MAXSIZE;
294                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
295                         return destlen;
296                 else
297 #endif
298                         return -1;
299         } else {
300 #ifdef HAVE_LZO
301                 lzo_uint lzolen = MAXSIZE;
302                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
303                 return lzolen;
304 #else
305                 return -1;
306 #endif
307         }
308
309         return -1;
310 }
311
312 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
313         if(level == 0) {
314                 memcpy(dest, source, len);
315                 return len;
316         } else if(level > 9) {
317 #ifdef HAVE_LZO
318                 lzo_uint lzolen = MAXSIZE;
319                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
320                         return lzolen;
321                 else
322 #endif
323                         return -1;
324         }
325 #ifdef HAVE_ZLIB
326         else {
327                 unsigned long destlen = MAXSIZE;
328                 if(uncompress(dest, &destlen, source, len) == Z_OK)
329                         return destlen;
330                 else
331                         return -1;
332         }
333 #endif
334
335         return -1;
336 }
337
338 /* VPN packet I/O */
339
340 static void receive_packet(node_t *n, vpn_packet_t *packet) {
341         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
342                            packet->len, n->name, n->hostname);
343
344         n->in_packets++;
345         n->in_bytes += packet->len;
346
347         route(n, packet);
348 }
349
350 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
351         if(n->status.sptps)
352                 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
353
354         if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
355                 return false;
356
357         return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
358 }
359
360 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
361         vpn_packet_t pkt1, pkt2;
362         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
363         int nextpkt = 0;
364         size_t outlen;
365         pkt1.offset = DEFAULT_PACKET_OFFSET;
366         pkt2.offset = DEFAULT_PACKET_OFFSET;
367
368         if(n->status.sptps) {
369                 if(!n->sptps.state) {
370                         if(!n->status.waitingforkey) {
371                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
372                                 send_req_key(n);
373                         } else {
374                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
375                         }
376                         return false;
377                 }
378                 inpkt->offset += 2 * sizeof(node_id_t);
379                 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
380                         logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
381                         return false;
382                 }
383                 return true;
384         }
385
386         if(!n->status.validkey) {
387                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
388                 return false;
389         }
390
391         /* Check packet length */
392
393         if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
394                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
395                                         n->name, n->hostname);
396                 return false;
397         }
398
399         /* It's a legacy UDP packet, the data starts after the seqno */
400
401         inpkt->offset += sizeof(seqno_t);
402
403         /* Check the message authentication code */
404
405         if(digest_active(n->indigest)) {
406                 inpkt->len -= digest_length(n->indigest);
407                 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
408                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
409                         return false;
410                 }
411         }
412         /* Decrypt the packet */
413
414         if(cipher_active(n->incipher)) {
415                 vpn_packet_t *outpkt = pkt[nextpkt++];
416                 outlen = MAXSIZE;
417
418                 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
419                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
420                         return false;
421                 }
422
423                 outpkt->len = outlen;
424                 inpkt = outpkt;
425         }
426
427         /* Check the sequence number */
428
429         seqno_t seqno;
430         memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
431         seqno = ntohl(seqno);
432         inpkt->len -= sizeof seqno;
433
434         if(replaywin) {
435                 if(seqno != n->received_seqno + 1) {
436                         if(seqno >= n->received_seqno + replaywin * 8) {
437                                 if(n->farfuture++ < replaywin >> 2) {
438                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
439                                                 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
440                                         return false;
441                                 }
442                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
443                                                 seqno - n->received_seqno - 1, n->name, n->hostname);
444                                 memset(n->late, 0, replaywin);
445                         } else if (seqno <= n->received_seqno) {
446                                 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
447                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
448                                                 n->name, n->hostname, seqno, n->received_seqno);
449                                         return false;
450                                 }
451                         } else {
452                                 for(int i = n->received_seqno + 1; i < seqno; i++)
453                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
454                         }
455                 }
456
457                 n->farfuture = 0;
458                 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
459         }
460
461         if(seqno > n->received_seqno)
462                 n->received_seqno = seqno;
463
464         n->received++;
465
466         if(n->received_seqno > MAX_SEQNO)
467                 regenerate_key();
468
469         /* Decompress the packet */
470
471         length_t origlen = inpkt->len;
472
473         if(n->incompression) {
474                 vpn_packet_t *outpkt = pkt[nextpkt++];
475
476                 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
477                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
478                                                  n->name, n->hostname);
479                         return false;
480                 }
481
482                 inpkt = outpkt;
483
484                 origlen -= MTU/64 + 20;
485         }
486
487         inpkt->priority = 0;
488
489         if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
490                 mtu_probe_h(n, inpkt, origlen);
491         else
492                 receive_packet(n, inpkt);
493         return true;
494 }
495
496 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
497         vpn_packet_t outpkt;
498         outpkt.offset = DEFAULT_PACKET_OFFSET;
499
500         if(len > sizeof outpkt.data - outpkt.offset)
501                 return;
502
503         outpkt.len = len;
504         if(c->options & OPTION_TCPONLY)
505                 outpkt.priority = 0;
506         else
507                 outpkt.priority = -1;
508         memcpy(DATA(&outpkt), buffer, len);
509
510         receive_packet(c->node, &outpkt);
511 }
512
513 static bool try_sptps(node_t *n) {
514         if(n->status.validkey)
515                 return true;
516
517         /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
518            messages anyway, so there's no need for SPTPS at all. */
519         if(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))
520                 return false;
521
522         logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
523
524         if(!n->status.waitingforkey)
525                 send_req_key(n);
526         else if(n->last_req_key + 10 < now.tv_sec) {
527                 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
528                 sptps_stop(&n->sptps);
529                 n->status.waitingforkey = false;
530                 send_req_key(n);
531         }
532
533         return false;
534 }
535
536 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
537         /* Note: condition order is as intended - even if we have a direct
538            metaconnection, we want to try SPTPS anyway as it's the only way to
539            get UDP going */
540         if(!try_sptps(n) && !n->connection)
541                 return;
542
543         uint8_t type = 0;
544         int offset = 0;
545
546         if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
547                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
548                 return;
549         }
550
551         if(routing_mode == RMODE_ROUTER)
552                 offset = 14;
553         else
554                 type = PKT_MAC;
555
556         if(origpkt->len < offset)
557                 return;
558
559         vpn_packet_t outpkt;
560
561         if(n->outcompression) {
562                 outpkt.offset = 0;
563                 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
564                 if(len < 0) {
565                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
566                 } else if(len < origpkt->len - offset) {
567                         outpkt.len = len + offset;
568                         origpkt = &outpkt;
569                         type |= PKT_COMPRESSED;
570                 }
571         }
572
573         /* If we have a direct metaconnection to n, and we can't use UDP, then
574            don't bother with SPTPS and just use a "plaintext" PACKET message.
575            We don't really care about end-to-end security since we're not
576            sending the message through any intermediate nodes. */
577         if(n->connection && origpkt->len > n->minmtu)
578                 send_tcppacket(n->connection, origpkt);
579         else
580                 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
581         return;
582 }
583
584 static void adapt_socket(const sockaddr_t *sa, int *sock) {
585         /* Make sure we have a suitable socket for the chosen address */
586         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
587                 for(int i = 0; i < listen_sockets; i++) {
588                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
589                                 *sock = i;
590                                 break;
591                         }
592                 }
593         }
594 }
595
596 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
597         /* Latest guess */
598         *sa = &n->address;
599         *sock = n->sock;
600
601         /* If the UDP address is confirmed, use it. */
602         if(n->status.udp_confirmed)
603                 return;
604
605         /* Send every third packet to n->address; that could be set
606            to the node's reflexive UDP address discovered during key
607            exchange. */
608
609         static int x = 0;
610         if(++x >= 3) {
611                 x = 0;
612                 return;
613         }
614
615         /* Otherwise, address are found in edges to this node.
616            So we pick a random edge and a random socket. */
617
618         int i = 0;
619         int j = rand() % n->edge_tree->count;
620         edge_t *candidate = NULL;
621
622         for splay_each(edge_t, e, n->edge_tree) {
623                 if(i++ == j) {
624                         candidate = e->reverse;
625                         break;
626                 }
627         }
628
629         if(candidate) {
630                 *sa = &candidate->address;
631                 *sock = rand() % listen_sockets;
632         }
633
634         adapt_socket(*sa, sock);
635 }
636
637 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
638         *sa = NULL;
639
640         /* Pick one of the edges from this node at random, then use its local address. */
641
642         int i = 0;
643         int j = rand() % n->edge_tree->count;
644         edge_t *candidate = NULL;
645
646         for splay_each(edge_t, e, n->edge_tree) {
647                 if(i++ == j) {
648                         candidate = e;
649                         break;
650                 }
651         }
652
653         if (candidate && candidate->local_address.sa.sa_family) {
654                 *sa = &candidate->local_address;
655                 *sock = rand() % listen_sockets;
656                 adapt_socket(*sa, sock);
657         }
658 }
659
660 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
661         vpn_packet_t pkt1, pkt2;
662         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
663         vpn_packet_t *inpkt = origpkt;
664         int nextpkt = 0;
665         vpn_packet_t *outpkt;
666         int origlen = origpkt->len;
667         size_t outlen;
668 #if defined(SOL_IP) && defined(IP_TOS)
669         static int priority = 0;
670         int origpriority = origpkt->priority;
671 #endif
672
673         pkt1.offset = DEFAULT_PACKET_OFFSET;
674         pkt2.offset = DEFAULT_PACKET_OFFSET;
675
676         if(!n->status.reachable) {
677                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
678                 return;
679         }
680
681         if(n->status.sptps)
682                 return send_sptps_packet(n, origpkt);
683
684         /* Make sure we have a valid key */
685
686         if(!n->status.validkey) {
687                 logger(DEBUG_TRAFFIC, LOG_INFO,
688                                    "No valid key known yet for %s (%s), forwarding via TCP",
689                                    n->name, n->hostname);
690
691                 if(n->last_req_key + 10 <= now.tv_sec) {
692                         send_req_key(n);
693                         n->last_req_key = now.tv_sec;
694                 }
695
696                 send_tcppacket(n->nexthop->connection, origpkt);
697
698                 return;
699         }
700
701         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
702                 logger(DEBUG_TRAFFIC, LOG_INFO,
703                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
704                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
705
706                 if(n != n->nexthop)
707                         send_packet(n->nexthop, origpkt);
708                 else
709                         send_tcppacket(n->nexthop->connection, origpkt);
710
711                 return;
712         }
713
714         /* Compress the packet */
715
716         if(n->outcompression) {
717                 outpkt = pkt[nextpkt++];
718
719                 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
720                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
721                                    n->name, n->hostname);
722                         return;
723                 }
724
725                 inpkt = outpkt;
726         }
727
728         /* Add sequence number */
729
730         seqno_t seqno = htonl(++(n->sent_seqno));
731         memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
732         inpkt->len += sizeof seqno;
733
734         /* Encrypt the packet */
735
736         if(cipher_active(n->outcipher)) {
737                 outpkt = pkt[nextpkt++];
738                 outlen = MAXSIZE;
739
740                 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
741                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
742                         goto end;
743                 }
744
745                 outpkt->len = outlen;
746                 inpkt = outpkt;
747         }
748
749         /* Add the message authentication code */
750
751         if(digest_active(n->outdigest)) {
752                 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
753                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
754                         goto end;
755                 }
756
757                 inpkt->len += digest_length(n->outdigest);
758         }
759
760         /* Send the packet */
761
762         const sockaddr_t *sa = NULL;
763         int sock;
764
765         if(n->status.send_locally)
766                 choose_local_address(n, &sa, &sock);
767         if(!sa)
768                 choose_udp_address(n, &sa, &sock);
769
770 #if defined(SOL_IP) && defined(IP_TOS)
771         if(priorityinheritance && origpriority != priority
772            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
773                 priority = origpriority;
774                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
775                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
776                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
777         }
778 #endif
779
780         if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
781                 if(sockmsgsize(sockerrno)) {
782                         if(n->maxmtu >= origlen)
783                                 n->maxmtu = origlen - 1;
784                         if(n->mtu >= origlen)
785                                 n->mtu = origlen - 1;
786                 } else
787                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
788         }
789
790 end:
791         origpkt->len = origlen;
792 }
793
794 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
795         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
796         bool direct = from == myself && to == relay;
797         bool relay_supported = (relay->options >> 24) >= 4;
798         bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
799
800         /* We don't really need the relay's key, but we need to establish a UDP tunnel with it and discover its MTU. */
801         if (!direct && relay_supported && !tcponly)
802                 try_sptps(relay);
803
804         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
805            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
806                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
807
808         if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
809                 char buf[len * 4 / 3 + 5];
810                 b64encode(data, buf, len);
811                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
812                    to ensure we get to learn the reflexive UDP address. */
813                 if(from == myself && !to->status.validkey) {
814                         to->incompression = myself->incompression;
815                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
816                 } else {
817                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
818                 }
819         }
820
821         size_t overhead = 0;
822         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
823         char buf[len + overhead]; char* buf_ptr = buf;
824         if(relay_supported) {
825                 if(direct) {
826                         /* Inform the recipient that this packet was sent directly. */
827                         node_id_t nullid = {};
828                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
829                 } else {
830                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
831                 }
832                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
833
834         }
835         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
836         memcpy(buf_ptr, data, len); buf_ptr += len;
837
838         const sockaddr_t *sa = NULL;
839         int sock;
840         if(relay->status.send_locally)
841                 choose_local_address(relay, &sa, &sock);
842         if(!sa)
843                 choose_udp_address(relay, &sa, &sock);
844         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
845         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
846                 if(sockmsgsize(sockerrno)) {
847                         // Compensate for SPTPS overhead
848                         len -= SPTPS_DATAGRAM_OVERHEAD;
849                         if(relay->maxmtu >= len)
850                                 relay->maxmtu = len - 1;
851                         if(relay->mtu >= len)
852                                 relay->mtu = len - 1;
853                 } else {
854                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
855                         return false;
856                 }
857         }
858
859         return true;
860 }
861
862 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
863         return send_sptps_data_priv(handle, myself, type, data, len);
864 }
865
866 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
867         node_t *from = handle;
868
869         if(type == SPTPS_HANDSHAKE) {
870                 if(!from->status.validkey) {
871                         from->status.validkey = true;
872                         from->status.waitingforkey = false;
873                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
874                 }
875                 return true;
876         }
877
878         if(len > MTU) {
879                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
880                 return false;
881         }
882
883         vpn_packet_t inpkt;
884         inpkt.offset = DEFAULT_PACKET_OFFSET;
885
886         if(type == PKT_PROBE) {
887                 inpkt.len = len;
888                 memcpy(DATA(&inpkt), data, len);
889                 mtu_probe_h(from, &inpkt, len);
890                 return true;
891         }
892
893         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
894                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
895                 return false;
896         }
897
898         /* Check if we have the headers we need */
899         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
900                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
901                 return false;
902         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
903                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
904         }
905
906         int offset = (type & PKT_MAC) ? 0 : 14;
907         if(type & PKT_COMPRESSED) {
908                 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
909                 if(ulen < 0) {
910                         return false;
911                 } else {
912                         inpkt.len = ulen + offset;
913                 }
914                 if(inpkt.len > MAXSIZE)
915                         abort();
916         } else {
917                 memcpy(DATA(&inpkt) + offset, data, len);
918                 inpkt.len = len + offset;
919         }
920
921         /* Generate the Ethernet packet type if necessary */
922         if(offset) {
923                 switch(DATA(&inpkt)[14] >> 4) {
924                         case 4:
925                                 DATA(&inpkt)[12] = 0x08;
926                                 DATA(&inpkt)[13] = 0x00;
927                                 break;
928                         case 6:
929                                 DATA(&inpkt)[12] = 0x86;
930                                 DATA(&inpkt)[13] = 0xDD;
931                                 break;
932                         default:
933                                 logger(DEBUG_TRAFFIC, LOG_ERR,
934                                                    "Unknown IP version %d while reading packet from %s (%s)",
935                                                    DATA(&inpkt)[14] >> 4, from->name, from->hostname);
936                                 return false;
937                 }
938         }
939
940         receive_packet(from, &inpkt);
941         return true;
942 }
943
944 /*
945   send a packet to the given vpn ip.
946 */
947 void send_packet(node_t *n, vpn_packet_t *packet) {
948         node_t *via;
949
950         if(n == myself) {
951                 if(overwrite_mac)
952                          memcpy(DATA(packet), mymac.x, ETH_ALEN);
953                 n->out_packets++;
954                 n->out_bytes += packet->len;
955                 devops.write(packet);
956                 return;
957         }
958
959         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
960                            packet->len, n->name, n->hostname);
961
962         if(!n->status.reachable) {
963                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
964                                    n->name, n->hostname);
965                 return;
966         }
967
968         n->out_packets++;
969         n->out_bytes += packet->len;
970
971         if(n->status.sptps) {
972                 send_sptps_packet(n, packet);
973                 return;
974         }
975
976         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
977
978         if(via != n)
979                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
980                            n->name, via->name, n->via->hostname);
981
982         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
983                 if(!send_tcppacket(via->connection, packet))
984                         terminate_connection(via->connection, true);
985         } else
986                 send_udppacket(via, packet);
987 }
988
989 /* Broadcast a packet using the minimum spanning tree */
990
991 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
992         // Always give ourself a copy of the packet.
993         if(from != myself)
994                 send_packet(myself, packet);
995
996         // In TunnelServer mode, do not forward broadcast packets.
997         // The MST might not be valid and create loops.
998         if(tunnelserver || broadcast_mode == BMODE_NONE)
999                 return;
1000
1001         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1002                            packet->len, from->name, from->hostname);
1003
1004         switch(broadcast_mode) {
1005                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1006                 // This guarantees all nodes receive the broadcast packet, and
1007                 // usually distributes the sending of broadcast packets over all nodes.
1008                 case BMODE_MST:
1009                         for list_each(connection_t, c, connection_list)
1010                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
1011                                         send_packet(c->node, packet);
1012                         break;
1013
1014                 // In direct mode, we send copies to each node we know of.
1015                 // However, this only reaches nodes that can be reached in a single hop.
1016                 // We don't have enough information to forward broadcast packets in this case.
1017                 case BMODE_DIRECT:
1018                         if(from != myself)
1019                                 break;
1020
1021                         for splay_each(node_t, n, node_tree)
1022                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1023                                         send_packet(n, packet);
1024                         break;
1025
1026                 default:
1027                         break;
1028         }
1029 }
1030
1031 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1032         node_t *n = NULL;
1033         bool hard = false;
1034         static time_t last_hard_try = 0;
1035
1036         for splay_each(edge_t, e, edge_weight_tree) {
1037                 if(!e->to->status.reachable || e->to == myself)
1038                         continue;
1039
1040                 if(sockaddrcmp_noport(from, &e->address)) {
1041                         if(last_hard_try == now.tv_sec)
1042                                 continue;
1043                         hard = true;
1044                 }
1045
1046                 if(!try_mac(e->to, pkt))
1047                         continue;
1048
1049                 n = e->to;
1050                 break;
1051         }
1052
1053         if(hard)
1054                 last_hard_try = now.tv_sec;
1055
1056         last_hard_try = now.tv_sec;
1057         return n;
1058 }
1059
1060 void handle_incoming_vpn_data(void *data, int flags) {
1061         listen_socket_t *ls = data;
1062         vpn_packet_t pkt;
1063         char *hostname;
1064         node_id_t nullid = {};
1065         sockaddr_t addr = {};
1066         socklen_t addrlen = sizeof addr;
1067         node_t *from, *to;
1068         bool direct = false;
1069
1070         pkt.offset = 0;
1071         int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1072
1073         if(len <= 0 || len > MAXSIZE) {
1074                 if(!sockwouldblock(sockerrno))
1075                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1076                 return;
1077         }
1078
1079         pkt.len = len;
1080
1081         sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1082
1083         // Try to figure out who sent this packet.
1084
1085         node_t *n = lookup_node_udp(&addr);
1086
1087         if(!n) {
1088                 // It might be from a 1.1 node, which might have a source ID in the packet.
1089                 pkt.offset = 2 * sizeof(node_id_t);
1090                 from = lookup_node_id(SRCID(&pkt));
1091                 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1092                         if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1093                                 n = from;
1094                         else
1095                                 goto skip_harder;
1096                 }
1097         }
1098
1099         if(!n) {
1100                 pkt.offset = 0;
1101                 n = try_harder(&addr, &pkt);
1102         }
1103
1104 skip_harder:
1105         if(!n) {
1106                 if(debug_level >= DEBUG_PROTOCOL) {
1107                         hostname = sockaddr2hostname(&addr);
1108                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1109                         free(hostname);
1110                 }
1111                 return;
1112         }
1113
1114         if(n->status.sptps) {
1115                 pkt.offset = 2 * sizeof(node_id_t);
1116
1117                 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1118                         direct = true;
1119                         from = n;
1120                         to = myself;
1121                 } else {
1122                         from = lookup_node_id(SRCID(&pkt));
1123                         to = lookup_node_id(DSTID(&pkt));
1124                 }
1125                 if(!from || !to) {
1126                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
1127                         return;
1128                 }
1129
1130                 if(to != myself) {
1131                         send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1132                         return;
1133                 }
1134         } else {
1135                 direct = true;
1136                 from = n;
1137         }
1138
1139         pkt.offset = 0;
1140         if(!receive_udppacket(from, &pkt))
1141                 return;
1142
1143         n->sock = ls - listen_socket;
1144         if(direct && sockaddrcmp(&addr, &n->address))
1145                 update_node_udp(n, &addr);
1146 }
1147
1148 void handle_device_data(void *data, int flags) {
1149         vpn_packet_t packet;
1150         packet.offset = DEFAULT_PACKET_OFFSET;
1151         packet.priority = 0;
1152
1153         if(devops.read(&packet)) {
1154                 myself->in_packets++;
1155                 myself->in_bytes += packet.len;
1156                 route(myself, &packet);
1157         }
1158 }