Use edge local addresses for local discovery.
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2013 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
/* NOTE(review): presumably the lifetime of session keys; it is not used in
   the visible part of this file -- confirm against its users. */
49 int keylifetime = 0;
50 #ifdef HAVE_LZO
/* Scratch buffer handed to lzo1x_1_compress() and lzo1x_999_compress();
   sized for whichever of the two needs more work memory. */
51 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
52 #endif
53
/* Forward declaration: the MTU probe code below calls send_udppacket()
   before its definition. */
54 static void send_udppacket(node_t *, vpn_packet_t *);
55
/* Size in bytes of the per-node `late' bitmap used by the replay check in
   receive_udppacket(), i.e. the window spans replaywin * 8 sequence numbers.
   A value of 0 disables the replay check. */
56 unsigned replaywin = 16;
/* When true, each MTU probe burst contains one additional probe that may be
   sent to a local address (see send_mtu_probe_handler()). */
57 bool localdiscovery = false;
/* Destination used for local probes by choose_local_address() when its
   address family matches that of the chosen listening socket. */
58 sockaddr_t localdiscovery_address;
59
/* Once a node's received sequence number exceeds this value (2^30),
   receive_udppacket() calls regenerate_key(). */
60 #define MAX_SEQNO 1073741824
61
62 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
63    mtuprobes ==    31: sleep pinginterval seconds
64    mtuprobes ==    32: send 1 burst, sleep pingtimeout seconds
65    mtuprobes ==    33: no response from other side, restart PMTU discovery process
66
67    Probes are sent in batches of at least three, with random sizes between the
68    lower and upper boundaries for the MTU thus far discovered.
69
70    After the initial discovery, a fourth packet is added to each batch with a
71    size larger than the currently known PMTU, to test if the PMTU has increased.
72
73    In case local discovery is enabled, another packet is added to each batch,
74    which will be broadcast to the local network.
75
76 */
77
/* Timer callback that drives path-MTU discovery towards one node.

   `data' is the node_t to probe. Each invocation increments n->mtuprobes,
   applies the state transitions described in the comment above, optionally
   sends one burst of probe packets, refreshes the incoming packet-loss
   estimate and finally re-arms n->mtutimeout.

   If the node is unreachable or has no valid key, probing is reset
   (mtuprobes = 0) and the function returns WITHOUT re-arming the timer. */
78 static void send_mtu_probe_handler(void *data) {
79         node_t *n = data;
80         int timeout = 1;
81
82         n->mtuprobes++;
83
84         if(!n->status.reachable || !n->status.validkey) {
85                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
86                 n->mtuprobes = 0;
87                 return;
88         }
89
        /* State 33: the burst sent in state 32 went unanswered. */
90         if(n->mtuprobes > 32) {
                /* No probe was ever answered (minmtu still 0): just go back
                   to sleeping for pinginterval seconds. */
91                 if(!n->minmtu) {
92                         n->mtuprobes = 31;
93                         timeout = pinginterval;
94                         goto end;
95                 }
96
                /* A previously working UDP path stopped answering: forget the
                   confirmed state and restart discovery from scratch. */
97                 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
98                 n->status.udp_confirmed = false;
99                 n->mtuprobes = 1;
100                 n->minmtu = 0;
101                 n->maxmtu = MTU;
102         }
103
        /* Ten or more bursts without any reply: stop the initial discovery
           and fall through to the sleeping state. */
104         if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
105                 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
106                 n->mtuprobes = 31;
107         }
108
        /* Discovery is finished either after 30 bursts or as soon as the
           lower and upper bounds meet; make both bounds equal and fix the MTU. */
109         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
110                 if(n->minmtu > n->maxmtu)
111                         n->minmtu = n->maxmtu;
112                 else
113                         n->maxmtu = n->minmtu;
114                 n->mtu = n->minmtu;
115                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
116                 n->mtuprobes = 31;
117         }
118
        /* State 31 only sleeps; state 32 sends a burst and waits pingtimeout. */
119         if(n->mtuprobes == 31) {
120                 timeout = pinginterval;
121                 goto end;
122         } else if(n->mtuprobes == 32) {
123                 timeout = pingtimeout;
124         }
125
        /* Send the burst. Iteration 0 is the "has the PMTU grown?" probe,
           iterations 1..3 are the regular probes, and iteration 4 exists only
           when localdiscovery is true (bool promoted to 1). */
126         for(int i = 0; i < 4 + localdiscovery; i++) {
127                 int len;
128
129                 if(i == 0) {
                        /* Only probe above maxmtu after the initial discovery,
                           and only if such a packet still fits below MTU. */
130                         if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
131                                 continue;
132                         len = n->maxmtu + 8;
133                 } else if(n->maxmtu <= n->minmtu) {
134                         len = n->maxmtu;
135                 } else {
                        /* Random length in the range (minmtu, maxmtu]. */
136                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
137                 }
138
139                 if(len < 64)
140                         len = 64;
141
                /* The first 14 bytes are zero, so data[0] == 0 marks a probe
                   request and data[12]/data[13] == 0 marks the packet as a
                   probe for the receive path; the rest is random filler. */
142                 vpn_packet_t packet;
143                 memset(packet.data, 0, 14);
144                 randomize(packet.data + 14, len - 14);
145                 packet.len = len;
146                 packet.priority = 0;
                /* Only the extra local-discovery probe (i >= 4) is sent
                   locally, only during the first ten bursts, and only when the
                   node has a prevedge. */
147                 n->status.send_locally = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
148
149                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
150
151                 send_udppacket(n, &packet);
152         }
153
        /* Reset per-burst bookkeeping; probe_time is the reference point for
           the RTT measurement in mtu_probe_h(). */
154         n->status.send_locally = false;
155         n->probe_counter = 0;
156         gettimeofday(&n->probe_time, NULL);
157
158         /* Calculate the packet loss of incoming traffic by comparing the rate of
159            packets received to the rate with which the sequence number has increased.
160          */
161
        /* NOTE(review): the divisor is not checked for zero here -- confirm
           that received_seqno always advances when `received' does. */
162         if(n->received > n->prev_received)
163                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
164         else
165                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
166
167         n->prev_received_seqno = n->received_seqno;
168         n->prev_received = n->received;
169
170 end:
        /* Re-arm the timer: `timeout' seconds plus a random sub-second
           offset of less than 100000 microseconds. */
171         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
172 }
173
174 void send_mtu_probe(node_t *n) {
175         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
176         send_mtu_probe_handler(n);
177 }
178
/* Handle a received MTU probe packet from node n.

   packet->data[0] == 0 marks a probe request, which is answered right away.
   Any other value marks a probe reply, which is used to confirm the UDP path,
   raise n->minmtu and measure RTT / burst bandwidth.

   `len' is the probe length supplied by the caller (receive_udppacket()
   passes the length of the packet before decompression). */
179 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
180         if(!packet->data[0]) {
181                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);
182
183                 /* It's a probe request, send back a reply */
184
185                 /* Type 2 probe replies were introduced in protocol 17.3 */
                /* A type 2 reply is 11 bytes: type (1), probe length as a
                   16-bit big-endian value (2), then the current time as
                   seconds (4) and microseconds (4), both big-endian.
                   NOTE(review): the test is an equality with 3 -- confirm
                   whether peers reporting a higher value should get type 2
                   replies as well. */
186                 if ((n->options >> 24) == 3) {
187                         uint8_t* data = packet->data;
188                         *data++ = 2;
189                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
190                         struct timeval now;
191                         gettimeofday(&now, NULL);
192                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
193                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
194                         packet->len = data - packet->data;
195                 } else {
196                         /* Legacy protocol: n won't understand type 2 probe replies. */
                        /* Type 1 replies echo the request at its full length. */
197                         packet->data[0] = 1;
198                 }
199
200                 /* Temporarily set udp_confirmed, so that the reply is sent
201                    back exactly the way it came in. */
202
203                 bool udp_confirmed = n->status.udp_confirmed;
204                 n->status.udp_confirmed = true;
205                 send_udppacket(n, packet);
206                 n->status.udp_confirmed = udp_confirmed;
207         } else {
                /* For type 1 replies the probe length is the received length;
                   type 2 replies carry the original probe length in bytes 1-2.
                   A too-short type 2 reply is logged but still processed with
                   probelen == len. */
208                 length_t probelen = len;
209                 if (packet->data[0] == 2) {
210                         if (len < 3)
211                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
212                         else {
213                                 uint16_t probelen16; memcpy(&probelen16, packet->data + 1, 2); probelen = ntohs(probelen16);
214                         }
215                 }
216                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", packet->data[0], probelen, n->name, n->hostname);
217
218                 /* It's a valid reply: now we know bidirectional communication
219                    is possible using the address and socket that the reply
220                    packet used. */
221
222                 n->status.udp_confirmed = true;
223
224                 /* If we haven't established the PMTU yet, restart the discovery process. */
225
226                 if(n->mtuprobes > 30) {
                        /* A reply to the oversized (maxmtu + 8) probe means
                           the path now carries larger packets: reopen the
                           upper bound and resume discovery at burst 10. */
227                         if (probelen == n->maxmtu + 8) {
228                                 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
229                                 n->maxmtu = MTU;
230                                 n->mtuprobes = 10;
231                                 return;
232                         }
233
                        /* Otherwise return to state 30 (MTU already known) or
                           to the start of discovery (no MTU known yet). */
234                         if(n->minmtu)
235                                 n->mtuprobes = 30;
236                         else
237                                 n->mtuprobes = 1;
238                 }
239
240                 /* If applicable, raise the minimum supported MTU */
241
242                 if(probelen > n->maxmtu)
243                         probelen = n->maxmtu;
244                 if(n->minmtu < probelen)
245                         n->minmtu = probelen;
246
247                 /* Calculate RTT and bandwidth.
248                    The RTT is the time between the MTU probe burst was sent and the first
249                    reply is received. The bandwidth is measured using the time between the
250                    arrival of the first and third probe reply (or type 2 probe requests).
251                  */
252
253                 struct timeval now, diff;
254                 gettimeofday(&now, NULL);
255                 timersub(&now, &n->probe_time, &diff);
256
                /* Prefer the sender-side timestamp embedded in a complete
                   type 2 reply; fall back to the local arrival time. */
257                 struct timeval probe_timestamp = now;
258                 if (packet->data[0] == 2 && packet->len >= 11) {
259                         uint32_t sec; memcpy(&sec, packet->data + 3, 4);
260                         uint32_t usec; memcpy(&usec, packet->data + 7, 4);
261                         probe_timestamp.tv_sec = ntohl(sec);
262                         probe_timestamp.tv_usec = ntohl(usec);
263                 }
264                 
265                 n->probe_counter++;
266
                /* First reply of the burst: record the RTT and reuse
                   probe_time as the start of the bandwidth interval.
                   Third reply: two packets of probelen bytes arrived within
                   the measured interval. */
267                 if(n->probe_counter == 1) {
268                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
269                         n->probe_time = probe_timestamp;
270                 } else if(n->probe_counter == 3) {
271                         struct timeval probe_timestamp_diff;
272                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
273                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
274                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
275                 }
276         }
277 }
278
279 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
280         if(level == 0) {
281                 memcpy(dest, source, len);
282                 return len;
283         } else if(level == 10) {
284 #ifdef HAVE_LZO
285                 lzo_uint lzolen = MAXSIZE;
286                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
287                 return lzolen;
288 #else
289                 return -1;
290 #endif
291         } else if(level < 10) {
292 #ifdef HAVE_ZLIB
293                 unsigned long destlen = MAXSIZE;
294                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
295                         return destlen;
296                 else
297 #endif
298                         return -1;
299         } else {
300 #ifdef HAVE_LZO
301                 lzo_uint lzolen = MAXSIZE;
302                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
303                 return lzolen;
304 #else
305                 return -1;
306 #endif
307         }
308
309         return -1;
310 }
311
312 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
313         if(level == 0) {
314                 memcpy(dest, source, len);
315                 return len;
316         } else if(level > 9) {
317 #ifdef HAVE_LZO
318                 lzo_uint lzolen = MAXSIZE;
319                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
320                         return lzolen;
321                 else
322 #endif
323                         return -1;
324         }
325 #ifdef HAVE_ZLIB
326         else {
327                 unsigned long destlen = MAXSIZE;
328                 if(uncompress(dest, &destlen, source, len) == Z_OK)
329                         return destlen;
330                 else
331                         return -1;
332         }
333 #endif
334
335         return -1;
336 }
337
338 /* VPN packet I/O */
339
340 static void receive_packet(node_t *n, vpn_packet_t *packet) {
341         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
342                            packet->len, n->name, n->hostname);
343
344         n->in_packets++;
345         n->in_bytes += packet->len;
346
347         route(n, packet);
348 }
349
350 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
351         if(n->status.sptps)
352                 return sptps_verify_datagram(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
353
354         if(!digest_active(n->indigest) || inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest))
355                 return false;
356
357         return digest_verify(n->indigest, &inpkt->seqno, inpkt->len - digest_length(n->indigest), (const char *)&inpkt->seqno + inpkt->len - digest_length(n->indigest));
358 }
359
/* Process one raw UDP packet `inpkt' that was attributed to node n.

   SPTPS nodes hand the datagram to sptps_receive_data(). For other nodes
   the packet is checked for length, authenticated, decrypted, checked
   against the replay window and decompressed, in that order. The result is
   dispatched to mtu_probe_h() (MTU probes) or receive_packet() (payload).
   Packets failing any step are dropped with a log message. */
360 static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
        /* Two scratch packets used alternately as the output of the
           decrypt and decompress stages. */
361         vpn_packet_t pkt1, pkt2;
362         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
363         int nextpkt = 0;
364         vpn_packet_t *outpkt = pkt[0];
365         size_t outlen;
366
367         if(n->status.sptps) {
                /* No SPTPS session yet: ask for a key unless a request is
                   already pending, and drop the packet. */
368                 if(!n->sptps.state) {
369                         if(!n->status.waitingforkey) {
370                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
371                                 send_req_key(n);
372                         } else {
373                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
374                         }
375                         return;
376                 }
377                 sptps_receive_data(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
378                 return;
379         }
380
381         if(!n->status.validkey) {
382                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
383                 return;
384         }
385
386         /* Check packet length */
387
388         if(inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest)) {
389                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
390                                         n->name, n->hostname);
391                 return;
392         }
393
394         /* Check the message authentication code */
395
        /* The MAC trails the packet; strip it from the length first and
           verify it over everything that precedes it. */
396         if(digest_active(n->indigest)) {
397                 inpkt->len -= digest_length(n->indigest);
398                 if(!digest_verify(n->indigest, &inpkt->seqno, inpkt->len, (const char *)&inpkt->seqno + inpkt->len)) {
399                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
400                         return;
401                 }
402         }
403         /* Decrypt the packet */
404
405         if(cipher_active(n->incipher)) {
406                 outpkt = pkt[nextpkt++];
407                 outlen = MAXSIZE;
408
409                 if(!cipher_decrypt(n->incipher, &inpkt->seqno, inpkt->len, &outpkt->seqno, &outlen, true)) {
410                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
411                         return;
412                 }
413
414                 outpkt->len = outlen;
415                 inpkt = outpkt;
416         }
417
418         /* Check the sequence number */
419
420         inpkt->len -= sizeof inpkt->seqno;
421         inpkt->seqno = ntohl(inpkt->seqno);
422
        /* Replay protection. n->late is a bitmap of replaywin bytes in which
           bit (seqno % 8) of byte ((seqno / 8) % replaywin) is set while that
           sequence number has been skipped but not yet received. */
423         if(replaywin) {
424                 if(inpkt->seqno != n->received_seqno + 1) {
425                         if(inpkt->seqno >= n->received_seqno + replaywin * 8) {
                                /* Beyond the window: drop the first
                                   replaywin / 4 such packets in a row, then
                                   accept the jump and clear the bitmap. */
426                                 if(n->farfuture++ < replaywin >> 2) {
427                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
428                                                 n->name, n->hostname, inpkt->seqno - n->received_seqno - 1, n->farfuture);
429                                         return;
430                                 }
431                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
432                                                 inpkt->seqno - n->received_seqno - 1, n->name, n->hostname);
433                                 memset(n->late, 0, replaywin);
434                         } else if (inpkt->seqno <= n->received_seqno) {
                                /* Old packet: accept it only if it is still
                                   inside the window and marked as missing. */
435                                 if((n->received_seqno >= replaywin * 8 && inpkt->seqno <= n->received_seqno - replaywin * 8) || !(n->late[(inpkt->seqno / 8) % replaywin] & (1 << inpkt->seqno % 8))) {
436                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
437                                                 n->name, n->hostname, inpkt->seqno, n->received_seqno);
438                                         return;
439                                 }
440                         } else {
                                /* Small gap: mark every skipped sequence
                                   number as missing. */
441                                 for(int i = n->received_seqno + 1; i < inpkt->seqno; i++)
442                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
443                         }
444                 }
445
                /* Accepted: reset the far-future counter and clear this
                   sequence number's "missing" bit. */
446                 n->farfuture = 0;
447                 n->late[(inpkt->seqno / 8) % replaywin] &= ~(1 << inpkt->seqno % 8);
448         }
449
450         if(inpkt->seqno > n->received_seqno)
451                 n->received_seqno = inpkt->seqno;
452
453         n->received++;
454
455         if(n->received_seqno > MAX_SEQNO)
456                 regenerate_key();
457
458         /* Decompress the packet */
459
        /* origlen is the length reported to mtu_probe_h() for probes. */
460         length_t origlen = inpkt->len;
461
462         if(n->incompression) {
463                 outpkt = pkt[nextpkt++];
464
                /* NOTE(review): outpkt->len is compared with < 0; if length_t
                   is an unsigned type this test can never be true -- confirm. */
465                 if((outpkt->len = uncompress_packet(outpkt->data, inpkt->data, inpkt->len, n->incompression)) < 0) {
466                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
467                                                  n->name, n->hostname);
468                         return;
469                 }
470
471                 inpkt = outpkt;
472
                /* NOTE(review): presumably an allowance for worst-case
                   compression overhead -- TODO confirm. */
473                 origlen -= MTU/64 + 20;
474         }
475
476         inpkt->priority = 0;
477
        /* A zero Ethernet type field (bytes 12 and 13) identifies an MTU
           probe; everything else is regular VPN traffic. */
478         if(!inpkt->data[12] && !inpkt->data[13])
479                 mtu_probe_h(n, inpkt, origlen);
480         else
481                 receive_packet(n, inpkt);
482 }
483
484 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
485         vpn_packet_t outpkt;
486
487         if(len > sizeof outpkt.data)
488                 return;
489
490         outpkt.len = len;
491         if(c->options & OPTION_TCPONLY)
492                 outpkt.priority = 0;
493         else
494                 outpkt.priority = -1;
495         memcpy(outpkt.data, buffer, len);
496
497         receive_packet(c->node, &outpkt);
498 }
499
500 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
501         if(!n->status.validkey) {
502                 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
503                 if(!n->status.waitingforkey)
504                         send_req_key(n);
505                 else if(n->last_req_key + 10 < now.tv_sec) {
506                         logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
507                         sptps_stop(&n->sptps);
508                         n->status.waitingforkey = false;
509                         send_req_key(n);
510                 }
511                 return;
512         }
513
514         uint8_t type = 0;
515         int offset = 0;
516
517         if(!(origpkt->data[12] | origpkt->data[13])) {
518                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)origpkt->data, origpkt->len);
519                 return;
520         }
521
522         if(routing_mode == RMODE_ROUTER)
523                 offset = 14;
524         else
525                 type = PKT_MAC;
526
527         if(origpkt->len < offset)
528                 return;
529
530         vpn_packet_t outpkt;
531
532         if(n->outcompression) {
533                 int len = compress_packet(outpkt.data + offset, origpkt->data + offset, origpkt->len - offset, n->outcompression);
534                 if(len < 0) {
535                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
536                 } else if(len < origpkt->len - offset) {
537                         outpkt.len = len + offset;
538                         origpkt = &outpkt;
539                         type |= PKT_COMPRESSED;
540                 }
541         }
542
543         sptps_send_record(&n->sptps, type, (char *)origpkt->data + offset, origpkt->len - offset);
544         return;
545 }
546
547 static void adapt_socket(const sockaddr_t *sa, int *sock) {
548         /* Make sure we have a suitable socket for the chosen address */
549         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
550                 for(int i = 0; i < listen_sockets; i++) {
551                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
552                                 *sock = i;
553                                 break;
554                         }
555                 }
556         }
557 }
558
559 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
560         /* Latest guess */
561         *sa = &n->address;
562         *sock = n->sock;
563
564         /* If the UDP address is confirmed, use it. */
565         if(n->status.udp_confirmed)
566                 return;
567
568         /* Send every third packet to n->address; that could be set
569            to the node's reflexive UDP address discovered during key
570            exchange. */
571
572         static int x = 0;
573         if(++x >= 3) {
574                 x = 0;
575                 return;
576         }
577
578         /* Otherwise, address are found in edges to this node.
579            So we pick a random edge and a random socket. */
580
581         int i = 0;
582         int j = rand() % n->edge_tree->count;
583         edge_t *candidate = NULL;
584
585         for splay_each(edge_t, e, n->edge_tree) {
586                 if(i++ == j) {
587                         candidate = e->reverse;
588                         break;
589                 }
590         }
591
592         if(candidate) {
593                 *sa = &candidate->address;
594                 *sock = rand() % listen_sockets;
595         }
596
597         adapt_socket(*sa, sock);
598 }
599
600 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
601         /* Pick one of the edges from this node at random, then use its local address. */
602
603         int i = 0;
604         int j = rand() % n->edge_tree->count;
605         edge_t *candidate = NULL;
606
607         for splay_each(edge_t, e, n->edge_tree) {
608                 if(i++ == j) {
609                         candidate = e;
610                         break;
611                 }
612         }
613
614         if (candidate && candidate->local_address.sa.sa_family) {
615                 *sa = &candidate->local_address;
616                 *sock = rand() % listen_sockets;
617                 adapt_socket(*sa, sock);
618                 return;
619         }
620
621         /* No candidate? Use broadcasts instead. */
622
623         static sockaddr_t broadcast_ipv4 = {
624                 .in = {
625                         .sin_family = AF_INET,
626                         .sin_addr.s_addr = -1,
627                 }
628         };
629
630         static sockaddr_t broadcast_ipv6 = {
631                 .in6 = {
632                         .sin6_family = AF_INET6,
633                         .sin6_addr.s6_addr[0x0] = 0xff,
634                         .sin6_addr.s6_addr[0x1] = 0x02,
635                         .sin6_addr.s6_addr[0xf] = 0x01,
636                 }
637         };
638
639         *sock = rand() % listen_sockets;
640
641         if(listen_socket[*sock].sa.sa.sa_family == AF_INET6) {
642                 if(localdiscovery_address.sa.sa_family == AF_INET6) {
643                         localdiscovery_address.in6.sin6_port = n->prevedge->address.in.sin_port;
644                         *sa = &localdiscovery_address;
645                 } else {
646                         broadcast_ipv6.in6.sin6_port = n->prevedge->address.in.sin_port;
647                         broadcast_ipv6.in6.sin6_scope_id = listen_socket[*sock].sa.in6.sin6_scope_id;
648                         *sa = &broadcast_ipv6;
649                 }
650         } else {
651                 if(localdiscovery_address.sa.sa_family == AF_INET) {
652                         localdiscovery_address.in.sin_port = n->prevedge->address.in.sin_port;
653                         *sa = &localdiscovery_address;
654                 } else {
655                         broadcast_ipv4.in.sin_port = n->prevedge->address.in.sin_port;
656                         *sa = &broadcast_ipv4;
657                 }
658         }
659 }
660
/* Send `origpkt' to node n over UDP.

   SPTPS nodes are handled by send_sptps_packet(). For other nodes the packet
   is compressed, prefixed with a sequence number, encrypted and
   authenticated as configured for the node, then sent to an address chosen
   by choose_local_address() or choose_udp_address().

   The packet is forwarded over TCP (or via the next hop) instead when no
   valid key is known yet, or when PMTU discovery is enabled and a non-probe
   packet is larger than the node's minimum MTU.

   origpkt->len may be modified while the packet is processed in place; it is
   restored before returning from the `end' label. */
661 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
        /* Two scratch packets used alternately as the output of the
           compress and encrypt stages. */
662         vpn_packet_t pkt1, pkt2;
663         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
664         vpn_packet_t *inpkt = origpkt;
665         int nextpkt = 0;
666         vpn_packet_t *outpkt;
667         int origlen = origpkt->len;
668         size_t outlen;
669 #if defined(SOL_IP) && defined(IP_TOS)
        /* Last TOS value applied; a single value shared by all nodes and
           sockets. */
670         static int priority = 0;
671 #endif
672         int origpriority = origpkt->priority;
673
674         if(!n->status.reachable) {
675                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
676                 return;
677         }
678
        /* NOTE(review): `return <void expression>;' in a void function is
           accepted by GCC/Clang but is not strictly conforming ISO C. */
679         if(n->status.sptps)
680                 return send_sptps_packet(n, origpkt);
681
682         /* Make sure we have a valid key */
683
684         if(!n->status.validkey) {
685                 logger(DEBUG_TRAFFIC, LOG_INFO,
686                                    "No valid key known yet for %s (%s), forwarding via TCP",
687                                    n->name, n->hostname);
688
                /* Rate-limit key requests to one per 10 seconds. */
689                 if(n->last_req_key + 10 <= now.tv_sec) {
690                         send_req_key(n);
691                         n->last_req_key = now.tv_sec;
692                 }
693
694                 send_tcppacket(n->nexthop->connection, origpkt);
695
696                 return;
697         }
698
        /* Packets larger than the proven MTU must not go out over UDP; MTU
           probes (bytes 12 and 13 both zero) are exempt from this check. */
699         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (inpkt->data[12] | inpkt->data[13])) {
700                 logger(DEBUG_TRAFFIC, LOG_INFO,
701                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
702                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
703
704                 if(n != n->nexthop)
705                         send_packet(n->nexthop, origpkt);
706                 else
707                         send_tcppacket(n->nexthop->connection, origpkt);
708
709                 return;
710         }
711
712         /* Compress the packet */
713
714         if(n->outcompression) {
715                 outpkt = pkt[nextpkt++];
716
                /* NOTE(review): outpkt->len is compared with < 0; if length_t
                   is an unsigned type this test can never be true -- confirm. */
717                 if((outpkt->len = compress_packet(outpkt->data, inpkt->data, inpkt->len, n->outcompression)) < 0) {
718                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
719                                    n->name, n->hostname);
720                         return;
721                 }
722
723                 inpkt = outpkt;
724         }
725
726         /* Add sequence number */
727
        /* Without compression inpkt is still origpkt here, so this modifies
           the caller's packet; its length is restored at `end'. */
728         inpkt->seqno = htonl(++(n->sent_seqno));
729         inpkt->len += sizeof inpkt->seqno;
730
731         /* Encrypt the packet */
732
733         if(cipher_active(n->outcipher)) {
734                 outpkt = pkt[nextpkt++];
735                 outlen = MAXSIZE;
736
737                 if(!cipher_encrypt(n->outcipher, &inpkt->seqno, inpkt->len, &outpkt->seqno, &outlen, true)) {
738                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
739                         goto end;
740                 }
741
742                 outpkt->len = outlen;
743                 inpkt = outpkt;
744         }
745
746         /* Add the message authentication code */
747
        /* The MAC is appended directly after the current packet contents. */
748         if(digest_active(n->outdigest)) {
749                 if(!digest_create(n->outdigest, &inpkt->seqno, inpkt->len, (char *)&inpkt->seqno + inpkt->len)) {
750                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
751                         goto end;
752                 }
753
754                 inpkt->len += digest_length(n->outdigest);
755         }
756
757         /* Send the packet */
758
759         const sockaddr_t *sa;
760         int sock;
761
762         if(n->status.send_locally)
763                 choose_local_address(n, &sa, &sock);
764         else
765                 choose_udp_address(n, &sa, &sock);
766
767 #if defined(SOL_IP) && defined(IP_TOS)
        /* NOTE(review): the TOS value is applied to listen_socket[n->sock],
           while the packet below is sent on listen_socket[sock]; the two
           indices can differ when the address is not confirmed -- confirm
           whether `sock' was intended. */
768         if(priorityinheritance && origpriority != priority
769            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
770                 priority = origpriority;
771                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
772                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
773                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
774         }
775 #endif
776
        /* A "message too long" error lowers the known MTU bounds to just
           below the original packet length; would-block errors are ignored,
           other errors are logged. */
777         if(sendto(listen_socket[sock].udp.fd, (char *) &inpkt->seqno, inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
778                 if(sockmsgsize(sockerrno)) {
779                         if(n->maxmtu >= origlen)
780                                 n->maxmtu = origlen - 1;
781                         if(n->mtu >= origlen)
782                                 n->mtu = origlen - 1;
783                 } else
784                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
785         }
786
787 end:
        /* Undo the in-place length changes made to the caller's packet. */
788         origpkt->len = origlen;
789 }
790
791 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
792         node_t *to = handle;
793
794         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, or this packet is larger than the MTU. */
795
796         if(type >= SPTPS_HANDSHAKE || ((myself->options | to->options) & OPTION_TCPONLY) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > to->minmtu)) {
797                 char buf[len * 4 / 3 + 5];
798                 b64encode(data, buf, len);
799                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
800                    to ensure we get to learn the reflexive UDP address. */
801                 if(!to->status.validkey) {
802                         to->incompression = myself->incompression;
803                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, myself->name, to->name, buf, to->incompression);
804                 } else {
805                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, myself->name, to->name, REQ_SPTPS, buf);
806                 }
807         }
808
809         /* Otherwise, send the packet via UDP */
810
811         const sockaddr_t *sa;
812         int sock;
813
814         if(to->status.send_locally)
815                 choose_local_address(to, &sa, &sock);
816         else
817                 choose_udp_address(to, &sa, &sock);
818
819         if(sendto(listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
820                 if(sockmsgsize(sockerrno)) {
821                         // Compensate for SPTPS overhead
822                         len -= SPTPS_DATAGRAM_OVERHEAD;
823                         if(to->maxmtu >= len)
824                                 to->maxmtu = len - 1;
825                         if(to->mtu >= len)
826                                 to->mtu = len - 1;
827                 } else {
828                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", to->name, to->hostname, sockstrerror(sockerrno));
829                         return false;
830                 }
831         }
832
833         return true;
834 }
835
836 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
837         node_t *from = handle;
838
839         if(type == SPTPS_HANDSHAKE) {
840                 if(!from->status.validkey) {
841                         from->status.validkey = true;
842                         from->status.waitingforkey = false;
843                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
844                 }
845                 return true;
846         }
847
848         if(len > MTU) {
849                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
850                 return false;
851         }
852
853         vpn_packet_t inpkt;
854
855         if(type == PKT_PROBE) {
856                 inpkt.len = len;
857                 memcpy(inpkt.data, data, len);
858                 mtu_probe_h(from, &inpkt, len);
859                 return true;
860         }
861
862         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
863                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
864                 return false;
865         }
866
867         /* Check if we have the headers we need */
868         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
869                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
870                 return false;
871         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
872                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
873         }
874
875         int offset = (type & PKT_MAC) ? 0 : 14;
876         if(type & PKT_COMPRESSED) {
877                 length_t ulen = uncompress_packet(inpkt.data + offset, (const uint8_t *)data, len, from->incompression);
878                 if(ulen < 0) {
879                         return false;
880                 } else {
881                         inpkt.len = ulen + offset;
882                 }
883                 if(inpkt.len > MAXSIZE)
884                         abort();
885         } else {
886                 memcpy(inpkt.data + offset, data, len);
887                 inpkt.len = len + offset;
888         }
889
890         /* Generate the Ethernet packet type if necessary */
891         if(offset) {
892                 switch(inpkt.data[14] >> 4) {
893                         case 4:
894                                 inpkt.data[12] = 0x08;
895                                 inpkt.data[13] = 0x00;
896                                 break;
897                         case 6:
898                                 inpkt.data[12] = 0x86;
899                                 inpkt.data[13] = 0xDD;
900                                 break;
901                         default:
902                                 logger(DEBUG_TRAFFIC, LOG_ERR,
903                                                    "Unknown IP version %d while reading packet from %s (%s)",
904                                                    inpkt.data[14] >> 4, from->name, from->hostname);
905                                 return false;
906                 }
907         }
908
909         receive_packet(from, &inpkt);
910         return true;
911 }
912
913 /*
914   send a packet to the given vpn ip.
915 */
916 void send_packet(node_t *n, vpn_packet_t *packet) {
917         node_t *via;
918
919         if(n == myself) {
920                 if(overwrite_mac)
921                          memcpy(packet->data, mymac.x, ETH_ALEN);
922                 n->out_packets++;
923                 n->out_bytes += packet->len;
924                 devops.write(packet);
925                 return;
926         }
927
928         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
929                            packet->len, n->name, n->hostname);
930
931         if(!n->status.reachable) {
932                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
933                                    n->name, n->hostname);
934                 return;
935         }
936
937         n->out_packets++;
938         n->out_bytes += packet->len;
939
940         if(n->status.sptps) {
941                 send_sptps_packet(n, packet);
942                 return;
943         }
944
945         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
946
947         if(via != n)
948                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
949                            n->name, via->name, n->via->hostname);
950
951         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
952                 if(!send_tcppacket(via->connection, packet))
953                         terminate_connection(via->connection, true);
954         } else
955                 send_udppacket(via, packet);
956 }
957
958 /* Broadcast a packet using the minimum spanning tree */
959
960 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
961         // Always give ourself a copy of the packet.
962         if(from != myself)
963                 send_packet(myself, packet);
964
965         // In TunnelServer mode, do not forward broadcast packets.
966         // The MST might not be valid and create loops.
967         if(tunnelserver || broadcast_mode == BMODE_NONE)
968                 return;
969
970         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
971                            packet->len, from->name, from->hostname);
972
973         switch(broadcast_mode) {
974                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
975                 // This guarantees all nodes receive the broadcast packet, and
976                 // usually distributes the sending of broadcast packets over all nodes.
977                 case BMODE_MST:
978                         for list_each(connection_t, c, connection_list)
979                                 if(c->status.active && c->status.mst && c != from->nexthop->connection)
980                                         send_packet(c->node, packet);
981                         break;
982
983                 // In direct mode, we send copies to each node we know of.
984                 // However, this only reaches nodes that can be reached in a single hop.
985                 // We don't have enough information to forward broadcast packets in this case.
986                 case BMODE_DIRECT:
987                         if(from != myself)
988                                 break;
989
990                         for splay_each(node_t, n, node_tree)
991                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
992                                         send_packet(n, packet);
993                         break;
994
995                 default:
996                         break;
997         }
998 }
999
1000 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1001         node_t *n = NULL;
1002         bool hard = false;
1003         static time_t last_hard_try = 0;
1004
1005         for splay_each(edge_t, e, edge_weight_tree) {
1006                 if(!e->to->status.reachable || e->to == myself)
1007                         continue;
1008
1009                 if(sockaddrcmp_noport(from, &e->address)) {
1010                         if(last_hard_try == now.tv_sec)
1011                                 continue;
1012                         hard = true;
1013                 }
1014
1015                 if(!try_mac(e->to, pkt))
1016                         continue;
1017
1018                 n = e->to;
1019                 break;
1020         }
1021
1022         if(hard)
1023                 last_hard_try = now.tv_sec;
1024
1025         last_hard_try = now.tv_sec;
1026         return n;
1027 }
1028
1029 void handle_incoming_vpn_data(void *data, int flags) {
1030         listen_socket_t *ls = data;
1031         vpn_packet_t pkt;
1032         char *hostname;
1033         sockaddr_t from = {{0}};
1034         socklen_t fromlen = sizeof from;
1035         node_t *n;
1036         int len;
1037
1038         len = recvfrom(ls->udp.fd, (char *) &pkt.seqno, MAXSIZE, 0, &from.sa, &fromlen);
1039
1040         if(len <= 0 || len > MAXSIZE) {
1041                 if(!sockwouldblock(sockerrno))
1042                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1043                 return;
1044         }
1045
1046         pkt.len = len;
1047
1048         sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
1049
1050         n = lookup_node_udp(&from);
1051
1052         if(!n) {
1053                 n = try_harder(&from, &pkt);
1054                 if(n)
1055                         update_node_udp(n, &from);
1056                 else if(debug_level >= DEBUG_PROTOCOL) {
1057                         hostname = sockaddr2hostname(&from);
1058                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1059                         free(hostname);
1060                         return;
1061                 }
1062                 else
1063                         return;
1064         }
1065
1066         n->sock = ls - listen_socket;
1067
1068         receive_udppacket(n, &pkt);
1069 }
1070
1071 void handle_device_data(void *data, int flags) {
1072         vpn_packet_t packet;
1073
1074         packet.priority = 0;
1075
1076         if(devops.read(&packet)) {
1077                 myself->in_packets++;
1078                 myself->in_bytes += packet.len;
1079                 route(myself, &packet);
1080         }
1081 }