Add UDP datagram relay support to SPTPS.
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2013 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "route.h"
46 #include "utils.h"
47 #include "xalloc.h"
48
/* NOTE(review): presumably the key lifetime setting; it is not referenced in
   the visible part of this file -- confirm against its users. */
int keylifetime = 0;
#ifdef HAVE_LZO
/* Scratch memory handed to the LZO compressors; sized for whichever of the
   two compressors used below (lzo1x_1 / lzo1x_999) needs more. */
static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
#endif

/* Forward declaration: used by the MTU probe code before its definition. */
static void send_udppacket(node_t *, vpn_packet_t *);

/* Replay window size in bytes of the per-node `late` bitmap; each byte tracks
   8 sequence numbers. A value of 0 disables the replay check. */
unsigned replaywin = 16;
/* When true, one extra probe is added to every MTU probe burst, which may be
   sent to a local address of the peer. */
bool localdiscovery = true;

/* Once the highest received sequence number exceeds this value (2^30),
   regenerate_key() is called. */
#define MAX_SEQNO 1073741824
60
/* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
   mtuprobes ==    31: sleep pinginterval seconds
   mtuprobes ==    32: send 1 burst, sleep pingtimeout seconds
   mtuprobes ==    33: no response from other side, restart PMTU discovery process

   Probes are sent in batches of at least three, with random sizes between the
   lower and upper boundaries for the MTU thus far discovered.

   After the initial discovery, a fourth packet is added to each batch with a
   size larger than the currently known PMTU, to test if the PMTU has increased.

   In case local discovery is enabled, another packet is added to each batch,
   which will be broadcast to the local network.

*/
76
77 static void send_mtu_probe_handler(void *data) {
78         node_t *n = data;
79         int timeout = 1;
80
81         n->mtuprobes++;
82
83         if(!n->status.reachable || !n->status.validkey) {
84                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
85                 n->mtuprobes = 0;
86                 return;
87         }
88
89         if(n->mtuprobes > 32) {
90                 if(!n->minmtu) {
91                         n->mtuprobes = 31;
92                         timeout = pinginterval;
93                         goto end;
94                 }
95
96                 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
97                 n->status.udp_confirmed = false;
98                 n->mtuprobes = 1;
99                 n->minmtu = 0;
100                 n->maxmtu = MTU;
101         }
102
103         if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
104                 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
105                 n->mtuprobes = 31;
106         }
107
108         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
109                 if(n->minmtu > n->maxmtu)
110                         n->minmtu = n->maxmtu;
111                 else
112                         n->maxmtu = n->minmtu;
113                 n->mtu = n->minmtu;
114                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
115                 n->mtuprobes = 31;
116         }
117
118         if(n->mtuprobes == 31) {
119                 timeout = pinginterval;
120                 goto end;
121         } else if(n->mtuprobes == 32) {
122                 timeout = pingtimeout;
123         }
124
125         for(int i = 0; i < 4 + localdiscovery; i++) {
126                 int len;
127
128                 if(i == 0) {
129                         if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
130                                 continue;
131                         len = n->maxmtu + 8;
132                 } else if(n->maxmtu <= n->minmtu) {
133                         len = n->maxmtu;
134                 } else {
135                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
136                 }
137
138                 if(len < 64)
139                         len = 64;
140
141                 vpn_packet_t packet;
142                 memset(packet.data, 0, 14);
143                 randomize(packet.data + 14, len - 14);
144                 packet.len = len;
145                 packet.priority = 0;
146                 n->status.send_locally = i >= 4 && n->mtuprobes <= 10 && n->prevedge;
147
148                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
149
150                 send_udppacket(n, &packet);
151         }
152
153         n->status.send_locally = false;
154         n->probe_counter = 0;
155         gettimeofday(&n->probe_time, NULL);
156
157         /* Calculate the packet loss of incoming traffic by comparing the rate of
158            packets received to the rate with which the sequence number has increased.
159          */
160
161         if(n->received > n->prev_received)
162                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
163         else
164                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
165
166         n->prev_received_seqno = n->received_seqno;
167         n->prev_received = n->received;
168
169 end:
170         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
171 }
172
173 void send_mtu_probe(node_t *n) {
174         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
175         send_mtu_probe_handler(n);
176 }
177
178 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
179         if(!packet->data[0]) {
180                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);
181
182                 /* It's a probe request, send back a reply */
183
184                 /* Type 2 probe replies were introduced in protocol 17.3 */
185                 if ((n->options >> 24) >= 3) {
186                         uint8_t* data = packet->data;
187                         *data++ = 2;
188                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
189                         struct timeval now;
190                         gettimeofday(&now, NULL);
191                         uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
192                         uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
193                         packet->len = data - packet->data;
194                 } else {
195                         /* Legacy protocol: n won't understand type 2 probe replies. */
196                         packet->data[0] = 1;
197                 }
198
199                 /* Temporarily set udp_confirmed, so that the reply is sent
200                    back exactly the way it came in. */
201
202                 bool udp_confirmed = n->status.udp_confirmed;
203                 n->status.udp_confirmed = true;
204                 send_udppacket(n, packet);
205                 n->status.udp_confirmed = udp_confirmed;
206         } else {
207                 length_t probelen = len;
208                 if (packet->data[0] == 2) {
209                         if (len < 3)
210                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
211                         else {
212                                 uint16_t probelen16; memcpy(&probelen16, packet->data + 1, 2); probelen = ntohs(probelen16);
213                         }
214                 }
215                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", packet->data[0], probelen, n->name, n->hostname);
216
217                 /* It's a valid reply: now we know bidirectional communication
218                    is possible using the address and socket that the reply
219                    packet used. */
220
221                 n->status.udp_confirmed = true;
222
223                 /* If we haven't established the PMTU yet, restart the discovery process. */
224
225                 if(n->mtuprobes > 30) {
226                         if (probelen == n->maxmtu + 8) {
227                                 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
228                                 n->maxmtu = MTU;
229                                 n->mtuprobes = 10;
230                                 return;
231                         }
232
233                         if(n->minmtu)
234                                 n->mtuprobes = 30;
235                         else
236                                 n->mtuprobes = 1;
237                 }
238
239                 /* If applicable, raise the minimum supported MTU */
240
241                 if(probelen > n->maxmtu)
242                         probelen = n->maxmtu;
243                 if(n->minmtu < probelen)
244                         n->minmtu = probelen;
245
246                 /* Calculate RTT and bandwidth.
247                    The RTT is the time between the MTU probe burst was sent and the first
248                    reply is received. The bandwidth is measured using the time between the
249                    arrival of the first and third probe reply (or type 2 probe requests).
250                  */
251
252                 struct timeval now, diff;
253                 gettimeofday(&now, NULL);
254                 timersub(&now, &n->probe_time, &diff);
255
256                 struct timeval probe_timestamp = now;
257                 if (packet->data[0] == 2 && packet->len >= 11) {
258                         uint32_t sec; memcpy(&sec, packet->data + 3, 4);
259                         uint32_t usec; memcpy(&usec, packet->data + 7, 4);
260                         probe_timestamp.tv_sec = ntohl(sec);
261                         probe_timestamp.tv_usec = ntohl(usec);
262                 }
263                 
264                 n->probe_counter++;
265
266                 if(n->probe_counter == 1) {
267                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
268                         n->probe_time = probe_timestamp;
269                 } else if(n->probe_counter == 3) {
270                         struct timeval probe_timestamp_diff;
271                         timersub(&probe_timestamp, &n->probe_time, &probe_timestamp_diff);
272                         n->bandwidth = 2.0 * probelen / (probe_timestamp_diff.tv_sec + probe_timestamp_diff.tv_usec * 1e-6);
273                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
274                 }
275         }
276 }
277
278 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
279         if(level == 0) {
280                 memcpy(dest, source, len);
281                 return len;
282         } else if(level == 10) {
283 #ifdef HAVE_LZO
284                 lzo_uint lzolen = MAXSIZE;
285                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
286                 return lzolen;
287 #else
288                 return -1;
289 #endif
290         } else if(level < 10) {
291 #ifdef HAVE_ZLIB
292                 unsigned long destlen = MAXSIZE;
293                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
294                         return destlen;
295                 else
296 #endif
297                         return -1;
298         } else {
299 #ifdef HAVE_LZO
300                 lzo_uint lzolen = MAXSIZE;
301                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
302                 return lzolen;
303 #else
304                 return -1;
305 #endif
306         }
307
308         return -1;
309 }
310
311 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
312         if(level == 0) {
313                 memcpy(dest, source, len);
314                 return len;
315         } else if(level > 9) {
316 #ifdef HAVE_LZO
317                 lzo_uint lzolen = MAXSIZE;
318                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
319                         return lzolen;
320                 else
321 #endif
322                         return -1;
323         }
324 #ifdef HAVE_ZLIB
325         else {
326                 unsigned long destlen = MAXSIZE;
327                 if(uncompress(dest, &destlen, source, len) == Z_OK)
328                         return destlen;
329                 else
330                         return -1;
331         }
332 #endif
333
334         return -1;
335 }
336
337 /* VPN packet I/O */
338
339 static void receive_packet(node_t *n, vpn_packet_t *packet) {
340         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
341                            packet->len, n->name, n->hostname);
342
343         n->in_packets++;
344         n->in_bytes += packet->len;
345
346         route(n, packet);
347 }
348
349 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
350         if(n->status.sptps)
351                 return sptps_verify_datagram(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
352
353         if(!digest_active(n->indigest) || inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest))
354                 return false;
355
356         return digest_verify(n->indigest, &inpkt->seqno, inpkt->len - digest_length(n->indigest), (const char *)&inpkt->seqno + inpkt->len - digest_length(n->indigest));
357 }
358
/* Process a UDP packet that has been attributed to node n.

   For SPTPS nodes the packet is passed to sptps_receive_data() and its result
   is returned. For all other nodes the packet is authenticated, decrypted,
   checked against the replay window, decompressed and then dispatched either
   to mtu_probe_h() or to receive_packet().

   Returns true if the packet was accepted and dispatched, false if it was
   dropped. */
static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
        /* Two scratch packets, used alternately as the output of the decrypt
           and decompress steps. */
        vpn_packet_t pkt1, pkt2;
        vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
        int nextpkt = 0;
        size_t outlen;

        if(n->status.sptps) {
                if(!n->sptps.state) {
                        /* No SPTPS session yet: request a key unless we are
                           already waiting for one, and drop the packet. */
                        if(!n->status.waitingforkey) {
                                logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
                                send_req_key(n);
                        } else {
                                logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
                        }
                        return false;
                }
                return sptps_receive_data(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
        }

        if(!n->status.validkey) {
                logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
                return false;
        }

        /* Check packet length */

        if(inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest)) {
                logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
                                        n->name, n->hostname);
                return false;
        }

        /* Check the message authentication code */

        if(digest_active(n->indigest)) {
                /* The MAC trails the packet; strip it from the length first. */
                inpkt->len -= digest_length(n->indigest);
                if(!digest_verify(n->indigest, &inpkt->seqno, inpkt->len, (const char *)&inpkt->seqno + inpkt->len)) {
                        logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
                        return false;
                }
        }
        /* Decrypt the packet */

        if(cipher_active(n->incipher)) {
                vpn_packet_t *outpkt = pkt[nextpkt++];
                outlen = MAXSIZE;

                if(!cipher_decrypt(n->incipher, &inpkt->seqno, inpkt->len, &outpkt->seqno, &outlen, true)) {
                        logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
                        return false;
                }

                outpkt->len = outlen;
                inpkt = outpkt;
        }

        /* Check the sequence number */

        inpkt->len -= sizeof inpkt->seqno;
        uint32_t seqno;
        memcpy(&seqno, inpkt->seqno, sizeof seqno);
        seqno = ntohl(seqno);

        if(replaywin) {
                /* Anything other than the next expected number needs a closer look. */
                if(seqno != n->received_seqno + 1) {
                        if(seqno >= n->received_seqno + replaywin * 8) {
                                /* Far beyond the window: drop the first few such
                                   packets; if they keep coming, accept the jump
                                   and reset the late bitmap. */
                                if(n->farfuture++ < replaywin >> 2) {
                                        logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
                                                n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
                                        return false;
                                }
                                logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
                                                seqno - n->received_seqno - 1, n->name, n->hostname);
                                memset(n->late, 0, replaywin);
                        } else if (seqno <= n->received_seqno) {
                                /* An old packet: reject it if it lies before the
                                   window or its bit in the late bitmap is not set. */
                                if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
                                        logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
                                                n->name, n->hostname, seqno, n->received_seqno);
                                        return false;
                                }
                        } else {
                                /* A gap inside the window: mark every skipped
                                   sequence number as late. */
                                for(int i = n->received_seqno + 1; i < seqno; i++)
                                        n->late[(i / 8) % replaywin] |= 1 << i % 8;
                        }
                }

                /* Packet accepted: reset the far-future counter and clear this
                   sequence number's late bit. */
                n->farfuture = 0;
                n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
        }

        if(seqno > n->received_seqno)
                n->received_seqno = seqno;

        n->received++;

        if(n->received_seqno > MAX_SEQNO)
                regenerate_key();

        /* Decompress the packet */

        length_t origlen = inpkt->len;

        if(n->incompression) {
                vpn_packet_t *outpkt = pkt[nextpkt++];

                if((outpkt->len = uncompress_packet(outpkt->data, inpkt->data, inpkt->len, n->incompression)) < 0) {
                        logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
                                                 n->name, n->hostname);
                        return false;
                }

                inpkt = outpkt;

                /* NOTE(review): presumably compensates for the worst-case size
                   overhead of compressing incompressible probe data -- confirm. */
                origlen -= MTU/64 + 20;
        }

        inpkt->priority = 0;

        /* Bytes 12 and 13 both being zero identifies an MTU probe. */
        if(!inpkt->data[12] && !inpkt->data[13])
                mtu_probe_h(n, inpkt, origlen);
        else
                receive_packet(n, inpkt);
        return true;
}
483
484 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
485         vpn_packet_t outpkt;
486
487         if(len > sizeof outpkt.data)
488                 return;
489
490         outpkt.len = len;
491         if(c->options & OPTION_TCPONLY)
492                 outpkt.priority = 0;
493         else
494                 outpkt.priority = -1;
495         memcpy(outpkt.data, buffer, len);
496
497         receive_packet(c->node, &outpkt);
498 }
499
500 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
501         if(!n->status.validkey) {
502                 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
503                 if(!n->status.waitingforkey)
504                         send_req_key(n);
505                 else if(n->last_req_key + 10 < now.tv_sec) {
506                         logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
507                         sptps_stop(&n->sptps);
508                         n->status.waitingforkey = false;
509                         send_req_key(n);
510                 }
511                 return;
512         }
513
514         uint8_t type = 0;
515         int offset = 0;
516
517         if(!(origpkt->data[12] | origpkt->data[13])) {
518                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)origpkt->data, origpkt->len);
519                 return;
520         }
521
522         if(routing_mode == RMODE_ROUTER)
523                 offset = 14;
524         else
525                 type = PKT_MAC;
526
527         if(origpkt->len < offset)
528                 return;
529
530         vpn_packet_t outpkt;
531
532         if(n->outcompression) {
533                 int len = compress_packet(outpkt.data + offset, origpkt->data + offset, origpkt->len - offset, n->outcompression);
534                 if(len < 0) {
535                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
536                 } else if(len < origpkt->len - offset) {
537                         outpkt.len = len + offset;
538                         origpkt = &outpkt;
539                         type |= PKT_COMPRESSED;
540                 }
541         }
542
543         sptps_send_record(&n->sptps, type, (char *)origpkt->data + offset, origpkt->len - offset);
544         return;
545 }
546
547 static void adapt_socket(const sockaddr_t *sa, int *sock) {
548         /* Make sure we have a suitable socket for the chosen address */
549         if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
550                 for(int i = 0; i < listen_sockets; i++) {
551                         if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
552                                 *sock = i;
553                                 break;
554                         }
555                 }
556         }
557 }
558
559 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
560         /* Latest guess */
561         *sa = &n->address;
562         *sock = n->sock;
563
564         /* If the UDP address is confirmed, use it. */
565         if(n->status.udp_confirmed)
566                 return;
567
568         /* Send every third packet to n->address; that could be set
569            to the node's reflexive UDP address discovered during key
570            exchange. */
571
572         static int x = 0;
573         if(++x >= 3) {
574                 x = 0;
575                 return;
576         }
577
578         /* Otherwise, address are found in edges to this node.
579            So we pick a random edge and a random socket. */
580
581         int i = 0;
582         int j = rand() % n->edge_tree->count;
583         edge_t *candidate = NULL;
584
585         for splay_each(edge_t, e, n->edge_tree) {
586                 if(i++ == j) {
587                         candidate = e->reverse;
588                         break;
589                 }
590         }
591
592         if(candidate) {
593                 *sa = &candidate->address;
594                 *sock = rand() % listen_sockets;
595         }
596
597         adapt_socket(*sa, sock);
598 }
599
600 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
601         *sa = NULL;
602
603         /* Pick one of the edges from this node at random, then use its local address. */
604
605         int i = 0;
606         int j = rand() % n->edge_tree->count;
607         edge_t *candidate = NULL;
608
609         for splay_each(edge_t, e, n->edge_tree) {
610                 if(i++ == j) {
611                         candidate = e;
612                         break;
613                 }
614         }
615
616         if (candidate && candidate->local_address.sa.sa_family) {
617                 *sa = &candidate->local_address;
618                 *sock = rand() % listen_sockets;
619                 adapt_socket(*sa, sock);
620         }
621 }
622
623 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
624         vpn_packet_t pkt1, pkt2;
625         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
626         vpn_packet_t *inpkt = origpkt;
627         int nextpkt = 0;
628         vpn_packet_t *outpkt;
629         int origlen = origpkt->len;
630         size_t outlen;
631 #if defined(SOL_IP) && defined(IP_TOS)
632         static int priority = 0;
633         int origpriority = origpkt->priority;
634 #endif
635
636         if(!n->status.reachable) {
637                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
638                 return;
639         }
640
641         if(n->status.sptps)
642                 return send_sptps_packet(n, origpkt);
643
644         /* Make sure we have a valid key */
645
646         if(!n->status.validkey) {
647                 logger(DEBUG_TRAFFIC, LOG_INFO,
648                                    "No valid key known yet for %s (%s), forwarding via TCP",
649                                    n->name, n->hostname);
650
651                 if(n->last_req_key + 10 <= now.tv_sec) {
652                         send_req_key(n);
653                         n->last_req_key = now.tv_sec;
654                 }
655
656                 send_tcppacket(n->nexthop->connection, origpkt);
657
658                 return;
659         }
660
661         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (inpkt->data[12] | inpkt->data[13])) {
662                 logger(DEBUG_TRAFFIC, LOG_INFO,
663                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
664                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
665
666                 if(n != n->nexthop)
667                         send_packet(n->nexthop, origpkt);
668                 else
669                         send_tcppacket(n->nexthop->connection, origpkt);
670
671                 return;
672         }
673
674         /* Compress the packet */
675
676         if(n->outcompression) {
677                 outpkt = pkt[nextpkt++];
678
679                 if((outpkt->len = compress_packet(outpkt->data, inpkt->data, inpkt->len, n->outcompression)) < 0) {
680                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
681                                    n->name, n->hostname);
682                         return;
683                 }
684
685                 inpkt = outpkt;
686         }
687
688         /* Add sequence number */
689
690         uint32_t seqno = htonl(++(n->sent_seqno));
691         memcpy(inpkt->seqno, &seqno, sizeof inpkt->seqno);
692         inpkt->len += sizeof inpkt->seqno;
693
694         /* Encrypt the packet */
695
696         if(cipher_active(n->outcipher)) {
697                 outpkt = pkt[nextpkt++];
698                 outlen = MAXSIZE;
699
700                 if(!cipher_encrypt(n->outcipher, inpkt->seqno, inpkt->len, outpkt->seqno, &outlen, true)) {
701                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
702                         goto end;
703                 }
704
705                 outpkt->len = outlen;
706                 inpkt = outpkt;
707         }
708
709         /* Add the message authentication code */
710
711         if(digest_active(n->outdigest)) {
712                 if(!digest_create(n->outdigest, inpkt->seqno, inpkt->len, inpkt->seqno + inpkt->len)) {
713                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
714                         goto end;
715                 }
716
717                 inpkt->len += digest_length(n->outdigest);
718         }
719
720         /* Send the packet */
721
722         const sockaddr_t *sa = NULL;
723         int sock;
724
725         if(n->status.send_locally)
726                 choose_local_address(n, &sa, &sock);
727         if(!sa)
728                 choose_udp_address(n, &sa, &sock);
729
730 #if defined(SOL_IP) && defined(IP_TOS)
731         if(priorityinheritance && origpriority != priority
732            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
733                 priority = origpriority;
734                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
735                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
736                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
737         }
738 #endif
739
740         if(sendto(listen_socket[sock].udp.fd, inpkt->seqno, inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
741                 if(sockmsgsize(sockerrno)) {
742                         if(n->maxmtu >= origlen)
743                                 n->maxmtu = origlen - 1;
744                         if(n->mtu >= origlen)
745                                 n->mtu = origlen - 1;
746                 } else
747                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
748         }
749
750 end:
751         origpkt->len = origlen;
752 }
753
754 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const char *data, size_t len) {
755         node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
756         bool direct = from == myself && to == relay;
757         bool relay_supported = (relay->options >> 24) >= 4;
758
759         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
760            TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
761                  This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
762
763         if(type == SPTPS_HANDSHAKE || ((myself->options | relay->options) & OPTION_TCPONLY) || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
764                 char buf[len * 4 / 3 + 5];
765                 b64encode(data, buf, len);
766                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
767                    to ensure we get to learn the reflexive UDP address. */
768                 if(from == myself && !to->status.validkey) {
769                         to->incompression = myself->incompression;
770                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
771                 } else {
772                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
773                 }
774         }
775
776         size_t overhead = 0;
777         if(relay_supported) overhead += sizeof to->id + sizeof from->id;
778         char buf[len + overhead]; char* buf_ptr = buf;
779         if(relay_supported) {
780                 if(direct) {
781                         /* Inform the recipient that this packet was sent directly. */
782                         node_id_t nullid = {0};
783                         memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
784                 } else {
785                         memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
786                 }
787                 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
788
789         }
790         /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
791         memcpy(buf_ptr, data, len); buf_ptr += len;
792
793         const sockaddr_t *sa = NULL;
794         int sock;
795         if(relay->status.send_locally)
796                 choose_local_address(relay, &sa, &sock);
797         if(!sa)
798                 choose_udp_address(relay, &sa, &sock);
799         logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
800         if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
801                 if(sockmsgsize(sockerrno)) {
802                         // Compensate for SPTPS overhead
803                         len -= SPTPS_DATAGRAM_OVERHEAD;
804                         if(relay->maxmtu >= len)
805                                 relay->maxmtu = len - 1;
806                         if(relay->mtu >= len)
807                                 relay->mtu = len - 1;
808                 } else {
809                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
810                         return false;
811                 }
812         }
813
814         return true;
815 }
816
817 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
818         return send_sptps_data_priv(handle, myself, type, data, len);
819 }
820
821 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
822         node_t *from = handle;
823
824         if(type == SPTPS_HANDSHAKE) {
825                 if(!from->status.validkey) {
826                         from->status.validkey = true;
827                         from->status.waitingforkey = false;
828                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
829                 }
830                 return true;
831         }
832
833         if(len > MTU) {
834                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
835                 return false;
836         }
837
838         vpn_packet_t inpkt;
839
840         if(type == PKT_PROBE) {
841                 inpkt.len = len;
842                 memcpy(inpkt.data, data, len);
843                 mtu_probe_h(from, &inpkt, len);
844                 return true;
845         }
846
847         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
848                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
849                 return false;
850         }
851
852         /* Check if we have the headers we need */
853         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
854                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
855                 return false;
856         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
857                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
858         }
859
860         int offset = (type & PKT_MAC) ? 0 : 14;
861         if(type & PKT_COMPRESSED) {
862                 length_t ulen = uncompress_packet(inpkt.data + offset, (const uint8_t *)data, len, from->incompression);
863                 if(ulen < 0) {
864                         return false;
865                 } else {
866                         inpkt.len = ulen + offset;
867                 }
868                 if(inpkt.len > MAXSIZE)
869                         abort();
870         } else {
871                 memcpy(inpkt.data + offset, data, len);
872                 inpkt.len = len + offset;
873         }
874
875         /* Generate the Ethernet packet type if necessary */
876         if(offset) {
877                 switch(inpkt.data[14] >> 4) {
878                         case 4:
879                                 inpkt.data[12] = 0x08;
880                                 inpkt.data[13] = 0x00;
881                                 break;
882                         case 6:
883                                 inpkt.data[12] = 0x86;
884                                 inpkt.data[13] = 0xDD;
885                                 break;
886                         default:
887                                 logger(DEBUG_TRAFFIC, LOG_ERR,
888                                                    "Unknown IP version %d while reading packet from %s (%s)",
889                                                    inpkt.data[14] >> 4, from->name, from->hostname);
890                                 return false;
891                 }
892         }
893
894         receive_packet(from, &inpkt);
895         return true;
896 }
897
898 /*
899   send a packet to the given vpn ip.
900 */
901 void send_packet(node_t *n, vpn_packet_t *packet) {
902         node_t *via;
903
904         if(n == myself) {
905                 if(overwrite_mac)
906                          memcpy(packet->data, mymac.x, ETH_ALEN);
907                 n->out_packets++;
908                 n->out_bytes += packet->len;
909                 devops.write(packet);
910                 return;
911         }
912
913         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
914                            packet->len, n->name, n->hostname);
915
916         if(!n->status.reachable) {
917                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
918                                    n->name, n->hostname);
919                 return;
920         }
921
922         n->out_packets++;
923         n->out_bytes += packet->len;
924
925         if(n->status.sptps) {
926                 send_sptps_packet(n, packet);
927                 return;
928         }
929
930         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
931
932         if(via != n)
933                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
934                            n->name, via->name, n->via->hostname);
935
936         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
937                 if(!send_tcppacket(via->connection, packet))
938                         terminate_connection(via->connection, true);
939         } else
940                 send_udppacket(via, packet);
941 }
942
943 /* Broadcast a packet using the minimum spanning tree */
944
945 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
946         // Always give ourself a copy of the packet.
947         if(from != myself)
948                 send_packet(myself, packet);
949
950         // In TunnelServer mode, do not forward broadcast packets.
951         // The MST might not be valid and create loops.
952         if(tunnelserver || broadcast_mode == BMODE_NONE)
953                 return;
954
955         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
956                            packet->len, from->name, from->hostname);
957
958         switch(broadcast_mode) {
959                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
960                 // This guarantees all nodes receive the broadcast packet, and
961                 // usually distributes the sending of broadcast packets over all nodes.
962                 case BMODE_MST:
963                         for list_each(connection_t, c, connection_list)
964                                 if(c->edge && c->status.mst && c != from->nexthop->connection)
965                                         send_packet(c->node, packet);
966                         break;
967
968                 // In direct mode, we send copies to each node we know of.
969                 // However, this only reaches nodes that can be reached in a single hop.
970                 // We don't have enough information to forward broadcast packets in this case.
971                 case BMODE_DIRECT:
972                         if(from != myself)
973                                 break;
974
975                         for splay_each(node_t, n, node_tree)
976                                 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
977                                         send_packet(n, packet);
978                         break;
979
980                 default:
981                         break;
982         }
983 }
984
985 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
986         node_t *n = NULL;
987         bool hard = false;
988         static time_t last_hard_try = 0;
989
990         for splay_each(edge_t, e, edge_weight_tree) {
991                 if(!e->to->status.reachable || e->to == myself)
992                         continue;
993
994                 if(sockaddrcmp_noport(from, &e->address)) {
995                         if(last_hard_try == now.tv_sec)
996                                 continue;
997                         hard = true;
998                 }
999
1000                 if(!try_mac(e->to, pkt))
1001                         continue;
1002
1003                 n = e->to;
1004                 break;
1005         }
1006
1007         if(hard)
1008                 last_hard_try = now.tv_sec;
1009
1010         last_hard_try = now.tv_sec;
1011         return n;
1012 }
1013
1014 void handle_incoming_vpn_data(void *data, int flags) {
1015         listen_socket_t *ls = data;
1016         vpn_packet_t pkt;
1017         char *hostname;
1018         sockaddr_t from = {{0}};
1019         socklen_t fromlen = sizeof from;
1020         node_t *n = NULL;
1021         node_t *to = myself;
1022         int len;
1023
1024         len = recvfrom(ls->udp.fd, &pkt.dstid, MAXSIZE, 0, &from.sa, &fromlen);
1025
1026         if(len <= 0 || len > MAXSIZE) {
1027                 if(!sockwouldblock(sockerrno))
1028                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1029                 return;
1030         }
1031
1032         pkt.len = len;
1033
1034         sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
1035
1036         bool direct = false;
1037         if(len >= sizeof pkt.dstid + sizeof pkt.srcid) {
1038                 n = lookup_node_id(&pkt.srcid);
1039                 if(n) {
1040                         node_id_t nullid = {0};
1041                         if(memcmp(&pkt.dstid, &nullid, sizeof nullid) == 0) {
1042                                 /* A zero dstid is used to indicate a direct, non-relayed packet. */
1043                                 direct = true;
1044                         } else {
1045                                 to = lookup_node_id(&pkt.dstid);
1046                                 if(!to) {
1047                                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet presumably sent by %s (%s) but with unknown destination ID", n->name, n->hostname);
1048                                         return;
1049                                 }
1050                         }
1051                         pkt.len -= sizeof pkt.dstid + sizeof pkt.srcid;
1052                 }
1053         }
1054
1055         if(to != myself) {
1056                 /* We are being asked to relay this packet. */
1057
1058                 /* Don't allow random strangers to relay through us. Note that we check for *any* known address since we are not necessarily the first relay. */
1059                 if (!lookup_node_udp(&from)) {
1060                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Refusing to relay packet from (presumably) %s (%s) to (presumably) %s (%s) because the packet comes from an unknown address", n->name, n->hostname, to->name, to->hostname);
1061                         return;
1062                 }
1063
1064                 send_sptps_data_priv(to, n, 0, pkt.seqno, pkt.len);
1065                 return;
1066         }
1067
1068         if(!n) {
1069                 /* Most likely an old-style packet without node IDs. */
1070                 direct = true;
1071                 memmove(pkt.seqno, &pkt.dstid, sizeof pkt - offsetof(vpn_packet_t, seqno));
1072                 n = lookup_node_udp(&from);
1073         }
1074
1075         if(!n)
1076                 n = try_harder(&from, &pkt);
1077
1078         if(!n) {
1079                 if(debug_level >= DEBUG_PROTOCOL) {
1080                         hostname = sockaddr2hostname(&from);
1081                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1082                         free(hostname);
1083                 }
1084                 return;
1085         }
1086
1087         if(!receive_udppacket(n, &pkt))
1088                 return;
1089
1090         n->sock = ls - listen_socket;
1091         if(direct && sockaddrcmp(&from, &n->address))
1092                 update_node_udp(n, &from);
1093 }
1094
1095 void handle_device_data(void *data, int flags) {
1096         vpn_packet_t packet;
1097
1098         packet.priority = 0;
1099
1100         if(devops.read(&packet)) {
1101                 myself->in_packets++;
1102                 myself->in_bytes += packet.len;
1103                 route(myself, &packet);
1104         }
1105 }