Make sure PMTU discovery works in switch mode with VLAN tags.
[tinc] / src / route.c
1 /*
2     route.c -- routing
3     Copyright (C) 2000-2005 Ivo Timmermans,
4                   2000-2012 Guus Sliepen <guus@tinc-vpn.org>
5
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License along
17     with this program; if not, write to the Free Software Foundation, Inc.,
18     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 */
20
21 #include "system.h"
22
23 #include "connection.h"
24 #include "control_common.h"
25 #include "ethernet.h"
26 #include "ipv4.h"
27 #include "ipv6.h"
28 #include "logger.h"
29 #include "meta.h"
30 #include "net.h"
31 #include "protocol.h"
32 #include "route.h"
33 #include "subnet.h"
34 #include "utils.h"
35
/* Run-time routing settings; the initialisers below are the defaults. */
rmode_t routing_mode = RMODE_ROUTER;
/* When FMODE_OFF, the unicast routers refuse packets that neither come from
   nor are destined for this node. */
fmode_t forwarding_mode = FMODE_INTERNAL;
/* Tested as a boolean by route_ipv4()/route_ipv6(): zero disables broadcasting. */
bmode_t broadcast_mode = BMODE_MST;
bool decrement_ttl = false;             /* not referenced in the part of the file reviewed here */
/* When set, packets whose next hop is not the subnet owner itself are rejected. */
bool directonly = false;
/* When set, route_ipv4_unicast() copies frame byte 15 into packet->priority
   (presumably the IPv4 TOS byte of an untagged frame -- TODO confirm). */
bool priorityinheritance = false;
/* Lifetime of a learned MAC subnet; added to time(NULL), so it is in seconds. */
int macexpire = 600;
/* When set, the source MAC of ARP / neighbor solicitation requests is stored in mymac. */
bool overwrite_mac = false;
mac_t mymac = {{0xFE, 0xFD, 0, 0, 0, 0}};
bool pcap = false;                      /* not referenced in the part of the file reviewed here */
46
/* Sizes of various headers, used as byte offsets into packet->data */

static const size_t ether_size = sizeof(struct ether_header);
static const size_t arp_size = sizeof(struct ether_arp);
static const size_t ip_size = sizeof(struct ip);
/* NOTE(review): presumably struct icmp embeds a struct ip for the quoted
   header, which is subtracted here to leave only the ICMP header -- confirm. */
static const size_t icmp_size = sizeof(struct icmp) - sizeof(struct ip);
static const size_t ip6_size = sizeof(struct ip6_hdr);
static const size_t icmp6_size = sizeof(struct icmp6_hdr);
static const size_t ns_size = sizeof(struct nd_neighbor_solicit);
static const size_t opt_size = sizeof(struct nd_opt_hdr);

/* Fallback MAX(); note that it evaluates each argument twice, so arguments
   must not have side effects. */
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

/* Timer that runs age_subnets(); armed by learn_mac() and re-armed by
   age_subnets() itself while expiring subnets remain. */
static struct event age_subnets_event;
63
64 /* RFC 1071 */
65
/*
 * Compute the Internet checksum (RFC 1071) of a buffer.
 *
 *   data     start of the bytes to sum; no alignment is required
 *   len      number of bytes to sum
 *   prevsum  result of a previous call to continue from, or ~0 (0xFFFF)
 *            to start a new checksum
 *
 * Returns the one's complement of the folded 16-bit sum. Words are summed
 * in memory order, so storing the returned value directly into a header
 * field yields the correct on-the-wire checksum on any byte order.
 *
 * When chaining calls, every chunk except the last must have an even length,
 * otherwise the 16-bit word boundaries of the later chunks shift.
 */
static uint16_t inet_checksum(void *data, int len, uint16_t prevsum) {
        const uint8_t *bytes = data;
        uint32_t checksum = prevsum ^ 0xFFFF;
        uint16_t word;

        while(len >= 2) {
                /* memcpy instead of dereferencing a uint16_t pointer: the
                   buffer usually sits at an odd offset inside a packet. */
                memcpy(&word, bytes, sizeof word);
                checksum += word;
                bytes += 2;
                len -= 2;
        }

        if(len > 0) {
                /* A trailing odd byte is padded with a zero byte on the
                   right (RFC 1071); build the word in memory order so that
                   this is also correct on big-endian hosts. */
                const uint8_t padded[2] = {bytes[0], 0};
                memcpy(&word, padded, sizeof word);
                checksum += word;
        }

        /* Fold the carries back into the low 16 bits */
        while(checksum >> 16)
                checksum = (checksum & 0xFFFF) + (checksum >> 16);

        return (uint16_t)~checksum;
}
83
/*
 * Simple per-second rate limiter shared by all callers.
 *
 * Returns true when the caller should back off, i.e. when `frequency`
 * calls have already been let through during the current wall-clock
 * second; returns false (and counts the call) otherwise.
 */
static bool ratelimit(int frequency) {
        static time_t window = 0;       /* second the counter belongs to */
        static int used = 0;            /* calls let through in that second */
        const time_t current = time(NULL);

        if(window != current) {
                /* A new second has started: restart counting */
                window = current;
                used = 0;
        } else if(used >= frequency) {
                return true;
        }

        used++;
        return false;
}
100
101 static bool checklength(node_t *source, vpn_packet_t *packet, length_t length) {
102         if(packet->len < length) {
103                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Got too short packet from %s (%s)", source->name, source->hostname);
104                 return false;
105         } else
106                 return true;
107 }
108
109 static void clamp_mss(const node_t *source, const node_t *via, vpn_packet_t *packet) {
110         if(!source || !via || !(via->options & OPTION_CLAMP_MSS))
111                 return;
112
113         uint16_t mtu = source->mtu;
114         if(via != myself && via->mtu < mtu)
115                 mtu = via->mtu;
116
117         /* Find TCP header */
118         int start = ether_size;
119         uint16_t type = packet->data[12] << 8 | packet->data[13];
120
121         if(type == ETH_P_8021Q) {
122                 start += 4;
123                 type = packet->data[16] << 8 | packet->data[17];
124         }
125
126         if(type == ETH_P_IP && packet->data[start + 9] == 6)
127                 start += (packet->data[start] & 0xf) * 4;
128         else if(type == ETH_P_IPV6 && packet->data[start + 6] == 6)
129                 start += 40;
130         else
131                 return;
132
133         if(packet->len <= start + 20)
134                 return;
135
136         /* Use data offset field to calculate length of options field */
137         int len = ((packet->data[start + 12] >> 4) - 5) * 4;
138
139         if(packet->len < start + 20 + len)
140                 return;
141
142         /* Search for MSS option header */
143         for(int i = 0; i < len;) {
144                 if(packet->data[start + 20 + i] == 0)
145                         break;
146
147                 if(packet->data[start + 20 + i] == 1) {
148                         i++;
149                         continue;
150                 }
151
152                 if(i > len - 2 || i > len - packet->data[start + 21 + i])
153                         break;
154
155                 if(packet->data[start + 20 + i] != 2) {
156                         if(packet->data[start + 21 + i] < 2)
157                                 break;
158                         i += packet->data[start + 21 + i];
159                         continue;
160                 }
161
162                 if(packet->data[start + 21] != 4)
163                         break;
164
165                 /* Found it */
166                 uint16_t oldmss = packet->data[start + 22 + i] << 8 | packet->data[start + 23 + i];
167                 uint16_t newmss = mtu - start - 20;
168                 uint16_t csum = packet->data[start + 16] << 8 | packet->data[start + 17];
169
170                 if(oldmss <= newmss)
171                         break;
172
173                 logger(DEBUG_TRAFFIC, LOG_INFO, "Clamping MSS of packet from %s to %s to %d", source->name, via->name, newmss);
174
175                 /* Update the MSS value and the checksum */
176                 packet->data[start + 22 + i] = newmss >> 8;
177                 packet->data[start + 23 + i] = newmss & 0xff;
178                 csum ^= 0xffff;
179                 csum -= oldmss;
180                 csum += newmss;
181                 csum ^= 0xffff;
182                 packet->data[start + 16] = csum >> 8;
183                 packet->data[start + 17] = csum & 0xff;
184                 break;
185         }
186 }
187
188 static void swap_mac_addresses(vpn_packet_t *packet) {
189         mac_t tmp;
190         memcpy(&tmp, &packet->data[0], sizeof tmp);
191         memcpy(&packet->data[0], &packet->data[6], sizeof tmp);
192         memcpy(&packet->data[6], &tmp, sizeof tmp);
193 }
194
195 static void age_subnets(int fd, short events, void *data) {
196         bool left = false;
197         time_t now = time(NULL);
198
199         for splay_each(subnet_t, s, myself->subnet_tree) {
200                 if(s->expires && s->expires < now) {
201                         if(debug_level >= DEBUG_TRAFFIC) {
202                                 char netstr[MAXNETSTR];
203                                 if(net2str(netstr, sizeof netstr, s))
204                                         logger(DEBUG_TRAFFIC, LOG_INFO, "Subnet %s expired", netstr);
205                         }
206
207                         for list_each(connection_t, c, connection_list)
208                                 if(c->status.active)
209                                         send_del_subnet(c, s);
210
211                         subnet_del(myself, s);
212                 } else {
213                         if(s->expires)
214                                 left = true;
215                 }
216         }
217
218         if(left)
219                 event_add(&age_subnets_event, &(struct timeval){10, rand() % 100000});
220 }
221
222 static void learn_mac(mac_t *address) {
223         subnet_t *subnet = lookup_subnet_mac(myself, address);
224
225         /* If we don't know this MAC address yet, store it */
226
227         if(!subnet) {
228                 logger(DEBUG_TRAFFIC, LOG_INFO, "Learned new MAC address %hx:%hx:%hx:%hx:%hx:%hx",
229                                    address->x[0], address->x[1], address->x[2], address->x[3],
230                                    address->x[4], address->x[5]);
231
232                 subnet = new_subnet();
233                 subnet->type = SUBNET_MAC;
234                 subnet->expires = time(NULL) + macexpire;
235                 subnet->net.mac.address = *address;
236                 subnet->weight = 10;
237                 subnet_add(myself, subnet);
238                 subnet_update(myself, subnet, true);
239
240                 /* And tell all other tinc daemons it's our MAC */
241
242                 for list_each(connection_t, c, connection_list)
243                         if(c->status.active)
244                                 send_add_subnet(c, subnet);
245
246                 if(!timeout_initialized(&age_subnets_event))
247                         timeout_set(&age_subnets_event, age_subnets, NULL);
248                 event_add(&age_subnets_event, &(struct timeval){10, rand() % 100000});
249         } else {
250                 if(subnet->expires)
251                         subnet->expires = time(NULL) + macexpire;
252         }
253 }
254
255 /* RFC 792 */
256
257 static void route_ipv4_unreachable(node_t *source, vpn_packet_t *packet, length_t ether_size, uint8_t type, uint8_t code) {
258         struct ip ip = {0};
259         struct icmp icmp = {0};
260
261         struct in_addr ip_src;
262         struct in_addr ip_dst;
263         uint32_t oldlen;
264
265         if(ratelimit(3))
266                 return;
267
268         /* Swap Ethernet source and destination addresses */
269
270         swap_mac_addresses(packet);
271
272         /* Copy headers from packet into properly aligned structs on the stack */
273
274         memcpy(&ip, packet->data + ether_size, ip_size);
275
276         /* Remember original source and destination */
277
278         ip_src = ip.ip_src;
279         ip_dst = ip.ip_dst;
280
281         oldlen = packet->len - ether_size;
282
283         if(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
284                 icmp.icmp_nextmtu = htons(packet->len - ether_size);
285
286         if(oldlen >= IP_MSS - ip_size - icmp_size)
287                 oldlen = IP_MSS - ip_size - icmp_size;
288
289         /* Copy first part of original contents to ICMP message */
290
291         memmove(packet->data + ether_size + ip_size + icmp_size, packet->data + ether_size, oldlen);
292
293         /* Fill in IPv4 header */
294
295         ip.ip_v = 4;
296         ip.ip_hl = ip_size / 4;
297         ip.ip_tos = 0;
298         ip.ip_len = htons(ip_size + icmp_size + oldlen);
299         ip.ip_id = 0;
300         ip.ip_off = 0;
301         ip.ip_ttl = 255;
302         ip.ip_p = IPPROTO_ICMP;
303         ip.ip_sum = 0;
304         ip.ip_src = ip_dst;
305         ip.ip_dst = ip_src;
306
307         ip.ip_sum = inet_checksum(&ip, ip_size, ~0);
308
309         /* Fill in ICMP header */
310
311         icmp.icmp_type = type;
312         icmp.icmp_code = code;
313         icmp.icmp_cksum = 0;
314
315         icmp.icmp_cksum = inet_checksum(&icmp, icmp_size, ~0);
316         icmp.icmp_cksum = inet_checksum(packet->data + ether_size + ip_size + icmp_size, oldlen, icmp.icmp_cksum);
317
318         /* Copy structs on stack back to packet */
319
320         memcpy(packet->data + ether_size, &ip, ip_size);
321         memcpy(packet->data + ether_size + ip_size, &icmp, icmp_size);
322
323         packet->len = ether_size + ip_size + icmp_size + oldlen;
324
325         send_packet(source, packet);
326 }
327
328 /* RFC 791 */
329
330 static void fragment_ipv4_packet(node_t *dest, vpn_packet_t *packet, length_t ether_size) {
331         struct ip ip;
332         vpn_packet_t fragment;
333         int len, maxlen, todo;
334         uint8_t *offset;
335         uint16_t ip_off, origf;
336
337         memcpy(&ip, packet->data + ether_size, ip_size);
338         fragment.priority = packet->priority;
339
340         if(ip.ip_hl != ip_size / 4)
341                 return;
342
343         todo = ntohs(ip.ip_len) - ip_size;
344
345         if(ether_size + ip_size + todo != packet->len) {
346                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Length of packet (%d) doesn't match length in IPv4 header (%d)", packet->len, (int)(ether_size + ip_size + todo));
347                 return;
348         }
349
350         logger(DEBUG_TRAFFIC, LOG_INFO, "Fragmenting packet of %d bytes to %s (%s)", packet->len, dest->name, dest->hostname);
351
352         offset = packet->data + ether_size + ip_size;
353         maxlen = (dest->mtu - ether_size - ip_size) & ~0x7;
354         ip_off = ntohs(ip.ip_off);
355         origf = ip_off & ~IP_OFFMASK;
356         ip_off &= IP_OFFMASK;
357
358         while(todo) {
359                 len = todo > maxlen ? maxlen : todo;
360                 memcpy(fragment.data + ether_size + ip_size, offset, len);
361                 todo -= len;
362                 offset += len;
363
364                 ip.ip_len = htons(ip_size + len);
365                 ip.ip_off = htons(ip_off | origf | (todo ? IP_MF : 0));
366                 ip.ip_sum = 0;
367                 ip.ip_sum = inet_checksum(&ip, ip_size, ~0);
368                 memcpy(fragment.data, packet->data, ether_size);
369                 memcpy(fragment.data + ether_size, &ip, ip_size);
370                 fragment.len = ether_size + ip_size + len;
371
372                 send_packet(dest, &fragment);
373
374                 ip_off += len / 8;
375         }
376 }
377
378 static void route_ipv4_unicast(node_t *source, vpn_packet_t *packet) {
379         subnet_t *subnet;
380         node_t *via;
381         ipv4_t dest;
382
383         memcpy(&dest, &packet->data[30], sizeof dest);
384         subnet = lookup_subnet_ipv4(&dest);
385
386         if(!subnet) {
387                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Cannot route packet from %s (%s): unknown IPv4 destination address %d.%d.%d.%d",
388                                 source->name, source->hostname,
389                                 dest.x[0],
390                                 dest.x[1],
391                                 dest.x[2],
392                                 dest.x[3]);
393
394                 route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_UNKNOWN);
395                 return;
396         }
397
398         if(subnet->owner == source) {
399                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Packet looping back to %s (%s)!", source->name, source->hostname);
400                 return;
401         }
402
403         if(!subnet->owner->status.reachable)
404                 return route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_UNREACH);
405
406         if(forwarding_mode == FMODE_OFF && source != myself && subnet->owner != myself)
407                 return route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_ANO);
408
409         if(priorityinheritance)
410                 packet->priority = packet->data[15];
411
412         via = (subnet->owner->via == myself) ? subnet->owner->nexthop : subnet->owner->via;
413
414         if(via == source) {
415                 logger(DEBUG_TRAFFIC, LOG_ERR, "Routing loop for packet from %s (%s)!", source->name, source->hostname);
416                 return;
417         }
418
419         if(directonly && subnet->owner != via)
420                 return route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_ANO);
421
422         if(via && packet->len > MAX(via->mtu, 590) && via != myself) {
423                 logger(DEBUG_TRAFFIC, LOG_INFO, "Packet for %s (%s) length %d larger than MTU %d", subnet->owner->name, subnet->owner->hostname, packet->len, via->mtu);
424                 if(packet->data[20] & 0x40) {
425                         packet->len = MAX(via->mtu, 590);
426                         route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED);
427                 } else {
428                         fragment_ipv4_packet(via, packet, ether_size);
429                 }
430
431                 return;
432         }
433
434         clamp_mss(source, via, packet);
435
436         send_packet(subnet->owner, packet);
437 }
438
439 static void route_ipv4(node_t *source, vpn_packet_t *packet) {
440         if(!checklength(source, packet, ether_size + ip_size))
441                 return;
442
443         if(broadcast_mode && (((packet->data[30] & 0xf0) == 0xe0) || (
444                         packet->data[30] == 255 &&
445                         packet->data[31] == 255 &&
446                         packet->data[32] == 255 &&
447                         packet->data[33] == 255)))
448                 broadcast_packet(source, packet);
449         else
450                 route_ipv4_unicast(source, packet);
451 }
452
453 /* RFC 2463 */
454
455 static void route_ipv6_unreachable(node_t *source, vpn_packet_t *packet, length_t ether_size, uint8_t type, uint8_t code) {
456         struct ip6_hdr ip6;
457         struct icmp6_hdr icmp6 = {0};
458         uint16_t checksum;
459
460         struct {
461                 struct in6_addr ip6_src;        /* source address */
462                 struct in6_addr ip6_dst;        /* destination address */
463                 uint32_t length;
464                 uint32_t next;
465         } pseudo;
466
467         if(ratelimit(3))
468                 return;
469
470         /* Swap Ethernet source and destination addresses */
471
472         swap_mac_addresses(packet);
473
474         /* Copy headers from packet to structs on the stack */
475
476         memcpy(&ip6, packet->data + ether_size, ip6_size);
477
478         /* Remember original source and destination */
479
480         pseudo.ip6_src = ip6.ip6_dst;
481         pseudo.ip6_dst = ip6.ip6_src;
482
483         pseudo.length = packet->len - ether_size;
484
485         if(type == ICMP6_PACKET_TOO_BIG)
486                 icmp6.icmp6_mtu = htonl(pseudo.length);
487
488         if(pseudo.length >= IP_MSS - ip6_size - icmp6_size)
489                 pseudo.length = IP_MSS - ip6_size - icmp6_size;
490
491         /* Copy first part of original contents to ICMP message */
492
493         memmove(packet->data + ether_size + ip6_size + icmp6_size, packet->data + ether_size, pseudo.length);
494
495         /* Fill in IPv6 header */
496
497         ip6.ip6_flow = htonl(0x60000000UL);
498         ip6.ip6_plen = htons(icmp6_size + pseudo.length);
499         ip6.ip6_nxt = IPPROTO_ICMPV6;
500         ip6.ip6_hlim = 255;
501         ip6.ip6_src = pseudo.ip6_src;
502         ip6.ip6_dst = pseudo.ip6_dst;
503
504         /* Fill in ICMP header */
505
506         icmp6.icmp6_type = type;
507         icmp6.icmp6_code = code;
508         icmp6.icmp6_cksum = 0;
509
510         /* Create pseudo header */
511
512         pseudo.length = htonl(icmp6_size + pseudo.length);
513         pseudo.next = htonl(IPPROTO_ICMPV6);
514
515         /* Generate checksum */
516
517         checksum = inet_checksum(&pseudo, sizeof pseudo, ~0);
518         checksum = inet_checksum(&icmp6, icmp6_size, checksum);
519         checksum = inet_checksum(packet->data + ether_size + ip6_size + icmp6_size, ntohl(pseudo.length) - icmp6_size, checksum);
520
521         icmp6.icmp6_cksum = checksum;
522
523         /* Copy structs on stack back to packet */
524
525         memcpy(packet->data + ether_size, &ip6, ip6_size);
526         memcpy(packet->data + ether_size + ip6_size, &icmp6, icmp6_size);
527
528         packet->len = ether_size + ip6_size + ntohl(pseudo.length);
529
530         send_packet(source, packet);
531 }
532
533 static void route_ipv6_unicast(node_t *source, vpn_packet_t *packet) {
534         subnet_t *subnet;
535         node_t *via;
536         ipv6_t dest;
537
538         memcpy(&dest, &packet->data[38], sizeof dest);
539         subnet = lookup_subnet_ipv6(&dest);
540
541         if(!subnet) {
542                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Cannot route packet from %s (%s): unknown IPv6 destination address %hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
543                                 source->name, source->hostname,
544                                 ntohs(dest.x[0]),
545                                 ntohs(dest.x[1]),
546                                 ntohs(dest.x[2]),
547                                 ntohs(dest.x[3]),
548                                 ntohs(dest.x[4]),
549                                 ntohs(dest.x[5]),
550                                 ntohs(dest.x[6]),
551                                 ntohs(dest.x[7]));
552
553                 route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR);
554                 return;
555         }
556
557         if(subnet->owner == source) {
558                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Packet looping back to %s (%s)!", source->name, source->hostname);
559                 return;
560         }
561
562         if(!subnet->owner->status.reachable)
563                 return route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE);
564
565         if(forwarding_mode == FMODE_OFF && source != myself && subnet->owner != myself)
566                 return route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN);
567
568         via = (subnet->owner->via == myself) ? subnet->owner->nexthop : subnet->owner->via;
569
570         if(via == source) {
571                 logger(DEBUG_TRAFFIC, LOG_ERR, "Routing loop for packet from %s (%s)!", source->name, source->hostname);
572                 return;
573         }
574
575         if(directonly && subnet->owner != via)
576                 return route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN);
577
578         if(via && packet->len > MAX(via->mtu, 1294) && via != myself) {
579                 logger(DEBUG_TRAFFIC, LOG_INFO, "Packet for %s (%s) length %d larger than MTU %d", subnet->owner->name, subnet->owner->hostname, packet->len, via->mtu);
580                 packet->len = MAX(via->mtu, 1294);
581                 route_ipv6_unreachable(source, packet, ether_size, ICMP6_PACKET_TOO_BIG, 0);
582                 return;
583         }
584
585         clamp_mss(source, via, packet);
586
587         send_packet(subnet->owner, packet);
588 }
589
590 /* RFC 2461 */
591
/*
 * Answer an IPv6 neighbor solicitation from the local host on behalf of
 * another node.
 *
 * Only solicitations coming from this node (source == myself) are handled.
 * After the ICMPv6 checksum has been verified, the target address is looked
 * up in the subnet table; if it belongs to a subnet owned by another node,
 * the packet is rewritten in place into a neighbor advertisement and sent
 * back to `source`. The link-layer address offered in the reply is the
 * requester's own MAC address with its last byte inverted.
 */
static void route_neighborsol(node_t *source, vpn_packet_t *packet) {
        struct ip6_hdr ip6;
        struct nd_neighbor_solicit ns;
        struct nd_opt_hdr opt;
        subnet_t *subnet;
        uint16_t checksum;
        bool has_opt;

        /* Pseudo header that is included in the ICMPv6 checksum */
        struct {
                struct in6_addr ip6_src;
                struct in6_addr ip6_dst;
                uint32_t length;
                uint32_t next;
        } pseudo;

        if(!checklength(source, packet, ether_size + ip6_size + ns_size))
                return;

        /* The packet has an option if there is room for an option header plus one MAC address */
        has_opt = packet->len >= ether_size + ip6_size + ns_size + opt_size + ETH_ALEN;

        if(source != myself) {
                logger(DEBUG_TRAFFIC, LOG_WARNING, "Got neighbor solicitation request from %s (%s) while in router mode!", source->name, source->hostname);
                return;
        }

        /* Copy headers from packet to structs on the stack */

        memcpy(&ip6, packet->data + ether_size, ip6_size);
        memcpy(&ns, packet->data + ether_size + ip6_size, ns_size);
        if(has_opt)
                memcpy(&opt, packet->data + ether_size + ip6_size + ns_size, opt_size);

        /* First, snatch the source address from the neighbor solicitation packet */

        if(overwrite_mac)
                memcpy(mymac.x, packet->data + ETH_ALEN, ETH_ALEN);

        /* Check if this is a valid neighbor solicitation request */

        if(ns.nd_ns_hdr.icmp6_type != ND_NEIGHBOR_SOLICIT ||
           (has_opt && opt.nd_opt_type != ND_OPT_SOURCE_LINKADDR)) {
                logger(DEBUG_TRAFFIC, LOG_WARNING, "Cannot route packet: received unknown type neighbor solicitation request");
                return;
        }

        /* Create pseudo header */

        pseudo.ip6_src = ip6.ip6_src;
        pseudo.ip6_dst = ip6.ip6_dst;
        if(has_opt)
                pseudo.length = htonl(ns_size + opt_size + ETH_ALEN);
        else
                pseudo.length = htonl(ns_size);
        pseudo.next = htonl(IPPROTO_ICMPV6);

        /* Generate checksum */

        checksum = inet_checksum(&pseudo, sizeof pseudo, ~0);
        checksum = inet_checksum(&ns, ns_size, checksum);
        if(has_opt) {
                checksum = inet_checksum(&opt, opt_size, checksum);
                checksum = inet_checksum(packet->data + ether_size + ip6_size + ns_size + opt_size, ETH_ALEN, checksum);
        }

        /* The sum includes the checksum field itself, so a valid packet yields zero */

        if(checksum) {
                logger(DEBUG_TRAFFIC, LOG_WARNING, "Cannot route packet: checksum error for neighbor solicitation request");
                return;
        }

        /* Check if the IPv6 address exists on the VPN */

        subnet = lookup_subnet_ipv6((ipv6_t *) &ns.nd_ns_target);

        if(!subnet) {
                logger(DEBUG_TRAFFIC, LOG_WARNING, "Cannot route packet: neighbor solicitation request for unknown address %hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
                                   ntohs(((uint16_t *) &ns.nd_ns_target)[0]),
                                   ntohs(((uint16_t *) &ns.nd_ns_target)[1]),
                                   ntohs(((uint16_t *) &ns.nd_ns_target)[2]),
                                   ntohs(((uint16_t *) &ns.nd_ns_target)[3]),
                                   ntohs(((uint16_t *) &ns.nd_ns_target)[4]),
                                   ntohs(((uint16_t *) &ns.nd_ns_target)[5]),
                                   ntohs(((uint16_t *) &ns.nd_ns_target)[6]),
                                   ntohs(((uint16_t *) &ns.nd_ns_target)[7]));

                return;
        }

        /* Check if it is for our own subnet */

        if(subnet->owner == myself)
                return;                                          /* silently ignore */

        /* Create neighbor advertisement reply */

        memcpy(packet->data, packet->data + ETH_ALEN, ETH_ALEN); /* copy destination address */
        packet->data[ETH_ALEN * 2 - 1] ^= 0xFF;                  /* mangle source address so it looks like it's not from us */

        ip6.ip6_dst = ip6.ip6_src;                               /* swap destination and source protocol address */
        ip6.ip6_src = ns.nd_ns_target;

        if(has_opt)
                memcpy(packet->data + ether_size + ip6_size + ns_size + opt_size, packet->data + ETH_ALEN, ETH_ALEN);   /* add fake source hard addr */

        ns.nd_ns_cksum = 0;
        ns.nd_ns_type = ND_NEIGHBOR_ADVERT;
        ns.nd_ns_reserved = htonl(0x40000000UL);                 /* Set solicited flag */
        opt.nd_opt_type = ND_OPT_TARGET_LINKADDR;                /* only written back to the packet when has_opt is set */

        /* Create pseudo header */

        pseudo.ip6_src = ip6.ip6_src;
        pseudo.ip6_dst = ip6.ip6_dst;
        if(has_opt)
                pseudo.length = htonl(ns_size + opt_size + ETH_ALEN);
        else
                pseudo.length = htonl(ns_size);
        pseudo.next = htonl(IPPROTO_ICMPV6);

        /* Generate checksum */

        checksum = inet_checksum(&pseudo, sizeof pseudo, ~0);
        checksum = inet_checksum(&ns, ns_size, checksum);
        if(has_opt) {
                checksum = inet_checksum(&opt, opt_size, checksum);
                checksum = inet_checksum(packet->data + ether_size + ip6_size + ns_size + opt_size, ETH_ALEN, checksum);
        }

        ns.nd_ns_hdr.icmp6_cksum = checksum;

        /* Copy structs on stack back to packet */

        memcpy(packet->data + ether_size, &ip6, ip6_size);
        memcpy(packet->data + ether_size + ip6_size, &ns, ns_size);
        if(has_opt)
                memcpy(packet->data + ether_size + ip6_size + ns_size, &opt, opt_size);

        send_packet(source, packet);
}
730
731 static void route_ipv6(node_t *source, vpn_packet_t *packet) {
732         if(!checklength(source, packet, ether_size + ip6_size))
733                 return;
734
735         if(packet->data[20] == IPPROTO_ICMPV6 && checklength(source, packet, ether_size + ip6_size + icmp6_size) && packet->data[54] == ND_NEIGHBOR_SOLICIT) {
736                 route_neighborsol(source, packet);
737                 return;
738         }
739
740         if(broadcast_mode && packet->data[38] == 255)
741                 broadcast_packet(source, packet);
742         else
743                 route_ipv6_unicast(source, packet);
744 }
745
746 /* RFC 826 */
747
748 static void route_arp(node_t *source, vpn_packet_t *packet) {
749         struct ether_arp arp;
750         subnet_t *subnet;
751         struct in_addr addr;
752
753         if(!checklength(source, packet, ether_size + arp_size))
754                 return;
755
756         if(source != myself) {
757                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Got ARP request from %s (%s) while in router mode!", source->name, source->hostname);
758                 return;
759         }
760
761         /* First, snatch the source address from the ARP packet */
762
763         if(overwrite_mac)
764                 memcpy(mymac.x, packet->data + ETH_ALEN, ETH_ALEN);
765
766         /* Copy headers from packet to structs on the stack */
767
768         memcpy(&arp, packet->data + ether_size, arp_size);
769
770         /* Check if this is a valid ARP request */
771
772         if(ntohs(arp.arp_hrd) != ARPHRD_ETHER || ntohs(arp.arp_pro) != ETH_P_IP ||
773            arp.arp_hln != ETH_ALEN || arp.arp_pln != sizeof addr || ntohs(arp.arp_op) != ARPOP_REQUEST) {
774                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Cannot route packet: received unknown type ARP request");
775                 return;
776         }
777
778         /* Check if the IPv4 address exists on the VPN */
779
780         subnet = lookup_subnet_ipv4((ipv4_t *) &arp.arp_tpa);
781
782         if(!subnet) {
783                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Cannot route packet: ARP request for unknown address %d.%d.%d.%d",
784                                    arp.arp_tpa[0], arp.arp_tpa[1], arp.arp_tpa[2],
785                                    arp.arp_tpa[3]);
786                 return;
787         }
788
789         /* Check if it is for our own subnet */
790
791         if(subnet->owner == myself)
792                 return;                                          /* silently ignore */
793
794         memcpy(packet->data, packet->data + ETH_ALEN, ETH_ALEN); /* copy destination address */
795         packet->data[ETH_ALEN * 2 - 1] ^= 0xFF;                  /* mangle source address so it looks like it's not from us */
796
797         memcpy(&addr, arp.arp_tpa, sizeof addr);                 /* save protocol addr */
798         memcpy(arp.arp_tpa, arp.arp_spa, sizeof addr);           /* swap destination and source protocol address */
799         memcpy(arp.arp_spa, &addr, sizeof addr);                 /* ... */
800
801         memcpy(arp.arp_tha, arp.arp_sha, ETH_ALEN);              /* set target hard/proto addr */
802         memcpy(arp.arp_sha, packet->data + ETH_ALEN, ETH_ALEN);  /* add fake source hard addr */
803         arp.arp_op = htons(ARPOP_REPLY);
804
805         /* Copy structs on stack back to packet */
806
807         memcpy(packet->data + ether_size, &arp, arp_size);
808
809         send_packet(source, packet);
810 }
811
812 static void route_mac(node_t *source, vpn_packet_t *packet) {
813         subnet_t *subnet;
814         mac_t dest;
815
816         /* Learn source address */
817
818         if(source == myself) {
819                 mac_t src;
820                 memcpy(&src, &packet->data[6], sizeof src);
821                 learn_mac(&src);
822         }
823
824         /* Lookup destination address */
825
826         memcpy(&dest, &packet->data[0], sizeof dest);
827         subnet = lookup_subnet_mac(NULL, &dest);
828
829         if(!subnet) {
830                 broadcast_packet(source, packet);
831                 return;
832         }
833
834         if(subnet->owner == source) {
835                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Packet looping back to %s (%s)!", source->name, source->hostname);
836                 return;
837         }
838
839         if(forwarding_mode == FMODE_OFF && source != myself && subnet->owner != myself)
840                 return;
841
842         // Handle packets larger than PMTU
843
844         node_t *via = (subnet->owner->via == myself) ? subnet->owner->nexthop : subnet->owner->via;
845
846         if(directonly && subnet->owner != via)
847                 return;
848
849         if(via && packet->len > via->mtu && via != myself) {
850                 logger(DEBUG_TRAFFIC, LOG_INFO, "Packet for %s (%s) length %d larger than MTU %d", subnet->owner->name, subnet->owner->hostname, packet->len, via->mtu);
851                 uint16_t type = packet->data[12] << 8 | packet->data[13];
852                 length_t ethlen = 14;
853
854                 if(type == ETH_P_8021Q) {
855                         type = packet->data[16] << 8 | packet->data[17];
856                         ethlen += 4;
857                 }
858
859                 if(type == ETH_P_IP && packet->len > 576 + ethlen) {
860                         if(packet->data[6 + ethlen] & 0x40) {
861                                 packet->len = via->mtu;
862                                 route_ipv4_unreachable(source, packet, ethlen, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED);
863                         } else {
864                                 fragment_ipv4_packet(via, packet, ethlen);
865                         }
866                         return;
867                 } else if(type == ETH_P_IPV6 && packet->len > 1280 + ethlen) {
868                         packet->len = via->mtu;
869                         route_ipv6_unreachable(source, packet, ethlen, ICMP6_PACKET_TOO_BIG, 0);
870                         return;
871                 }
872         }
873
874         clamp_mss(source, via, packet);
875
876         send_packet(subnet->owner, packet);
877 }
878
879 static void send_pcap(vpn_packet_t *packet) {
880         pcap = false;
881
882         for list_each(connection_t, c, connection_list) {
883                 if(!c->status.pcap)
884                         continue;
885
886                 pcap = true;
887                 int len = packet->len;
888                 if(c->outmaclength && c->outmaclength < len)
889                         len = c->outmaclength;
890
891                 if(send_request(c, "%d %d %d", CONTROL, REQ_PCAP, len))
892                         send_meta(c, (char *)packet->data, len);
893         }
894 }
895
896 static bool do_decrement_ttl(node_t *source, vpn_packet_t *packet) {
897         uint16_t type = packet->data[12] << 8 | packet->data[13];
898         length_t ethlen = ether_size;
899
900         if(type == ETH_P_8021Q) {
901                 type = packet->data[16] << 8 | packet->data[17];
902                 ethlen += 4;
903         }
904
905         switch (type) {
906                 case ETH_P_IP:
907                         if(!checklength(source, packet, ethlen + ip_size))
908                                 return false;
909
910                         if(packet->data[ethlen + 8] < 1) {
911                                 if(packet->data[ethlen + 11] != IPPROTO_ICMP || packet->data[ethlen + 32] != ICMP_TIME_EXCEEDED)
912                                         route_ipv4_unreachable(source, packet, ethlen, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL);
913                                 return false;
914                         }
915
916                         uint16_t old = packet->data[ethlen + 8] << 8 | packet->data[ethlen + 9];
917                         packet->data[ethlen + 8]--;
918                         uint16_t new = packet->data[ethlen + 8] << 8 | packet->data[ethlen + 9];
919
920                         uint32_t checksum = packet->data[ethlen + 10] << 8 | packet->data[ethlen + 11];
921                         checksum += old + (~new & 0xFFFF);
922                         while(checksum >> 16)
923                                 checksum = (checksum & 0xFFFF) + (checksum >> 16);
924                         packet->data[ethlen + 10] = checksum >> 8;
925                         packet->data[ethlen + 11] = checksum & 0xff;
926
927                         return true;
928
929                 case ETH_P_IPV6:
930                         if(!checklength(source, packet, ethlen + ip6_size))
931                                 return false;
932
933                         if(packet->data[ethlen + 7] < 1) {
934                                 if(packet->data[ethlen + 6] != IPPROTO_ICMPV6 || packet->data[ethlen + 40] != ICMP6_TIME_EXCEEDED)
935                                         route_ipv6_unreachable(source, packet, ethlen, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_TRANSIT);
936                                 return false;
937                         }
938
939                         packet->data[ethlen + 7]--;
940
941                         return true;
942
943                 default:
944                         return true;
945         }
946 }
947
948 void route(node_t *source, vpn_packet_t *packet) {
949         if(pcap)
950                 send_pcap(packet);
951
952         if(forwarding_mode == FMODE_KERNEL && source != myself) {
953                 send_packet(myself, packet);
954                 return;
955         }
956
957         if(!checklength(source, packet, ether_size))
958                 return;
959
960         if(decrement_ttl && source != myself)
961                 if(!do_decrement_ttl(source, packet))
962                         return;
963
964         uint16_t type = packet->data[12] << 8 | packet->data[13];
965
966         switch (routing_mode) {
967                 case RMODE_ROUTER:
968                         switch (type) {
969                                 case ETH_P_ARP:
970                                         route_arp(source, packet);
971                                         break;
972
973                                 case ETH_P_IP:
974                                         route_ipv4(source, packet);
975                                         break;
976
977                                 case ETH_P_IPV6:
978                                         route_ipv6(source, packet);
979                                         break;
980
981                                 default:
982                                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Cannot route packet from %s (%s): unknown type %hx", source->name, source->hostname, type);
983                                         break;
984                         }
985                         break;
986
987                 case RMODE_SWITCH:
988                         route_mac(source, packet);
989                         break;
990
991                 case RMODE_HUB:
992                         broadcast_packet(source, packet);
993                         break;
994         }
995 }