Slightly randomize all timeouts.
[tinc] / src / net.c
1 /*
2     net.c -- most of the network code
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2012 Guus Sliepen <guus@tinc-vpn.org>
5                   2006      Scott Lamb <slamb@slamb.org>
6                   2011      Loïc Grenié <loic.grenie@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #include "utils.h"
26 #include "conf.h"
27 #include "connection.h"
28 #include "device.h"
29 #include "graph.h"
30 #include "logger.h"
31 #include "meta.h"
32 #include "net.h"
33 #include "netutl.h"
34 #include "process.h"
35 #include "protocol.h"
36 #include "subnet.h"
37 #include "xalloc.h"
38
39 int contradicting_add_edge = 0; /* Compared against 100 and reset to 0 by periodic_handler(). NOTE(review): presumably incremented by the protocol code on contradicting ADD_EDGE messages -- confirm. */
40 int contradicting_del_edge = 0; /* Same as above, for DEL_EDGE messages. */
41 static int sleeptime = 10; /* Seconds periodic_handler() sleeps when both counters exceed 100; doubled after each such sleep, halved otherwise, never below 10. */
42 time_t last_config_check = 0; /* Set to time(NULL) at the end of a successful reload_configuration(); host config files with a newer mtime get their connection closed on reload. */
43
44 /* Purge edges and subnets of unreachable nodes. Use carefully. */
45
46 void purge(void) {
47         logger(DEBUG_PROTOCOL, LOG_DEBUG, "Purging unreachable nodes");
48
49         /* Remove all edges and subnets owned by unreachable nodes. */
50
51         for splay_each(node_t, n, node_tree) {
52                 if(!n->status.reachable) {
53                         logger(DEBUG_SCARY_THINGS, LOG_DEBUG, "Purging node %s (%s)", n->name, n->hostname);
54
55                         for splay_each(subnet_t, s, n->subnet_tree) {
56                                 send_del_subnet(everyone, s);
57                                 if(!strictsubnets)
58                                         subnet_del(n, s);
59                         }
60
61                         for splay_each(edge_t, e, n->edge_tree) {
62                                 if(!tunnelserver)
63                                         send_del_edge(everyone, e);
64                                 edge_del(e);
65                         }
66                 }
67         }
68
69         /* Check if anyone else claims to have an edge to an unreachable node. If not, delete node. */
70
71         for splay_each(node_t, n, node_tree) {
72                 if(!n->status.reachable) {
73                         for splay_each(edge_t, e, edge_weight_tree)
74                                 if(e->to == n)
75                                         return;
76
77                         if(!autoconnect && (!strictsubnets || !n->subnet_tree->head))
78                                 /* in strictsubnets mode do not delete nodes with subnets */
79                                 node_del(n);
80                 }
81         }
82 }
83
84 /*
85   Terminate a connection:
86   - Mark it as inactive
87   - Remove the edge representing this connection
88   - Kill it with fire
89   - Check if we need to retry making an outgoing connection
90 */
91 void terminate_connection(connection_t *c, bool report) {
92         logger(DEBUG_CONNECTIONS, LOG_NOTICE, "Closing connection with %s (%s)", c->name, c->hostname);
93
94         c->status.active = false;
95
96         if(c->node && c->node->connection == c)
97                 c->node->connection = NULL;
98
99         if(c->edge) {
100                 if(report && !tunnelserver)
101                         send_del_edge(everyone, c->edge);
102
103                 edge_del(c->edge);
104                 c->edge = NULL;
105
106                 /* Run MST and SSSP algorithms */
107
108                 graph();
109
110                 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
111
112                 if(report && !c->node->status.reachable) {
113                         edge_t *e;
114                         e = lookup_edge(c->node, myself);
115                         if(e) {
116                                 if(!tunnelserver)
117                                         send_del_edge(everyone, e);
118                                 edge_del(e);
119                         }
120                 }
121         }
122
123         outgoing_t *outgoing = c->outgoing;
124         connection_del(c);
125
126         /* Check if this was our outgoing connection */
127
128         if(outgoing)
129                 do_outgoing_connection(outgoing);
130 }
131
132 /*
133   Check if the other end is active.
134   If we have sent packets, but didn't receive any,
135   then possibly the other end is dead. We send a
136   PING request over the meta connection. If the other
137   end does not reply in time, we consider them dead
138   and close the connection.
139 */
140 static void timeout_handler(int fd, short events, void *event) {
141         time_t now = time(NULL);
142
143         for list_each(connection_t, c, connection_list) {
144                 if(c->status.control)
145                         continue;
146
147                 if(c->last_ping_time + pingtimeout <= now) {
148                         if(c->status.active) {
149                                 if(c->status.pinged) {
150                                         logger(DEBUG_CONNECTIONS, LOG_INFO, "%s (%s) didn't respond to PING in %ld seconds", c->name, c->hostname, (long)now - c->last_ping_time);
151                                 } else if(c->last_ping_time + pinginterval <= now) {
152                                         send_ping(c);
153                                         continue;
154                                 } else {
155                                         continue;
156                                 }
157                         } else {
158                                 if(c->status.connecting)
159                                         logger(DEBUG_CONNECTIONS, LOG_WARNING, "Timeout while connecting to %s (%s)", c->name, c->hostname);
160                                 else
161                                         logger(DEBUG_CONNECTIONS, LOG_WARNING, "Timeout from %s (%s) during authentication", c->name, c->hostname);
162                         }
163                         terminate_connection(c, c->status.active);
164                 }
165         }
166
167         event_add(event, &(struct timeval){pingtimeout, rand() % 100000});
168 }
169
170 static void periodic_handler(int fd, short events, void *event) {
171         /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
172            This usually only happens when another node has the same Name as this node.
173            If so, sleep for a short while to prevent a storm of contradicting messages.
174         */
175
176         if(contradicting_del_edge > 100 && contradicting_add_edge > 100) {
177                 logger(DEBUG_ALWAYS, LOG_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", sleeptime);
178                 usleep(sleeptime * 1000000LL);
179                 sleeptime *= 2;
180                 if(sleeptime < 0)
181                         sleeptime = 3600;
182         } else {
183                 sleeptime /= 2;
184                 if(sleeptime < 10)
185                         sleeptime = 10;
186         }
187
188         contradicting_add_edge = 0;
189         contradicting_del_edge = 0;
190
191         /* If AutoConnect is set, check if we need to make or break connections. */
192
193         if(autoconnect && node_tree->count > 1) {
194                 /* Count number of active connections */
195                 int nc = 0;
196                 for list_each(connection_t, c, connection_list) {
197                         if(c->status.active && !c->status.control)
198                                 nc++;
199                 }
200
201                 if(nc < autoconnect) {
202                         /* Not enough active connections, try to add one.
203                            Choose a random node, if we don't have a connection to it,
204                            and we are not already trying to make one, create an
205                            outgoing connection to this node.
206                         */
207                         int r = rand() % node_tree->count;
208                         int i = 0;
209
210                         for splay_each(node_t, n, node_tree) {
211                                 if(i++ != r)
212                                         continue;
213
214                                 if(n->connection)
215                                         break;
216
217                                 bool found = false;
218
219                                 for list_each(outgoing_t, outgoing, outgoing_list) {
220                                         if(!strcmp(outgoing->name, n->name)) {
221                                                 found = true;
222                                                 break;
223                                         }
224                                 }
225
226                                 if(!found) {
227                                         logger(DEBUG_CONNECTIONS, LOG_INFO, "Autoconnecting to %s", n->name);
228                                         outgoing_t *outgoing = xmalloc_and_zero(sizeof *outgoing);
229                                         outgoing->name = xstrdup(n->name);
230                                         list_insert_tail(outgoing_list, outgoing);
231                                         setup_outgoing_connection(outgoing);
232                                 }
233                                 break;
234                         }
235                 } else if(nc > autoconnect) {
236                         /* Too many active connections, try to remove one.
237                            Choose a random outgoing connection to a node
238                            that has at least one other connection.
239                         */
240                         int r = rand() % nc;
241                         int i = 0;
242
243                         for list_each(connection_t, c, connection_list) {
244                                 if(!c->status.active || c->status.control)
245                                         continue;
246
247                                 if(i++ != r)
248                                         continue;
249
250                                 if(!c->outgoing || !c->node || c->node->edge_tree->count < 2)
251                                         break;
252
253                                 logger(DEBUG_CONNECTIONS, LOG_INFO, "Autodisconnecting from %s", c->name);
254                                 list_delete(outgoing_list, c->outgoing);
255                                 c->outgoing = NULL;
256                                 terminate_connection(c, c->status.active);
257                                 break;
258                         }
259                 }
260
261                 if(nc >= autoconnect) {
262                         /* If we have enough active connections,
263                            remove any pending outgoing connections.
264                         */
265                         for list_each(outgoing_t, o, outgoing_list) {
266                                 bool found = false;
267                                 for list_each(connection_t, c, connection_list) {
268                                         if(c->outgoing == o) {
269                                                 found = true;
270                                                 break;
271                                         }
272                                 }
273                                 if(!found) {
274                                         logger(DEBUG_CONNECTIONS, LOG_INFO, "Cancelled outgoing connection to %s", o->name);
275                                         list_delete_node(outgoing_list, node);
276                                 }
277                         }
278                 }
279         }
280
281         event_add(event, &(struct timeval){5, rand() % 100000});
282 }
283
284 void handle_meta_connection_data(int fd, short events, void *data) {
285         connection_t *c = data;
286         int result;
287         socklen_t len = sizeof result;
288
289         if(c->status.connecting) {
290                 c->status.connecting = false;
291
292                 getsockopt(c->socket, SOL_SOCKET, SO_ERROR, &result, &len);
293
294                 if(!result)
295                         finish_connecting(c);
296                 else {
297                         logger(DEBUG_CONNECTIONS, LOG_DEBUG, "Error while connecting to %s (%s): %s", c->name, c->hostname, sockstrerror(result));
298                         terminate_connection(c, false);
299                         return;
300                 }
301         }
302
303         if (!receive_meta(c)) {
304                 terminate_connection(c, c->status.active);
305                 return;
306         }
307 }
308
309 static void sigterm_handler(int signal, short events, void *data) {
310         logger(DEBUG_ALWAYS, LOG_NOTICE, "Got %s signal", strsignal(signal));
311         event_loopexit(NULL);
312 }
313
314 static void sighup_handler(int signal, short events, void *data) {
315         logger(DEBUG_ALWAYS, LOG_NOTICE, "Got %s signal", strsignal(signal));
316         reopenlogger();
317         reload_configuration();
318 }
319
320 static void sigalrm_handler(int signal, short events, void *data) {
321         logger(DEBUG_ALWAYS, LOG_NOTICE, "Got %s signal", strsignal(signal));
322         retry();
323 }
324
325 int reload_configuration(void) {
326         char *fname;
327
328         /* Reread our own configuration file */
329
330         exit_configuration(&config_tree);
331         init_configuration(&config_tree);
332
333         if(!read_server_config()) {
334                 logger(DEBUG_ALWAYS, LOG_ERR, "Unable to reread configuration file, exitting.");
335                 event_loopexit(NULL);
336                 return EINVAL;
337         }
338
339         read_config_options(config_tree, NULL);
340
341         xasprintf(&fname, "%s" SLASH "hosts" SLASH "%s", confbase, myself->name);
342         read_config_file(config_tree, fname);
343         free(fname);
344
345         /* Parse some options that are allowed to be changed while tinc is running */
346
347         setup_myself_reloadable();
348
349         /* If StrictSubnet is set, expire deleted Subnets and read new ones in */
350
351         if(strictsubnets) {
352                 for splay_each(subnet_t, subnet, subnet_tree)
353                         subnet->expires = 1;
354
355                 load_all_subnets();
356
357                 for splay_each(subnet_t, subnet, subnet_tree) {
358                         if(subnet->expires == 1) {
359                                 send_del_subnet(everyone, subnet);
360                                 if(subnet->owner->status.reachable)
361                                         subnet_update(subnet->owner, subnet, false);
362                                 subnet_del(subnet->owner, subnet);
363                         } else if(subnet->expires == -1) {
364                                 subnet->expires = 0;
365                         } else {
366                                 send_add_subnet(everyone, subnet);
367                                 if(subnet->owner->status.reachable)
368                                         subnet_update(subnet->owner, subnet, true);
369                         }
370                 }
371         } else { /* Only read our own subnets back in */
372                 for splay_each(subnet_t, subnet, myself->subnet_tree)
373                         if(!subnet->expires)
374                                 subnet->expires = 1;
375
376                 config_t *cfg = lookup_config(config_tree, "Subnet");
377
378                 while(cfg) {
379                         subnet_t *subnet, *s2;
380
381                         if(!get_config_subnet(cfg, &subnet))
382                                 continue;
383
384                         if((s2 = lookup_subnet(myself, subnet))) {
385                                 if(s2->expires == 1)
386                                         s2->expires = 0;
387
388                                 free_subnet(subnet);
389                         } else {
390                                 subnet_add(myself, subnet);
391                                 send_add_subnet(everyone, subnet);
392                                 subnet_update(myself, subnet, true);
393                         }
394
395                         cfg = lookup_config_next(config_tree, cfg);
396                 }
397
398                 for splay_each(subnet_t, subnet, myself->subnet_tree) {
399                         if(subnet->expires == 1) {
400                                 send_del_subnet(everyone, subnet);
401                                 subnet_update(myself, subnet, false);
402                                 subnet_del(myself, subnet);
403                         }
404                 }
405         }
406
407         /* Try to make outgoing connections */
408
409         try_outgoing_connections();
410
411         /* Close connections to hosts that have a changed or deleted host config file */
412
413         for list_each(connection_t, c, connection_list) {
414                 if(c->status.control)
415                         continue;
416
417                 xasprintf(&fname, "%s" SLASH "hosts" SLASH "%s", confbase, c->name);
418                 struct stat s;
419                 if(stat(fname, &s) || s.st_mtime > last_config_check) {
420                         logger(DEBUG_CONNECTIONS, LOG_INFO, "Host config file of %s has been changed", c->name);
421                         terminate_connection(c, c->status.active);
422                 }
423                 free(fname);
424         }
425
426         last_config_check = time(NULL);
427
428         return 0;
429 }
430
431 void retry(void) {
432         for list_each(connection_t, c, connection_list) {
433                 if(c->outgoing && !c->node) {
434                         if(timeout_initialized(&c->outgoing->ev))
435                                 event_del(&c->outgoing->ev);
436                         if(c->status.connecting)
437                                 close(c->socket);
438                         c->outgoing->timeout = 0;
439                         terminate_connection(c, c->status.active);
440                 }
441         }
442 }
443
444 /*
445   this is where it all happens...
446 */
447 int main_loop(void) {
448         struct event timeout_event;
449         struct event periodic_event;
450
451         timeout_set(&timeout_event, timeout_handler, &timeout_event);
452         event_add(&timeout_event, &(struct timeval){pingtimeout, rand() % 100000});
453
454         timeout_set(&periodic_event, periodic_handler, &periodic_event);
455         event_add(&periodic_event, &(struct timeval){5, rand() % 100000});
456
457 #ifndef HAVE_MINGW
458         struct event sighup_event;
459         struct event sigterm_event;
460         struct event sigquit_event;
461         struct event sigalrm_event;
462
463         signal_set(&sighup_event, SIGHUP, sighup_handler, NULL);
464         signal_add(&sighup_event, NULL);
465         signal_set(&sigterm_event, SIGTERM, sigterm_handler, NULL);
466         signal_add(&sigterm_event, NULL);
467         signal_set(&sigquit_event, SIGQUIT, sigterm_handler, NULL);
468         signal_add(&sigquit_event, NULL);
469         signal_set(&sigalrm_event, SIGALRM, sigalrm_handler, NULL);
470         signal_add(&sigalrm_event, NULL);
471 #endif
472
473         if(event_loop(0) < 0) {
474                 logger(DEBUG_ALWAYS, LOG_ERR, "Error while waiting for input: %s", strerror(errno));
475                 return 1;
476         }
477
478 #ifndef HAVE_MINGW
479         signal_del(&sighup_event);
480         signal_del(&sigterm_event);
481         signal_del(&sigquit_event);
482         signal_del(&sigalrm_event);
483 #endif
484
485         event_del(&timeout_event);
486
487         return 0;
488 }