[tinc] / event.c
/*
    event.c -- I/O, timeout and signal event handling
    Copyright (C) 2012-2022 Guus Sliepen <guus@tinc-vpn.org>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#include "system.h"

#include <assert.h>

#ifdef HAVE_SYS_EPOLL_H
#include <sys/epoll.h>
#endif

#include "event.h"
#include "utils.h"
#include "net.h"

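/*
   Illustrative usage sketch, not part of this file; handle_input, send_ping
   and sockfd are hypothetical names:

       static void handle_input(void *data, int flags) {
               // called from event_loop() whenever sockfd is readable
       }

       static void send_ping(void *data) {
               // called from event_loop() once the timeout expires
       }

       io_t input = {0};
       io_add(&input, handle_input, NULL, sockfd, IO_READ);

       timeout_t ping = {0};
       timeout_add(&ping, send_ping, NULL, &(struct timeval){1, 0});

       event_loop();   // runs until event_exit() is called
*/
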
struct timeval now;
#ifndef HAVE_MINGW

#ifdef HAVE_SYS_EPOLL_H
static int epollset = 0;
#else
static fd_set readfds;
static fd_set writefds;
#endif

#else
static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE;
static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT;
static DWORD event_count = 0;
#endif
static bool running;

#ifdef HAVE_SYS_EPOLL_H
static inline int event_epoll_init(void) {
	/* NOTE: the size argument is only a hint; kernels since 2.6.8 ignore it
	   entirely, so the value 1024 does not limit anything there.
	   epoll_create1() would be cleaner, but it requires Linux 2.6.27 or
	   later, which would drop support for older kernels.
	*/
	return epoll_create(1024);
}
#endif

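/* Order io_t entries in the splay tree: by file descriptor on POSIX systems,
   and by WSAEVENT handle on Windows, where event-only entries have fd == -1. */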
static int io_compare(const io_t *a, const io_t *b) {
#ifndef HAVE_MINGW
	return a->fd - b->fd;
#else

	if(a->event < b->event) {
		return -1;
	}

	if(a->event > b->event) {
		return 1;
	}

	return 0;
#endif
}

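/* Order timeouts by expiry time. The pointer values serve as a tie-breaker so
   that distinct timeouts with the same expiry can coexist in the tree. */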
static int timeout_compare(const timeout_t *a, const timeout_t *b) {
	struct timeval diff;
	timersub(&a->tv, &b->tv, &diff);

	if(diff.tv_sec < 0) {
		return -1;
	}

	if(diff.tv_sec > 0) {
		return 1;
	}

	if(diff.tv_usec < 0) {
		return -1;
	}

	if(diff.tv_usec > 0) {
		return 1;
	}

	if(a < b) {
		return -1;
	}

	if(a > b) {
		return 1;
	}

	return 0;
}

static splay_tree_t io_tree = {.compare = (splay_compare_t)io_compare};
static splay_tree_t timeout_tree = {.compare = (splay_compare_t)timeout_compare};

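/* Register a callback for a file descriptor with the given interest flags.
   On Windows a WSA event object is created for sockets (fd != -1); on
   non-epoll builds the io is also inserted into the io tree. */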
void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) {
	if(io->cb) {
		return;
	}

	io->fd = fd;
#ifdef HAVE_MINGW

	if(io->fd != -1) {
		io->event = WSACreateEvent();

		if(io->event == WSA_INVALID_EVENT) {
			abort();
		}
	}

	event_count++;
#endif
	io->cb = cb;
	io->data = data;
	io->node.data = io;

	io_set(io, flags);

#ifndef HAVE_SYS_EPOLL_H

	if(!splay_insert_node(&io_tree, &io->node)) {
		abort();
	}

#endif
}

#ifdef HAVE_MINGW
void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) {
	io->event = event;
	io_add(io, cb, data, -1, 0);
}
#endif

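/* Change the events (IO_READ/IO_WRITE) an io is interested in. With epoll the
   fd is deleted and re-added with the new event mask, with select() the
   fd_set bits are updated, and on Windows WSAEventSelect() binds the socket's
   network events to its WSA event object. */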
void io_set(io_t *io, int flags) {
#ifdef HAVE_SYS_EPOLL_H

	if(!epollset) {
		epollset = event_epoll_init();
	}

#endif

	if(flags == io->flags) {
		return;
	}

	io->flags = flags;

	if(io->fd == -1) {
		return;
	}

#ifndef HAVE_MINGW
#ifdef HAVE_SYS_EPOLL_H
	epoll_ctl(epollset, EPOLL_CTL_DEL, io->fd, NULL);

	struct epoll_event ev = {
		.events = 0,
		.data.ptr = io,
	};

	if(flags & IO_READ) {
		ev.events |= EPOLLIN;
	}

	if(flags & IO_WRITE) {
		ev.events |= EPOLLOUT;
	} else if(ev.events == 0) {
		io_tree.generation++;
		return;
	}

	if(epoll_ctl(epollset, EPOLL_CTL_ADD, io->fd, &ev) < 0) {
		perror("epoll_ctl_add");
	}

#else

	if(flags & IO_READ) {
		FD_SET(io->fd, &readfds);
	} else {
		FD_CLR(io->fd, &readfds);
	}

	if(flags & IO_WRITE) {
		FD_SET(io->fd, &writefds);
	} else {
		FD_CLR(io->fd, &writefds);
	}

#endif
#else
	long events = 0;

	if(flags & IO_WRITE) {
		events |= WRITE_EVENTS;
	}

	if(flags & IO_READ) {
		events |= READ_EVENTS;
	}

	if(WSAEventSelect(io->fd, io->event, events) != 0) {
		abort();
	}

#endif
}

void io_del(io_t *io) {
	if(!io->cb) {
		return;
	}

	io_set(io, 0);
#ifdef HAVE_MINGW

	if(io->fd != -1 && WSACloseEvent(io->event) == FALSE) {
		abort();
	}

	event_count--;
#endif

#ifndef HAVE_SYS_EPOLL_H
	splay_unlink_node(&io_tree, &io->node);
#endif
	io->cb = NULL;
}

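/* Schedule a callback after a delay. The interval in tv is relative to the
   current time; calling timeout_set() again re-arms an already scheduled
   timeout. */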
void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval *tv) {
	timeout->cb = cb;
	timeout->data = data;
	timeout->node.data = timeout;

	timeout_set(timeout, tv);
}

void timeout_set(timeout_t *timeout, struct timeval *tv) {
	if(timerisset(&timeout->tv)) {
		splay_unlink_node(&timeout_tree, &timeout->node);
	}

	if(!now.tv_sec) {
		gettimeofday(&now, NULL);
	}

	timeradd(&now, tv, &timeout->tv);

	if(!splay_insert_node(&timeout_tree, &timeout->node)) {
		abort();
	}
}

void timeout_del(timeout_t *timeout) {
	if(!timeout->cb) {
		return;
	}

	splay_unlink_node(&timeout_tree, &timeout->node);
	timeout->cb = NULL;
	timeout->tv = (struct timeval) {
		0, 0
	};
}

#ifndef HAVE_MINGW

// From Matz's Ruby
#ifndef NSIG
# define NSIG (_SIGMAX + 1)      /* For QNX */
#endif

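/* Signal handling uses the classic self-pipe trick: the async-signal-safe
   handler writes the signal number into a pipe, and the read end of that pipe
   is registered as a regular io_t, so signals are dispatched from the event
   loop instead of from signal context. */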
static io_t signalio;
static int pipefd[2] = {-1, -1};
static signal_t *signal_handle[NSIG + 1] = {NULL};

static void signal_handler(int signum) {
	unsigned char num = signum;

	if(write(pipefd[1], &num, 1) != 1) {
		// Pipe full or broken, nothing we can do about it.
	}
}

static void signalio_handler(void *data, int flags) {
	(void)data;
	(void)flags;
	unsigned char signum;

	if(read(pipefd[0], &signum, 1) != 1) {
		return;
	}

	signal_t *sig = signal_handle[signum];

	if(sig) {
		sig->cb(sig->data);
	}
}

static void pipe_init(void) {
	if(!pipe(pipefd)) {
		io_add(&signalio, signalio_handler, NULL, pipefd[0], IO_READ);
	}
}

void signal_add(signal_t *sig, signal_cb_t cb, void *data, int signum) {
	if(sig->cb) {
		return;
	}

	sig->signum = signum;
	sig->cb = cb;
	sig->data = data;

	if(pipefd[0] == -1) {
		pipe_init();
	}

	signal(signum, signal_handler);

	signal_handle[signum] = sig;
}

void signal_del(signal_t *sig) {
	if(!sig->cb) {
		return;
	}

	signal(sig->signum, SIG_DFL);

	signal_handle[sig->signum] = NULL;
	sig->cb = NULL;
}
#endif

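/* Run the callbacks of all timeouts that have expired, and return the time
   left until the next timeout, or NULL if no timeout is pending. */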
static struct timeval *timeout_execute(struct timeval *diff) {
	gettimeofday(&now, NULL);
	struct timeval *tv = NULL;

	while(timeout_tree.head) {
		timeout_t *timeout = timeout_tree.head->data;
		timersub(&timeout->tv, &now, diff);

		if(diff->tv_sec < 0) {
			timeout->cb(timeout->data);

			if(timercmp(&timeout->tv, &now, <)) {
				timeout_del(timeout);
			}
		} else {
			tv = diff;
			break;
		}
	}

	return tv;
}

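/* The main event loop. It waits for I/O and timeouts using epoll_wait() or
   select() on POSIX systems and WSAWaitForMultipleEvents() on Windows, and
   keeps running until event_exit() is called or an error occurs. */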
bool event_loop(void) {
	running = true;

#ifndef HAVE_MINGW

#ifdef HAVE_SYS_EPOLL_H

	if(!epollset) {
		epollset = event_epoll_init();
	}

#else
	fd_set readable;
	fd_set writable;
#endif

	while(running) {
		struct timeval diff;
		struct timeval *tv = timeout_execute(&diff);
#ifndef HAVE_SYS_EPOLL_H
		memcpy(&readable, &readfds, sizeof(readable));
		memcpy(&writable, &writefds, sizeof(writable));
#endif

#ifdef HAVE_SYS_EPOLL_H
		struct epoll_event events[EPOLL_MAX_EVENTS_PER_LOOP];
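		/* Convert the time until the next timeout to milliseconds for
		   epoll_wait(). Note that tv is dereferenced unconditionally here,
		   so this relies on at least one timeout being scheduled whenever
		   the loop runs. */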
		long timeout = (tv->tv_sec * 1000) + (tv->tv_usec / 1000);

		if(timeout > INT_MAX) {
			timeout = INT_MAX;
		}

		int n = epoll_wait(epollset, events, EPOLL_MAX_EVENTS_PER_LOOP, (int)timeout);
#else
		int maxfds = 0;

		if(io_tree.tail) {
			io_t *last = io_tree.tail->data;
			maxfds = last->fd + 1;
		}

		int n = select(maxfds, &readable, &writable, NULL, tv);
#endif

		if(n < 0) {
			if(sockwouldblock(sockerrno)) {
				continue;
			} else {
				return false;
			}
		}

		if(!n) {
			continue;
		}

		unsigned int curgen = io_tree.generation;

#ifdef HAVE_SYS_EPOLL_H

		for(int i = 0; i < n; i++) {
			io_t *io = events[i].data.ptr;

			if(events[i].events & EPOLLOUT && io->flags & IO_WRITE) {
				io->cb(io->data, IO_WRITE);
			}

			if(curgen != io_tree.generation) {
				break;
			}

			if(events[i].events & EPOLLIN && io->flags & IO_READ) {
				io->cb(io->data, IO_READ);
			}

			if(curgen != io_tree.generation) {
				break;
			}
		}

#else

		for splay_each(io_t, io, &io_tree) {
			if(FD_ISSET(io->fd, &writable)) {
				io->cb(io->data, IO_WRITE);
			} else if(FD_ISSET(io->fd, &readable)) {
				io->cb(io->data, IO_READ);
			} else {
				continue;
			}

			/*
				There are scenarios in which the callback will remove another io_t from the tree
				(e.g. closing a double connection). Since splay_each does not support that, we
				need to exit the loop if that happens. That's okay, since any remaining events will
				get picked up by the next select() call.
			*/
			if(curgen != io_tree.generation) {
				break;
			}
		}

#endif
	}

#else
	assert(WSA_WAIT_EVENT_0 == 0);

	while(running) {
		struct timeval diff;
		struct timeval *tv = timeout_execute(&diff);
		DWORD timeout_ms = tv ? (DWORD)(tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE;

		if(!event_count) {
			Sleep(timeout_ms);
			continue;
		}

		/*
		   For some reason, Microsoft decided to make the FD_WRITE event edge-triggered instead of level-triggered,
		   which is the opposite of what select() does. In practice, that means that once an FD_WRITE event has
		   triggered, it will not trigger again until a send() returns EWOULDBLOCK. Since the semantics of this
		   event loop are that write events are level-triggered (i.e. they keep firing as long as the socket is
		   writable), we emulate that behaviour by firing the callback of each IO_WRITE io that is still writable.

		   Note that technically FD_CLOSE has the same problem, but that is okay because user code does not rely
		   on this event being fired again if it is ignored.
		*/
		unsigned int curgen = io_tree.generation;

		for splay_each(io_t, io, &io_tree) {
			if(io->flags & IO_WRITE && send(io->fd, NULL, 0, 0) == 0) {
				io->cb(io->data, IO_WRITE);

				if(curgen != io_tree.generation) {
					break;
				}
			}
		}

		if(event_count > WSA_MAXIMUM_WAIT_EVENTS) {
			WSASetLastError(WSA_INVALID_PARAMETER);
			return false;
		}

		WSAEVENT events[WSA_MAXIMUM_WAIT_EVENTS];
		io_t *io_map[WSA_MAXIMUM_WAIT_EVENTS];
		DWORD event_index = 0;

		for splay_each(io_t, io, &io_tree) {
			events[event_index] = io->event;
			io_map[event_index] = io;
			event_index++;
		}

		/*
		 * If the generation number changes due to event addition
		 * or removal by a callback we restart the loop.
		 */
		curgen = io_tree.generation;

		for(DWORD event_offset = 0; event_offset < event_count;) {
			DWORD result = WSAWaitForMultipleEvents(event_count - event_offset, &events[event_offset], FALSE, timeout_ms, FALSE);

			if(result == WSA_WAIT_TIMEOUT) {
				break;
			}

			if(result >= event_count - event_offset) {
				return false;
			}

			/* Look up io in the map by index. */
			event_index = result + event_offset;
			io_t *io = io_map[event_index];

			if(io->fd == -1) {
				io->cb(io->data, 0);

				if(curgen != io_tree.generation) {
					break;
				}
			} else {
				WSANETWORKEVENTS network_events;

				if(WSAEnumNetworkEvents(io->fd, io->event, &network_events) != 0) {
					return false;
				}

				if(network_events.lNetworkEvents & READ_EVENTS) {
					io->cb(io->data, IO_READ);

					if(curgen != io_tree.generation) {
						break;
					}
				}

				/*
				    The fd might be available for write too. However, if we already fired the read callback, that
				    callback might have deleted the io (e.g. through terminate_connection()), so we can't fire the
				    write callback here. Instead, we loop back and let the writable io loop above handle it.
				 */
			}

			/* Continue checking the rest of the events. */
			event_offset = event_index + 1;

			/* Just poll the next time through. */
			timeout_ms = 0;
		}
	}

#endif

	return true;
}

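/* Make event_loop() return; the flag is checked at the start of each iteration. */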
void event_exit(void) {
	running = false;
}