Use epoll() if available.
authorMathew Heard <splitice@users.noreply.github.com>
Mon, 16 Aug 2021 02:04:19 +0000 (12:04 +1000)
committerGuus Sliepen <guus@tinc-vpn.org>
Tue, 17 Aug 2021 21:32:06 +0000 (23:32 +0200)
configure.ac
src/event.c
src/net.h

index db7d1aa..e5bd288 100644 (file)
@@ -170,7 +170,7 @@ AS_IF([test "x$enable_hardening" != "xno"],
 dnl Checks for header files.
 dnl We do this in multiple stages, because unlike Linux all the other operating systems really suck and don't include their own dependencies.
 
-AC_CHECK_HEADERS([syslog.h sys/file.h sys/ioctl.h sys/mman.h sys/param.h sys/resource.h sys/socket.h sys/time.h sys/un.h sys/wait.h netdb.h arpa/inet.h dirent.h getopt.h stddef.h])
+AC_CHECK_HEADERS([syslog.h sys/file.h sys/ioctl.h sys/mman.h sys/param.h sys/resource.h sys/socket.h sys/time.h sys/un.h sys/wait.h netdb.h arpa/inet.h dirent.h getopt.h stddef.h sys/epoll.h])
 AC_CHECK_HEADERS([net/if.h net/if_types.h net/ethernet.h net/if_arp.h netinet/in_systm.h netinet/in.h netinet/in6.h netpacket/packet.h],
   [], [], [#include "$srcdir/src/have.h"]
 )
index 226a452..c547c46 100644 (file)
 */
 
 #include "system.h"
+#include "dropin.h"
+
+#ifdef HAVE_SYS_EPOLL_H
+#include <sys/epoll.h>
+#endif
+
 #include "event.h"
 #include "utils.h"
+#include "net.h"
 
 struct timeval now;
-
 #ifndef HAVE_MINGW
+
+#ifdef HAVE_SYS_EPOLL_H
+static int epollset = 0;
+#else
 static fd_set readfds;
 static fd_set writefds;
+#endif
+
 #else
 static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE;
 static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT;
@@ -33,6 +45,17 @@ static DWORD event_count = 0;
 #endif
 static bool running;
 
+#ifdef HAVE_SYS_EPOLL_H
+static inline int event_epoll_init() {
+       /* NOTE: 1024 limit is only used on ancient (pre 2.6.27) kernels.
+               Decent kernels will ignore this value making it unlimited.
+               epoll_create1 might be better, but these kernels would not be supported
+               in that case.
+       */
+       return epoll_create(1024);
+}
+#endif
+
 static int io_compare(const io_t *a, const io_t *b) {
 #ifndef HAVE_MINGW
        return a->fd - b->fd;
@@ -108,9 +131,13 @@ void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) {
 
        io_set(io, flags);
 
+#ifndef HAVE_SYS_EPOLL_H
+
        if(!splay_insert_node(&io_tree, &io->node)) {
                abort();
        }
+
+#endif
 }
 
 #ifdef HAVE_MINGW
@@ -121,6 +148,14 @@ void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) {
 #endif
 
 void io_set(io_t *io, int flags) {
+#ifdef HAVE_SYS_EPOLL_H
+
+       if(!epollset) {
+               epollset = event_epoll_init();
+       }
+
+#endif
+
        if(flags == io->flags) {
                return;
        }
@@ -132,6 +167,30 @@ void io_set(io_t *io, int flags) {
        }
 
 #ifndef HAVE_MINGW
+#ifdef HAVE_SYS_EPOLL_H
+       epoll_ctl(epollset, EPOLL_CTL_DEL, io->fd, NULL);
+
+       struct epoll_event ev = {
+               .events = 0,
+               .data.ptr = io,
+       };
+
+       if(flags & IO_READ) {
+               ev.events |= EPOLLIN;
+       }
+
+       if(flags & IO_WRITE) {
+               ev.events |= EPOLLOUT;
+       } else if(ev.events == 0) {
+               io_tree.generation++;
+               return;
+       }
+
+       if(epoll_ctl(epollset, EPOLL_CTL_ADD, io->fd, &ev) < 0) {
+               perror("epoll_ctl_add");
+       }
+
+#else
 
        if(flags & IO_READ) {
                FD_SET(io->fd, &readfds);
@@ -145,6 +204,7 @@ void io_set(io_t *io, int flags) {
                FD_CLR(io->fd, &writefds);
        }
 
+#endif
 #else
        long events = 0;
 
@@ -178,7 +238,9 @@ void io_del(io_t *io) {
        event_count--;
 #endif
 
+#ifndef HAVE_SYS_EPOLL_H
        splay_unlink_node(&io_tree, &io->node);
+#endif
        io->cb = NULL;
 }
 
@@ -287,7 +349,7 @@ void signal_del(signal_t *sig) {
 }
 #endif
 
-static struct timeval *get_time_remaining(struct timeval *diff) {
+static struct timeval *timeout_execute(struct timeval *diff) {
        gettimeofday(&now, NULL);
        struct timeval *tv = NULL;
 
@@ -314,23 +376,46 @@ bool event_loop(void) {
        running = true;
 
 #ifndef HAVE_MINGW
+
+#ifdef HAVE_SYS_EPOLL_H
+
+       if(!epollset) {
+               epollset = event_epoll_init();
+       }
+
+#else
        fd_set readable;
        fd_set writable;
+#endif
 
        while(running) {
                struct timeval diff;
-               struct timeval *tv = get_time_remaining(&diff);
+               struct timeval *tv = timeout_execute(&diff);
+#ifndef HAVE_SYS_EPOLL_H
                memcpy(&readable, &readfds, sizeof(readable));
                memcpy(&writable, &writefds, sizeof(writable));
+#endif
+
 
-               int fds = 0;
+#ifdef HAVE_SYS_EPOLL_H
+               struct epoll_event events[EPOLL_MAX_EVENTS_PER_LOOP];
+               long timeout = (tv->tv_sec * 1000) + (tv->tv_usec / 1000);
+
+               if(timeout > INT_MAX) {
+                       timeout = INT_MAX;
+               }
+
+               int n = epoll_wait(epollset, events, EPOLL_MAX_EVENTS_PER_LOOP, (int)timeout);
+#else
+               int maxfds =  0;
 
                if(io_tree.tail) {
                        io_t *last = io_tree.tail->data;
-                       fds = last->fd + 1;
+                       maxfds = last->fd + 1;
                }
 
-               int n = select(fds, &readable, &writable, NULL, tv);
+               int n = select(maxfds, &readable, &writable, NULL, tv);
+#endif
 
                if(n < 0) {
                        if(sockwouldblock(sockerrno)) {
@@ -346,6 +431,31 @@ bool event_loop(void) {
 
                unsigned int curgen = io_tree.generation;
 
+
+#ifdef HAVE_SYS_EPOLL_H
+
+               for(int i = 0; i < n; i++) {
+                       io_t *io = events[i].data.ptr;
+
+                       if(events[i].events & EPOLLOUT && io->flags & IO_WRITE) {
+                               io->cb(io->data, IO_WRITE);
+                       }
+
+                       if(curgen != io_tree.generation) {
+                               break;
+                       }
+
+                       if(events[i].events & EPOLLIN && io->flags & IO_READ) {
+                               io->cb(io->data, IO_READ);
+                       }
+
+                       if(curgen != io_tree.generation) {
+                               break;
+                       }
+               }
+
+#else
+
                for splay_each(io_t, io, &io_tree) {
                        if(FD_ISSET(io->fd, &writable)) {
                                io->cb(io->data, IO_WRITE);
@@ -356,22 +466,24 @@ bool event_loop(void) {
                        }
 
                        /*
-                          There are scenarios in which the callback will remove another io_t from the tree
-                          (e.g. closing a double connection). Since splay_each does not support that, we
-                          need to exit the loop if that happens. That's okay, since any remaining events will
-                          get picked up by the next select() call.
-                        */
+                               There are scenarios in which the callback will remove another io_t from the tree
+                               (e.g. closing a double connection). Since splay_each does not support that, we
+                               need to exit the loop if that happens. That's okay, since any remaining events will
+                               get picked up by the next select() call.
+                       */
                        if(curgen != io_tree.generation) {
                                break;
                        }
                }
+
+#endif
        }
 
 #else
 
        while(running) {
                struct timeval diff;
-               struct timeval *tv = get_time_remaining(&diff);
+               struct timeval *tv = timeout_execute(&diff);
                DWORD timeout_ms = tv ? (DWORD)(tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE;
 
                if(!event_count) {
index f945a1f..b179288 100644 (file)
--- a/src/net.h
+++ b/src/net.h
@@ -28,6 +28,8 @@
 #include "digest.h"
 #include "event.h"
 
+#define EPOLL_MAX_EVENTS_PER_LOOP 32
+
 #ifdef ENABLE_JUMBOGRAMS
 #define MTU 9018        /* 9000 bytes payload + 14 bytes ethernet header + 4 bytes VLAN tag */
 #else