From d2949381713c35210a43391524494c639f6f1d48 Mon Sep 17 00:00:00 2001
From: Kirill Isakov
Date: Mon, 23 May 2022 19:54:41 +0600
Subject: [PATCH] Add optional systemd integration

- startup & shutdown notifications
- optional watchdog with auto-restart on hang

Watchdog is enabled by using WatchdogSec in systemd unit file.
---
Review notes (ignored by git am):
- Line breaks and indentation were restored from a newline-collapsed copy.
  Indentation of context lines is a best-effort reconstruction; if a hunk
  fails to match, retry with `git apply --ignore-whitespace`.
- The author e-mail address was lost in that copy and is not restored here.
- src/linux/watchdog.c was edited during review, so its blob id on the
  `index` line no longer matches the content.

 meson.build              |  8 +++++
 src/event.c              |  4 +--
 src/event.h              |  4 +--
 src/linux/meson.build    |  7 ++++
 src/linux/watchdog.c     | 78 ++++++++++++++++++++++++++++++++++++++++
 src/net.c                |  4 +++
 src/tincd.c              |  9 +++++
 src/watchdog.h           | 15 +++++++++
 systemd/tinc@.service.in |  3 +-
 9 files changed, 127 insertions(+), 5 deletions(-)
 create mode 100644 src/linux/watchdog.c
 create mode 100644 src/watchdog.h

diff --git a/meson.build b/meson.build
index 5200ab42..7354953d 100644
--- a/meson.build
+++ b/meson.build
@@ -165,3 +165,11 @@ run_target('lint', command: [
   python,
   '@SOURCE_ROOT@/lint.py',
 ])
+
+if meson_version.version_compare('>=0.53')
+  summary({
+    'prefix': prefix,
+    'sandbox': cdata.has('HAVE_SANDBOX'),
+    'watchdog': cdata.has('HAVE_WATCHDOG'),
+  }, bool_yn: true, section: 'System')
+endif
diff --git a/src/event.c b/src/event.c
index 2710a8b8..463735f2 100644
--- a/src/event.c
+++ b/src/event.c
@@ -245,7 +245,7 @@ void io_del(io_t *io) {
 	io->cb = NULL;
 }
 
-void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval *tv) {
+void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, const struct timeval *tv) {
 	timeout->cb = cb;
 	timeout->data = data;
 	timeout->node.data = timeout;
@@ -253,7 +253,7 @@ void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval
 	timeout_set(timeout, tv);
 }
 
-void timeout_set(timeout_t *timeout, struct timeval *tv) {
+void timeout_set(timeout_t *timeout, const struct timeval *tv) {
 	if(timerisset(&timeout->tv)) {
 		splay_unlink_node(&timeout_tree, &timeout->node);
 	}
diff --git a/src/event.h b/src/event.h
index 70d86f51..1443ebca 100644
--- a/src/event.h
+++ b/src/event.h
@@ -63,9 +63,9 @@ extern void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event);
 extern void io_del(io_t *io);
 extern void io_set(io_t *io, int flags);
 
-extern void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval *tv);
+extern void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, const struct timeval *tv);
 extern void timeout_del(timeout_t *timeout);
-extern void timeout_set(timeout_t *timeout, struct timeval *tv);
+extern void timeout_set(timeout_t *timeout, const struct timeval *tv);
 
 extern void signal_add(signal_t *sig, signal_cb_t cb, void *data, int signum);
 extern void signal_del(signal_t *sig);
diff --git a/src/linux/meson.build b/src/linux/meson.build
index 8c0f1377..1b94f95c 100644
--- a/src/linux/meson.build
+++ b/src/linux/meson.build
@@ -8,6 +8,13 @@ check_functions += 'recvmmsg'
 
 src_tincd += files('device.c')
 
+dep_libsystemd = dependency('libsystemd', required: opt_systemd)
+if dep_libsystemd.found()
+  src_tincd += files('watchdog.c')
+  deps_tincd += dep_libsystemd
+  cdata.set('HAVE_WATCHDOG', 1)
+endif
+
 if opt_uml
   src_tincd += files('uml_device.c')
   cdata.set('ENABLE_UML', 1)
diff --git a/src/linux/watchdog.c b/src/linux/watchdog.c
new file mode 100644
index 00000000..ab7fa736
--- /dev/null
+++ b/src/linux/watchdog.c
@@ -0,0 +1,78 @@
+#include "../system.h"
+
+#include <systemd/sd-daemon.h>
+
+#include "../event.h"
+#include "../logger.h"
+#include "../watchdog.h"
+
+// Keepalive timer and the delay between two consecutive pings.
+static timeout_t timer;
+static struct timeval interval;
+
+// Widen to uint64_t before multiplying so a narrow time_t cannot overflow.
+static uint64_t second_to_microsecond(time_t second) {
+	return (uint64_t)second * 1000000;
+}
+
+static time_t microsecond_to_second(uint64_t micros) {
+	return (time_t)(micros / 1000000);
+}
+
+// Ignore errors from sd_notify() since there's nothing we can do if it breaks anyway.
+// Also, there's this passage in `man sd_notify.3`:
+//     In order to support both service managers that implement this scheme and those
+//     which do not, it is generally recommended to ignore the return value of this call.
+void watchdog_ping(void) {
+	sd_notify(false, "WATCHDOG=1");
+}
+
+// Timer callback: ping the watchdog and re-arm the timer for the next interval.
+static void watchdog_handler(void *data) {
+	(void)data;
+	watchdog_ping();
+	timeout_set(&timer, &interval);
+}
+
+// Arm the keepalive timer if sd_watchdog_enabled() reports a non-zero timeout.
+// Returns true if the timer was armed, false otherwise.
+static bool watchdog_register(void) {
+	uint64_t timeout = 0;
+
+	if(sd_watchdog_enabled(false, &timeout) <= 0 || !timeout) {
+		return false;
+	}
+
+	if(timeout < second_to_microsecond(2)) {
+		logger(DEBUG_ALWAYS, LOG_WARNING, "Consider using a higher watchdog timeout. Spurious failures may occur.");
+	}
+
+	// Send notifications twice per timeout period
+	timeout /= 2;
+
+	interval.tv_sec = microsecond_to_second(timeout);
+
+	if(interval.tv_sec) {
+		timeout -= second_to_microsecond(interval.tv_sec);
+	}
+
+	interval.tv_usec = (suseconds_t)timeout;
+
+	timeout_add(&timer, watchdog_handler, &timer, &interval);
+	watchdog_ping();
+
+	return true;
+}
+
+// Report READY=1 to the service manager, then try to arm the watchdog timer.
+void watchdog_start(void) {
+	sd_notify(false, "READY=1");
+	bool enabled = watchdog_register();
+	logger(DEBUG_ALWAYS, LOG_INFO, "Watchdog %s", enabled ? "started" : "is disabled");
+}
+
+// Report STOPPING=1 to the service manager and remove the keepalive timer.
+void watchdog_stop(void) {
+	sd_notify(false, "STOPPING=1");
+	timeout_del(&timer);
+}
diff --git a/src/net.c b/src/net.c
index 9669bc97..dd86cf27 100644
--- a/src/net.c
+++ b/src/net.c
@@ -35,6 +35,7 @@
 #include "protocol.h"
 #include "subnet.h"
 #include "utils.h"
+#include "watchdog.h"
 
 int contradicting_add_edge = 0;
 int contradicting_del_edge = 0;
@@ -195,6 +196,9 @@ static void timeout_handler(void *data) {
 	   by default
 	*/
 	if(sleep_time > 2 * udp_discovery_timeout) {
+#ifdef HAVE_WATCHDOG
+		watchdog_ping();
+#endif
 		logger(DEBUG_ALWAYS, LOG_ERR, "Awaking from dead after %ld seconds of sleep", sleep_time);
 		/*
 			Do not send any packets to tinc after we wake up.
diff --git a/src/tincd.c b/src/tincd.c
index 20d0e297..8a4c1f39 100644
--- a/src/tincd.c
+++ b/src/tincd.c
@@ -56,6 +56,7 @@
 #include "version.h"
 #include "random.h"
 #include "sandbox.h"
+#include "watchdog.h"
 
 /* If nonzero, display usage information and exit. */
 static bool show_help = false;
@@ -694,8 +695,16 @@ int main2(int argc, char **argv) {
 
 	try_outgoing_connections();
 
+#ifdef HAVE_WATCHDOG
+	watchdog_start();
+#endif
+
 	status = main_loop();
 
+#ifdef HAVE_WATCHDOG
+	watchdog_stop();
+#endif
+
 	/* Shutdown properly. */
 
 end:
diff --git a/src/watchdog.h b/src/watchdog.h
new file mode 100644
index 00000000..9865451f
--- /dev/null
+++ b/src/watchdog.h
@@ -0,0 +1,15 @@
+#ifndef TINC_WATCHDOG_H
+#define TINC_WATCHDOG_H
+
+// Start sending keepalive notifications to watchdog.
+// Called after initialization is finished before entering the event loop.
+void watchdog_start(void);
+
+// Stop sending keepalive notifications.
+// Called shortly before exiting.
+void watchdog_stop(void);
+
+// Send keepalive notification.
+void watchdog_ping(void);
+
+#endif // TINC_WATCHDOG_H
diff --git a/systemd/tinc@.service.in b/systemd/tinc@.service.in
index 2d695caa..95e86920 100644
--- a/systemd/tinc@.service.in
+++ b/systemd/tinc@.service.in
@@ -7,7 +7,7 @@ PartOf=tinc.service
 ReloadPropagatedFrom=tinc.service
 
 [Service]
-Type=simple
+Type=notify
 WorkingDirectory=@sysconfdir@/tinc/%i
 ExecStart=@sbindir@/tincd -n %i -D
 ExecReload=@sbindir@/tinc -n %i reload
@@ -15,6 +15,7 @@ KillMode=mixed
 Restart=on-failure
 RestartSec=5
 TimeoutStopSec=5
+WatchdogSec=10
 
 [Install]
 WantedBy=tinc.service
-- 
2.20.1