From: Etienne Dechamps Date: Sat, 3 Dec 2016 23:13:46 +0000 (+0000) Subject: On Windows, don't cancel I/O when disabling the device. X-Git-Tag: release-1.1pre15~40^2 X-Git-Url: https://tinc-vpn.org/git/browse?p=tinc;a=commitdiff_plain;h=d21d97eaf5db9e848d8eee28784c4f54af85f43d On Windows, don't cancel I/O when disabling the device. I have observed cases where disable_device() can get stuck on the GetOverlappedResult() call, especially when the computer is waking up from sleep. This is problematic when combined with DeviceStandby=yes: other_side (1.2.3.4 port 655) didn't respond to PING in 5 seconds Closing connection with other_side (1.2.3.4 port 655) Disabling Windows tap device gdb reveals the following stack trace: #0 0x77c7dd3c in ?? () #1 0x7482aad0 in KERNELBASE!GetOverlappedResult () from C:\WINDOWS\SysWoW64\KernelBase.dll #2 0x0043c343 in disable_device () at mingw/device.c:244 #3 0x0040fcee in device_disable () at net_setup.c:759 #4 0x00405bb5 in check_reachability () at graph.c:292 #5 0x00405be2 in graph () at graph.c:301 #6 0x004088db in terminate_connection (c=0x4dea5c0, report=true) at net.c:108 #7 0x00408aed in timeout_handler (data=0x5af0c0 ) at net.c:168 #8 0x00403af8 in get_time_remaining (diff=0x2a8fd64) at event.c:239 #9 0x00403b6c in event_loop () at event.c:303 #10 0x00409904 in main_loop () at net.c:461 #11 0x00424a95 in main2 (argc=6, argv=0x2b42a60) at tincd.c:489 #12 0x00424788 in main (argc=6, argv=0x2b42a60) at tincd.c:416 This is with TAP-Win32 9.0.0.9. I suspect driver bugs related to sleep. In any case, this commit fixes the issue by cancelling I/O only when the entire tinc process is being gracefully shut down, as opposed to every time the device is disabled. Thankfully, the driver seems to be perfectly fine with this code issuing TAP_IOCTL_SET_MEDIA_STATUS ioctls while there are I/O operations inflight. --- diff --git a/src/mingw/device.c b/src/mingw/device.c index b0476155..dfdb9641 100644 --- a/src/mingw/device.c +++ b/src/mingw/device.c @@ -214,6 +214,9 @@ static bool setup_device(void) { logger(DEBUG_ALWAYS, LOG_INFO, "%s (%s) is a %s", device, iface, device_info); + device_read_overlapped.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); + device_write_overlapped.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); + return true; } @@ -226,9 +229,6 @@ static void enable_device(void) { /* We don't use the write event directly, but GetOverlappedResult() does, internally. */ - device_read_overlapped.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); - device_write_overlapped.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); - io_add_event(&device_read_io, device_handle_read, NULL, device_read_overlapped.hEvent); device_issue_read(); } @@ -237,6 +237,19 @@ static void disable_device(void) { logger(DEBUG_ALWAYS, LOG_INFO, "Disabling %s", device_info); io_del(&device_read_io); + + ULONG status = 0; + DWORD len; + DeviceIoControl(device_handle, TAP_IOCTL_SET_MEDIA_STATUS, &status, sizeof status, &status, sizeof status, &len, NULL); + + /* Note that we don't try to cancel ongoing I/O here - we just stop listening. + This is because some TAP-Win32 drivers don't seem to handle cancellation very well, + especially when combined with other events such as the computer going to sleep - cases + were observed where the GetOverlappedResult() would just block indefinitely and never + return in that case. */ +} + +static void close_device(void) { CancelIo(device_handle); /* According to MSDN, CancelIo() does not necessarily wait for the operation to complete. @@ -253,11 +266,6 @@ static void disable_device(void) { CloseHandle(device_read_overlapped.hEvent); CloseHandle(device_write_overlapped.hEvent); - ULONG status = 0; - DeviceIoControl(device_handle, TAP_IOCTL_SET_MEDIA_STATUS, &status, sizeof status, &status, sizeof status, &len, NULL); -} - -static void close_device(void) { CloseHandle(device_handle); device_handle = INVALID_HANDLE_VALUE; free(device); device = NULL;