[prev in list] [next in list] [prev in thread] [next in thread] 

List:       sanlock-devel
Subject:    [sanlock] 02/02: wdmd: close watchdog when not used
From:       pagure () pagure ! io
Date:       2022-11-15 16:11:55
Message-ID: 20221115161153.892942D0D5D6 () pagure02 ! fedoraproject ! org
[Download RAW message or body]

This is an automated email from the git hooks/post-receive script.

teigland pushed a commit to branch master
in repository sanlock.

commit 45563a6e0c9d388f58178e03c9dd0b2708b2969c
Author: David Teigland <teigland@redhat.com>
AuthorDate: Mon Nov 14 16:04:39 2022 -0600

    wdmd: close watchdog when not used
    
    Previously, the watchdog would remain open with keepalives
    even after all clients (sanlock lockspaces) were removed.
    This was an unnecessary liability in case it couldn't be
    kept alive while not needed.  It also prevented sanlock
    from being restarted using a new watchdog timeout.
---
 wdmd/main.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/wdmd/main.c b/wdmd/main.c
index e89ab5e..3c60b4e 100644
--- a/wdmd/main.c
+++ b/wdmd/main.c
@@ -101,6 +101,7 @@ struct client {
 	int pid;
 	int pid_dead;
 	int refcount;
+	int internal;
 	uint64_t renewal;
 	uint64_t expire;
 	void *workfn;
@@ -281,12 +282,12 @@ static void dump_debug(int fd)
 	now = monotime();
 
 	memset(line, 0, sizeof(line));
-	snprintf(line, 255, "wdmd %d socket_gid %d high_priority %d now %llu last_keepalive %llu last_closeunclean %llu allow_scripts %d kill_script_sec %d\n",
+	snprintf(line, 255, "wdmd %d socket_gid %d high_priority %d now %llu last_keepalive %llu last_closeunclean %llu allow_scripts %d kill_script_sec %d fire_timeout %d\n",
 		 getpid(), socket_gid, high_priority,
 		 (unsigned long long)now,
 		 (unsigned long long)last_keepalive,
 		 (unsigned long long)last_closeunclean,
-		 allow_scripts, kill_script_sec);
+		 allow_scripts, kill_script_sec, fire_timeout);
 
 	line_len = strlen(line);
 	strncat(debug_buf, line, LINE_SIZE);
@@ -648,6 +649,7 @@ static int setup_clients(void)
 
 	ci = client_add(fd, process_listener, client_pid_dead);
 	strncpy(client[ci].name, "listen", WDMD_NAME_SIZE);
+	client[ci].internal = 1;
 	return 0;
 }
 
@@ -731,6 +733,25 @@ static int active_clients(void)
 	return 0;
 }
 
+static void count_clients(int *active, int *external)
+{
+	int act = 0, ext = 0;
+	int i;
+
+	for (i = 0; i < client_size; i++) {
+		if (!client[i].used)
+			continue;
+		if (client[i].refcount)
+			act++;
+		if (!client[i].internal)
+			ext++;
+	}
+	if (active)
+		*active = act;
+	if (external)
+		*external = ext;
+}
+
 
 #ifdef TEST_FILES
 #define FILES_DIR "/run/wdmd/test_files"
@@ -1447,6 +1468,7 @@ static int setup_signals(void)
 
 	ci = client_add(fd, process_signals, client_pid_dead);
 	strncpy(client[ci].name, "signal", WDMD_NAME_SIZE);
+	client[ci].internal = 1;
 	return 0;
 }
 
@@ -1490,6 +1512,8 @@ static int test_loop(void)
 	void (*workfn) (int ci);
 	void (*deadfn) (int ci);
 	uint64_t test_time;
+	int resetting = 0;
+	int active_usage, external_usage;
 	int poll_timeout;
 	int sleep_seconds;
 	int fail_count;
@@ -1520,7 +1544,9 @@ static int test_loop(void)
 			}
 		}
 
-		if (daemon_quit && !active_clients())
+		count_clients(&active_usage, &external_usage);
+
+		if (daemon_quit && !active_usage)
 			break;
 
 		/*
@@ -1529,6 +1555,31 @@ static int test_loop(void)
 		if (!test_loop_enable)
 			continue;
 
+		/*
+		 * active_usage are client connections with a refcount.
+		 * external_usage are any clients other than internal.
+		 * (open_watchdog happens with external but not active
+		 * connections.)
+		 *
+		 * checking resetting here is critical to avoiding
+		 * unnecessary resets: while in recovery mode we
+		 * have done close_watchdog_unclean, then all clients
+		 * are cleared, and we need the loop below to see
+		 * no further failures and reopen and pet the watchdog
+		 * again to avoid a reset.  After it's been reopened,
+		 * and no longer used due to all clients being cleared,
+		 * then it's ok to get here and close cleanly.
+		 */
+		if (!active_usage && !external_usage && !resetting) {
+			log_debug("close watchdog unused");
+			close_watchdog();
+			test_loop_enable = 0;
+			test_interval = standard_test_interval;
+			poll_timeout = test_interval * 1000;
+			test_time = 0;
+			continue;
+		}
+
 		if (monotime() - test_time >= test_interval) {
 			test_time = monotime();
 			log_debug("test_time %llu",
@@ -1549,6 +1600,7 @@ static int test_loop(void)
 				}
 
 				test_interval = standard_test_interval;
+				resetting = 0;
 			} else {
 				/* If we can patch the kernel so that close
 				   does not generate a ping, then we can skip
@@ -1557,6 +1609,7 @@ static int test_loop(void)
 				close_watchdog_unclean();
 
 				test_interval = RECOVER_TEST_INTERVAL;
+				resetting = 1;
 			}
 		}
 

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.
_______________________________________________
sanlock-devel mailing list -- sanlock-devel@lists.fedorahosted.org
To unsubscribe send an email to sanlock-devel-leave@lists.fedorahosted.org
Fedora Code of Conduct: https://docs.fedoraproject.org/en-US/project/code-of-conduct/
List Guidelines: https://fedoraproject.org/wiki/Mailing_list_guidelines
List Archives: https://lists.fedorahosted.org/archives/list/sanlock-devel@lists.fedorahosted.org
 Do not reply to spam, report it: https://pagure.io/fedora-infrastructure/new_issue


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic