[prev in list] [next in list] [prev in thread] [next in thread] 

List:       redhat-linux-cluster
Subject:    Re: [Linux-cluster] [RFC][PATCH] Add ability to freeze a service.
From:       Simone Gotti <simone.gotti () email ! it>
Date:       2007-04-25 18:52:08
Message-ID: 1177527128.14525.15.camel () localhost
[Download RAW message or body]

On Mon, 2007-04-23 at 16:02 -0400, Lon Hohberger wrote:
> On Mon, Apr 23, 2007 at 11:15:22AM +0200, Simone Gotti wrote:
> > Hi,
> > 
> > like discussed with lon on IRC I'm trying to add to rgmanager the
> > ability to freeze a service. I worked on it in these days and did an
> > example patch. Here is how I think what a "freeze" can be and, of
> > course, it can be implemented in many other ways so it's only an
> > example.
> 
> Hi,
> 
> couple of comments -
> 
Hi Lon,

> (1) s/freezed/frozen/ig  :)
Uh... :( I hope I fixed them all now without adding other language errors.

> (2) svc_status_inquiry shouldn't call rg_lock(); you should use
> get_rg_state_local() instead of rg_lock/get_rg_state/rg_unlock
done. I didn't knew what do if its return value was != 0 so I logged end exited. Is \
it right?

> Note that svc_status_inquiry isn't even used yet, so the behavior
> might change at a later date ;)
Ok.

> (3.a) Warning: The addition of rs_flags to rg_state_t will break
> compatibility with existing software (which is fine in -HEAD).  It
> would be easier to cope with this change (that is- make upgrades from
> old->new work) if you added the rs_flags field after the
> rs_transition field. 
> 
> (3.b) If you don't want to do that, then you need to add another 32-bits
> worth of data (could be just a "pad" field) before rs_transition because
> the kernel on ia64 machines will complain if they read a 64-bit int and
> it's not aligned on a 64-bit boundary.
I implemented 3.a. So it should be easily backportable. I wasn't aware of the ia64 \
alignment problems so thanks for the explanation.

> Aside from that, it looks like you got it right for what you wanted it
> to do.  I can fix (1) (language stuff) if you fix (2) and (3).
I tried also to fix (1) (I hope).
In the meantime I fixed (like you reported on IRC) the missing changes
to clustat.c:xml_rg_state and enhanced a little the default clustat
output adding a "Flags" column.
I added a parameter "separator" to rg_flags_str so it can be defined.
For example in the default clustat I'm using a comma separated list
while in the xml output I'm using a space as separator (dunno if this is
right to do with xml).

Thanks!
Bye!

> -- Lon
> 
-- 
Simone Gotti

 
 
 --
 Email.it, the professional e-mail, gratis per te: http://www.email.it/f
 
 Sponsor:
 Vuoi diventare un vero esperto sul Controllo di Gestione? Scopri come nella tua \
azienda puoi migliorare gli utili e ridurre le spese  Clicca qui: \
http://adv.email.it/cgi-bin/foclick.cgi?mid=6197&d=25-4


["rgmanager-cvsHEAD-add_service_freezing-try02.patch" (rgmanager-cvsHEAD-add_service_freezing-try02.patch)]

Index: include/resgroup.h
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/include/resgroup.h,v
retrieving revision 1.19
diff -u -b -B -p -r1.19 resgroup.h
--- include/resgroup.h	20 Mar 2007 17:09:56 -0000	1.19
+++ include/resgroup.h	25 Apr 2007 18:47:46 -0000
@@ -35,6 +35,7 @@ typedef struct {
 	uint32_t	rs_restarts;	/**< Number of cluster-induced 
 					     restarts */
 	uint64_t	rs_transition;	/**< Last service transition time */
+	uint32_t	rs_flags;	/**< User setted flags */
 } rg_state_t;
 
 #define swab_rg_state_t(ptr) \
@@ -46,6 +47,7 @@ typedef struct {
 	swab32((ptr)->rs_state);\
 	swab32((ptr)->rs_restarts);\
 	swab64((ptr)->rs_transition);\
+	swab32((ptr)->rs_flags);\
 }
 
 
@@ -79,6 +81,8 @@ typedef struct {
 #define RG_UNLOCK	  20
 #define RG_QUERY_LOCK	  21
 #define RG_MIGRATE	  22
+#define RG_FREEZE	  23
+#define RG_UNFREEZE	  24
 #define RG_NONE		  999
 
 const char *rg_req_str(int req);
@@ -105,7 +109,11 @@ int handle_start_remote_req(char *svcNam
 
 #define DEFAULT_CHECK_INTERVAL		10
 
+/* Resource group flags (for now) */
+#define RG_FLAG_FROZEN			(1<<0)	/** Resource frozen */
+
 const char *rg_state_str(int val);
+const char *rg_flags_str(char *flags_string, size_t size, int val, char *separator);
 const char *agent_op_str(int val);
 
 int eval_groups(int local, uint32_t nodeid, int nodeStatus);
@@ -121,6 +129,8 @@ int svc_stop(char *svcName, int error);
 int svc_status(char *svcName);
 int svc_disable(char *svcName);
 int svc_fail(char *svcName);
+int svc_freeze(char *svcName);
+int svc_unfreeze(char *svcName);
 int svc_migrate(char *svcName, int target);
 int rt_enqueue_request(const char *resgroupname, int request,
 		       msgctx_t *resp_ctx,
@@ -162,6 +172,7 @@ cluster_member_list_t *member_list(void)
 int my_id(void);
 
 /* Return codes */
+#define RG_EFROZEN	-11		/* Service is frozen */
 #define RG_ERUN		-10		/* Service is already running */
 #define RG_EQUORUM	-9		/* Operation requires quorum */
 #define RG_EINVAL	-8		/* Invalid operation for resource */
Index: src/clulib/rg_strings.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/clulib/rg_strings.c,v
retrieving revision 1.7
diff -u -b -B -p -r1.7 rg_strings.c
--- src/clulib/rg_strings.c	10 Mar 2007 00:20:54 -0000	1.7
+++ src/clulib/rg_strings.c	25 Apr 2007 18:47:47 -0000
@@ -35,6 +35,7 @@ const struct string_val rg_error_strings
 	{ RG_ENOSERVICE,"Service does not exist" },
 	{ RG_EFORWARD,	"Service not mastered locally" },
 	{ RG_EABORT,	"Aborted; service failed" },
+	{ RG_EFROZEN,	"Failure: Service is frozen"},
 	{ RG_EFAIL,	"Failure" },
 	{ RG_ESUCCESS,	"Success" },
 	{ RG_YES,	"Yes" },
@@ -88,6 +89,12 @@ const struct string_val rg_state_strings
 };
 
 
+const struct string_val rg_flags_strings[] = {
+	{RG_FLAG_FROZEN, "frozen"},
+	{0, NULL}
+};
+
+
 const struct string_val agent_ops[] = {
 	{RS_START, "start"},
 	{RS_STOP, "stop"},
@@ -122,6 +129,20 @@ rg_search_table(const struct string_val 
 }
 
 
+static inline const char *
+rg_flag_search_table(const struct string_val *table, int val)
+{
+	int x;
+
+	for (x = 0; table[x].str != NULL; x++) {
+		if (table[x].val == val) {
+			return table[x].str;
+		}
+	}
+
+	return "Unknown";
+}
+
 const char *
 rg_strerror(int val)
 {
@@ -134,6 +155,22 @@ rg_state_str(int val)
 	return rg_search_table(rg_state_strings, val);
 }
 
+const char *
+rg_flags_str(char *flags_string, size_t size, int val, char *separator)
+{
+	int i;
+	const char *string;
+
+	for (i = 0; i < sizeof(uint32_t); i++) {
+		if ( val & (1 << i)) {
+			if (strlen(flags_string))
+				strncat(flags_string, separator, size - (strlen(flags_string) + strlen(separator) + 1));
+			string = rg_search_table(rg_flags_strings, (1 << i));
+			strncat(flags_string, string, size - (strlen(flags_string) + strlen(string) + 1));
+		}
+	}
+	return flags_string;
+}
 
 const char *
 rg_req_str(int val)
Index: src/daemons/groups.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/groups.c,v
retrieving revision 1.31
diff -u -b -B -p -r1.31 groups.c
--- src/daemons/groups.c	19 Apr 2007 17:59:36 -0000	1.31
+++ src/daemons/groups.c	25 Apr 2007 18:47:48 -0000
@@ -376,6 +376,9 @@ consider_start(resource_node_t *node, ch
 	mp = memb_id_to_p(membership, my_id());
 	assert(mp);
 
+	/* Service cannot be started if Frozen */
+	if (svcStatus->rs_flags & RG_FLAG_FROZEN)
+		return;
 	/*
 	 * Service must be not be running elsewhere to consider for a
 	 * local start.
Index: src/daemons/rg_state.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_state.c,v
retrieving revision 1.31
diff -u -b -B -p -r1.31 rg_state.c
--- src/daemons/rg_state.c	19 Apr 2007 17:59:36 -0000	1.31
+++ src/daemons/rg_state.c	25 Apr 2007 18:47:49 -0000
@@ -282,6 +282,7 @@ init_rg(char *name, rg_state_t *svcblk)
 	svcblk->rs_owner = 0;
 	svcblk->rs_last_owner = 0;
 	svcblk->rs_state = RG_STATE_STOPPED;
+       	svcblk->rs_flags = 0;
        	svcblk->rs_restarts = 0;
 	svcblk->rs_transition = 0;	
 	strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name));
@@ -418,6 +419,7 @@ get_rg_state_local(char *name, rg_state_
 		svcblk->rs_owner = 0;
 		svcblk->rs_last_owner = 0;
 		svcblk->rs_state = RG_STATE_UNINITIALIZED;
+       		svcblk->rs_flags = 0;
        		svcblk->rs_restarts = 0;
 		svcblk->rs_transition = 0;	
 		strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name));
@@ -446,6 +448,7 @@ get_rg_state_local(char *name, rg_state_
  *			2 = DO NOT stop service, return 0 (success)
  *                      3 = DO NOT stop service, return RG_EFORWARD
  *			4 = DO NOT stop service, return RG_EAGAIN
+ *			5 = DO NOT stop service, return RG_EFROZEN
  */
 int
 svc_advise_stop(rg_state_t *svcStatus, char *svcName, int req)
@@ -453,6 +456,11 @@ svc_advise_stop(rg_state_t *svcStatus, c
 	cluster_member_list_t *membership = member_list();
 	int ret = 0;
 	
+	if (svcStatus->rs_flags & RG_FLAG_FROZEN) {
+		clulog(LOG_DEBUG, "Service %s frozen.\n", svcName);
+		return 5;
+	}
+
 	switch(svcStatus->rs_state) {
 	case RG_STATE_FAILED:
 		if (req == RG_DISABLE)
@@ -568,6 +576,7 @@ svc_advise_stop(rg_state_t *svcStatus, c
  *			2 = DO NOT start service, return 0
  *			3 = DO NOT start service, return RG_EAGAIN
  *			4 = DO NOT start service, return RG_ERUN
+ *			5 = DO NOT start service, return RG_EFROZEN
  */
 int
 svc_advise_start(rg_state_t *svcStatus, char *svcName, int req)
@@ -575,6 +584,11 @@ svc_advise_start(rg_state_t *svcStatus, 
 	cluster_member_list_t *membership = member_list();
 	int ret = 0;
 	
+	if (svcStatus->rs_flags & RG_FLAG_FROZEN) {
+		clulog(LOG_DEBUG, "Service %s frozen.\n", svcName);
+		return 5;
+	}
+
 	switch(svcStatus->rs_state) {
 	case RG_STATE_FAILED:
 		clulog(LOG_ERR,
@@ -752,6 +766,9 @@ svc_start(char *svcName, int req)
 	case 4:
 		rg_unlock(&lockp);
 		return RG_ERUN;
+	case 5:
+		rg_unlock(&lockp);
+		return RG_EFROZEN;
 	default:
 		break;
 	}
@@ -914,6 +931,10 @@ svc_status(char *svcName)
 	}
 	rg_unlock(&lockp);
 
+	if (svcStatus.rs_flags & RG_FLAG_FROZEN)
+		/* Don't check status if the service is frozen */
+		return 0;
+
 	if (svcStatus.rs_owner != my_id())
 		/* Don't check status for anything not owned */
 		return 0;
@@ -961,6 +982,17 @@ svc_status(char *svcName)
 int
 svc_status_inquiry(char *svcName)
 {
+	rg_state_t svcStatus;
+
+	if (get_rg_state_local(svcName, &svcStatus) != 0) {
+		clulog(LOG_ERR, "Failed getting local status for RG %s\n",
+		       svcName);
+		return RG_EFAIL;
+	}
+
+	if (svcStatus.rs_flags & RG_FLAG_FROZEN)
+		return 0;
+	
 	return group_op(svcName, RG_STATUS);
 }
 
@@ -1015,6 +1047,9 @@ _svc_stop(char *svcName, int req, int re
 	case 4:
 		rg_unlock(&lockp);
 		return RG_EAGAIN;
+	case 5:
+		rg_unlock(&lockp);
+		return RG_EFROZEN;
 	default:
 		break;
 	}
@@ -1191,6 +1226,76 @@ svc_fail(char *svcName)
 	return 0;
 }
 
+/**
+ * Flag/Unflag a cluster service as frozen.
+ *
+ * @param svcName	Service ID to flag/unflag as frozen.
+ * @return		FAIL, 0
+ */
+int
+_svc_freeze(char *svcName, int enabled)
+{
+	struct dlm_lksb lockp;
+	rg_state_t svcStatus;
+
+	if (rg_lock(svcName, &lockp) == RG_EFAIL) {
+		clulog(LOG_ERR, "#55: Unable to obtain cluster lock: %s\n",
+		       strerror(errno));
+		return RG_EFAIL;
+	}
+
+	clulog(LOG_DEBUG, "Handling %s request for RG %s\n", svcName, enabled?"freeze":"unfreeze");
+
+	if (get_rg_state(svcName, &svcStatus) != 0) {
+		rg_unlock(&lockp);
+		clulog(LOG_ERR, "#56: Failed getting status for RG %s\n",
+		       svcName);
+		return RG_EFAIL;
+	}
+
+	switch(svcStatus.rs_state) {
+	case RG_STATE_STOPPED:
+	case RG_STATE_STARTED:
+	case RG_STATE_DISABLED:
+
+		if (enabled == 1) {
+			clulog(LOG_DEBUG, "Freezing RG %s\n", svcName);
+			svcStatus.rs_flags |= RG_FLAG_FROZEN;
+		} else {
+			clulog(LOG_DEBUG, "Unfreezing RG %s\n", svcName);
+			svcStatus.rs_flags &= ~RG_FLAG_FROZEN;
+		}
+
+		if (set_rg_state(svcName, &svcStatus) != 0) {
+			rg_unlock(&lockp);
+			clulog(LOG_ERR, "#57: Failed changing RG status\n");
+			return RG_EFAIL;
+		}
+		break;
+
+	default:
+		rg_unlock(&lockp);
+		return RG_EFAIL;
+		break;
+	}
+
+	rg_unlock(&lockp);
+
+	return 0;
+}
+
+int
+svc_freeze(char *svcName)
+{
+	return _svc_freeze(svcName, 1);
+}
+
+int
+svc_unfreeze(char *svcName)
+{
+	return _svc_freeze(svcName, 0);
+}
+
 
 /*
  * Send a message to the target node to start the service.
@@ -1324,6 +1429,9 @@ handle_relocate_req(char *svcName, int r
 			svc_fail(svcName);
 			return RG_EFAIL;
 		}
+		if (ret == RG_EFROZEN) {
+			return RG_EFROZEN;
+		}
 		if (ret == RG_EFORWARD)
 			return RG_EFORWARD;
 	}
@@ -1531,7 +1639,7 @@ handle_start_req(char *svcName, int req,
 	/* 
 	   If services are locked, return the error 
 	  */
-	if (ret == RG_EAGAIN || ret == RG_ERUN)
+	if (ret == RG_EAGAIN || ret == RG_ERUN || ret == RG_EFROZEN)
 		return ret;
 
 	/*
Index: src/daemons/rg_thread.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_thread.c,v
retrieving revision 1.19
diff -u -b -B -p -r1.19 rg_thread.c
--- src/daemons/rg_thread.c	27 Mar 2007 19:33:20 -0000	1.19
+++ src/daemons/rg_thread.c	25 Apr 2007 18:47:49 -0000
@@ -422,6 +422,18 @@ resgroup_thread_main(void *arg)
 
 			break;
 
+		case RG_FREEZE:
+			error = svc_freeze(myname);
+			if (error != 0)
+				ret = RG_EFAIL;
+			break;
+
+		case RG_UNFREEZE:
+			error = svc_unfreeze(myname);
+			if (error != 0)
+				ret = RG_EFAIL;
+			break;
+
 		default:
 			printf("Unhandled request %d\n", req->rr_request);
 			ret = RG_NONE;
Index: src/utils/clustat.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/utils/clustat.c,v
retrieving revision 1.31
diff -u -b -B -p -r1.31 clustat.c
--- src/utils/clustat.c	6 Feb 2007 20:21:17 -0000	1.31
+++ src/utils/clustat.c	25 Apr 2007 18:47:50 -0000
@@ -416,7 +416,7 @@ void
 _txt_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags)
 {
 	char owner[31];
-
+	char flags_string[255] = "";
 
 	if (rs->rs_state == RG_STATE_STOPPED ||
 	    rs->rs_state == RG_STATE_DISABLED ||
@@ -430,19 +430,34 @@ _txt_rg_state(rg_state_t *rs, cluster_me
 		snprintf(owner, sizeof(owner), "%-.30s",
 			 my_memb_id_to_name(members, rs->rs_owner));
 	}
-	printf("  %-20.20s %-30.30s %-16.16s\n",
+	rg_flags_str(flags_string, sizeof(flags_string), rs->rs_flags, ", ");
+	printf("  %-20.20s %-30.30s %-16.16s ",
 	       rs->rs_name,
 	       owner,
 	       rg_state_str(rs->rs_state));
+	if(strlen(flags_string))
+		printf ("%-30.30s\n", flags_string);
+	else
+		printf("\n");
 }
 
 
 void
 _txt_rg_state_v(rg_state_t *rs, cluster_member_list_t *members, int flags)
 {
+	char flags_string[255] = "";
+
+	rg_flags_str(flags_string, sizeof(flags_string), rs->rs_flags, ", ");
+
 	printf("Service Name      : %s\n", rs->rs_name);
 	printf("  Current State   : %s (%d)\n",
 	       rg_state_str(rs->rs_state), rs->rs_state);
+	if (rs->rs_flags)
+		printf("  Flags           : %s (%d)\n",
+		       flags_string, rs->rs_flags);
+	else
+		printf("  Flags           : none (%d)\n",
+		       rs->rs_flags);
 	printf("  Owner           : %s\n",
 	       my_memb_id_to_name(members, rs->rs_owner));
 	printf("  Last Owner      : %s\n",
@@ -466,6 +481,7 @@ void
 xml_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags)
 {
 	char time_str[32];
+	char flags_string[255] = "";
 	int x;
 
 	/* Chop off newlines */
@@ -477,12 +493,15 @@ xml_rg_state(rg_state_t *rs, cluster_mem
 		}
 	}
 
-	printf("    <group name=\"%s\" state=\"%d\" state_str=\"%s\" "
+	printf("    <group name=\"%s\" state=\"%d\" state_str=\"%s\""
+	       " flags=\"%d\" flags_str=\"%s\""
 	       " owner=\"%s\" last_owner=\"%s\" restarts=\"%d\""
 	       " last_transition=\"%llu\" last_transition_str=\"%s\"/>\n",
 	       rs->rs_name,
 	       rs->rs_state,
 	       rg_state_str(rs->rs_state),
+	       rs->rs_flags,
+	       rg_flags_str(flags_string, sizeof(flags_string), rs->rs_flags, " "),
 	       my_memb_id_to_name(members, rs->rs_owner),
 	       my_memb_id_to_name(members, rs->rs_last_owner),
 	       rs->rs_restarts,
@@ -504,10 +523,10 @@ txt_rg_states(rg_state_list_t *rgl, clus
 		ret = -1;
 
 	if (!(flags & RG_VERBOSE)) {
-		printf("  %-20.20s %-30.30s %-14.14s\n",
-		       "Service Name", "Owner (Last)", "State");
-		printf("  %-20.20s %-30.30s %-14.14s\n",
-		       "------- ----", "----- ------", "-----");
+		printf("  %-20.20s %-30.30s %-16.16s %-30.30s\n",
+		       "Service Name", "Owner (Last)", "State", "Flags");
+		printf("  %-20.20s %-30.30s %-16.16s %-30.30s\n",
+		       "------- ----", "----- ------", "-----", "-----");
 	} else {
 		printf("Service Information\n"
 		       "------- -----------\n\n");
Index: src/utils/clusvcadm.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/utils/clusvcadm.c,v
retrieving revision 1.18
diff -u -b -B -p -r1.18 clusvcadm.c
--- src/utils/clusvcadm.c	20 Mar 2007 17:09:57 -0000	1.18
+++ src/utils/clusvcadm.c	25 Apr 2007 18:47:50 -0000
@@ -240,7 +240,7 @@ main(int argc, char **argv)
 		return 1;
 	}
 
-	while ((opt = getopt(argc, argv, "lSue:M:d:r:n:m:vR:s:qh?")) != EOF) {
+	while ((opt = getopt(argc, argv, "lSue:M:d:r:n:m:vR:s:F:U:qh?")) != EOF) {
 		switch (opt) {
 		case 'l':
 			return do_lock();
@@ -294,6 +294,16 @@ main(int argc, char **argv)
 		case 'v':
 			printf("%s\n",PACKAGE_VERSION);
 			return 0;
+		case 'F':
+			actionstr = "freezing";
+			action = RG_FREEZE;
+			svcname = optarg;
+			break;
+		case 'U':
+			actionstr = "unfreezing";
+			action = RG_UNFREEZE;
+			svcname = optarg;
+			break;
 		case 'q':
 			close(STDOUT_FILENO);
 			break;


--
Linux-cluster mailing list
Linux-cluster@redhat.com
https://www.redhat.com/mailman/listinfo/linux-cluster

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic