[prev in list] [next in list] [prev in thread] [next in thread]
List: redhat-linux-cluster
Subject: Re: [Linux-cluster] [RFC][PATCH] Add ability to freeze a service.
From: Simone Gotti <simone.gotti () email ! it>
Date: 2007-04-25 18:52:08
Message-ID: 1177527128.14525.15.camel () localhost
[Download RAW message or body]
On Mon, 2007-04-23 at 16:02 -0400, Lon Hohberger wrote:
> On Mon, Apr 23, 2007 at 11:15:22AM +0200, Simone Gotti wrote:
> > Hi,
> >
> > like discussed with lon on IRC I'm trying to add to rgmanager the
> > ability to freeze a service. I worked on it in these days and did an
> > example patch. Here is how I think what a "freeze" can be and, of
> > course, it can be implemented in many other ways so it's only an
> > example.
>
> Hi,
>
> couple of comments -
>
Hi Lon,
> (1) s/freezed/frozen/ig :)
Uh... :( I hope I fixed them all now without adding other language errors.
> (2) svc_status_inquiry shouldn't call rg_lock(); you should use
> get_rg_state_local() instead of rg_lock/get_rg_state/rg_unlock
Done. I didn't know what to do if its return value was != 0, so I logged an error and returned. Is \
that right?
> Note that svc_status_inquiry isn't even used yet, so the behavior
> might change at a later date ;)
Ok.
> (3.a) Warning: The addition of rs_flags to rg_state_t will break
> compatibility with existing software (which is fine in -HEAD). It
> would be easier to cope with this change (that is- make upgrades from
> old->new work) if you added the rs_flags field after the
> rs_transition field.
>
> (3.b) If you don't want to do that, then you need to add another 32-bits
> worth of data (could be just a "pad" field) before rs_transition because
> the kernel on ia64 machines will complain if they read a 64-bit int and
> it's not aligned on a 64-bit boundary.
I implemented 3.a. So it should be easily backportable. I wasn't aware of the ia64 \
alignment problems so thanks for the explanation.
> Aside from that, it looks like you got it right for what you wanted it
> to do. I can fix (1) (language stuff) if you fix (2) and (3).
I tried also to fix (1) (I hope).
In the meantime I fixed (as you reported on IRC) the missing changes
to clustat.c:xml_rg_state and slightly enhanced the default clustat
output by adding a "Flags" column.
I added a "separator" parameter to rg_flags_str so the caller can choose it.
For example, in the default clustat output I'm using a comma-separated list,
while in the xml output I'm using a space as the separator (I don't know if
this is the right thing to do with xml).
Thanks!
Bye!
> -- Lon
>
--
Simone Gotti
--
Email.it, the professional e-mail, gratis per te: http://www.email.it/f
Sponsor:
Vuoi diventare un vero esperto sul Controllo di Gestione? Scopri come nella tua \
azienda puoi migliorare gli utili e ridurre le spese Clicca qui: \
http://adv.email.it/cgi-bin/foclick.cgi?mid=6197&d=25-4
["rgmanager-cvsHEAD-add_service_freezing-try02.patch" (rgmanager-cvsHEAD-add_service_freezing-try02.patch)]
Index: include/resgroup.h
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/include/resgroup.h,v
retrieving revision 1.19
diff -u -b -B -p -r1.19 resgroup.h
--- include/resgroup.h 20 Mar 2007 17:09:56 -0000 1.19
+++ include/resgroup.h 25 Apr 2007 18:47:46 -0000
@@ -35,6 +35,7 @@ typedef struct {
uint32_t rs_restarts; /**< Number of cluster-induced
restarts */
uint64_t rs_transition; /**< Last service transition time */
+ uint32_t rs_flags; /**< User setted flags */
} rg_state_t;
#define swab_rg_state_t(ptr) \
@@ -46,6 +47,7 @@ typedef struct {
swab32((ptr)->rs_state);\
swab32((ptr)->rs_restarts);\
swab64((ptr)->rs_transition);\
+ swab32((ptr)->rs_flags);\
}
@@ -79,6 +81,8 @@ typedef struct {
#define RG_UNLOCK 20
#define RG_QUERY_LOCK 21
#define RG_MIGRATE 22
+#define RG_FREEZE 23
+#define RG_UNFREEZE 24
#define RG_NONE 999
const char *rg_req_str(int req);
@@ -105,7 +109,11 @@ int handle_start_remote_req(char *svcNam
#define DEFAULT_CHECK_INTERVAL 10
+/* Resource group flags (for now) */
+#define RG_FLAG_FROZEN (1<<0) /** Resource frozen */
+
const char *rg_state_str(int val);
+const char *rg_flags_str(char *flags_string, size_t size, int val, char *separator);
const char *agent_op_str(int val);
int eval_groups(int local, uint32_t nodeid, int nodeStatus);
@@ -121,6 +129,8 @@ int svc_stop(char *svcName, int error);
int svc_status(char *svcName);
int svc_disable(char *svcName);
int svc_fail(char *svcName);
+int svc_freeze(char *svcName);
+int svc_unfreeze(char *svcName);
int svc_migrate(char *svcName, int target);
int rt_enqueue_request(const char *resgroupname, int request,
msgctx_t *resp_ctx,
@@ -162,6 +172,7 @@ cluster_member_list_t *member_list(void)
int my_id(void);
/* Return codes */
+#define RG_EFROZEN -11 /* Service is frozen */
#define RG_ERUN -10 /* Service is already running */
#define RG_EQUORUM -9 /* Operation requires quorum */
#define RG_EINVAL -8 /* Invalid operation for resource */
Index: src/clulib/rg_strings.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/clulib/rg_strings.c,v
retrieving revision 1.7
diff -u -b -B -p -r1.7 rg_strings.c
--- src/clulib/rg_strings.c 10 Mar 2007 00:20:54 -0000 1.7
+++ src/clulib/rg_strings.c 25 Apr 2007 18:47:47 -0000
@@ -35,6 +35,7 @@ const struct string_val rg_error_strings
{ RG_ENOSERVICE,"Service does not exist" },
{ RG_EFORWARD, "Service not mastered locally" },
{ RG_EABORT, "Aborted; service failed" },
+ { RG_EFROZEN, "Failure: Service is frozen"},
{ RG_EFAIL, "Failure" },
{ RG_ESUCCESS, "Success" },
{ RG_YES, "Yes" },
@@ -88,6 +89,12 @@ const struct string_val rg_state_strings
};
+const struct string_val rg_flags_strings[] = {
+ {RG_FLAG_FROZEN, "frozen"},
+ {0, NULL}
+};
+
+
const struct string_val agent_ops[] = {
{RS_START, "start"},
{RS_STOP, "stop"},
@@ -122,6 +129,20 @@ rg_search_table(const struct string_val
}
+/**
+ * Find the string associated with a flag value.
+ *
+ * Scans table entry by entry until an entry whose str is NULL is reached.
+ *
+ * @param table	Lookup table, terminated by an entry with a NULL str.
+ * @param val		Value to look for.
+ * @return		The matching entry's string, or "Unknown" if no
+ *			entry has the given value.
+ */
+static inline const char *
+rg_flag_search_table(const struct string_val *table, int val)
+{
+	const struct string_val *entry;
+
+	for (entry = table; entry->str != NULL; entry++) {
+		if (entry->val != val)
+			continue;
+		return entry->str;
+	}
+
+	return "Unknown";
+}
+
+
const char *
rg_strerror(int val)
{
@@ -134,6 +155,22 @@ rg_state_str(int val)
return rg_search_table(rg_state_strings, val);
}
+/**
+ * Build a human-readable list of the flags set in a flags bitmask.
+ *
+ * The name of every set bit is appended to flags_string; each name after
+ * existing text is preceded by separator.  Text already present in
+ * flags_string is kept, so callers pass an empty string to get just the
+ * flag list.  Output that does not fit is truncated and the buffer stays
+ * NUL-terminated.
+ *
+ * @param flags_string	NUL-terminated buffer to append to.
+ * @param size		Total size of flags_string, in bytes.
+ * @param val		Flags bitmask (RG_FLAG_* values).
+ * @param separator	String placed between flag names.
+ * @return		flags_string.
+ */
+const char *
+rg_flags_str(char *flags_string, size_t size, int val, char *separator)
+{
+	size_t i;
+	size_t len;
+	uint32_t bit;
+	const char *string;
+
+	if (flags_string == NULL || size == 0)
+		return flags_string;
+
+	/* Visit every bit of the 32-bit flags word, not sizeof() bytes. */
+	for (i = 0; i < sizeof(uint32_t) * 8; i++) {
+		/* Unsigned shift: 1 << 31 on a signed int is undefined. */
+		bit = (uint32_t)1 << i;
+		if (!((uint32_t)val & bit))
+			continue;
+
+		/*
+		 * strncat() takes the number of bytes that may still be
+		 * appended (excluding the NUL), i.e. the free space, not
+		 * the free space minus the length of the source.
+		 */
+		len = strlen(flags_string);
+		if (len > 0 && separator != NULL && len < size - 1)
+			strncat(flags_string, separator, size - len - 1);
+
+		string = rg_flag_search_table(rg_flags_strings, (int)bit);
+		len = strlen(flags_string);
+		if (len < size - 1)
+			strncat(flags_string, string, size - len - 1);
+	}
+	return flags_string;
+}
const char *
rg_req_str(int val)
Index: src/daemons/groups.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/groups.c,v
retrieving revision 1.31
diff -u -b -B -p -r1.31 groups.c
--- src/daemons/groups.c 19 Apr 2007 17:59:36 -0000 1.31
+++ src/daemons/groups.c 25 Apr 2007 18:47:48 -0000
@@ -376,6 +376,9 @@ consider_start(resource_node_t *node, ch
mp = memb_id_to_p(membership, my_id());
assert(mp);
+ /* Service cannot be started if Frozen */
+ if (svcStatus->rs_flags & RG_FLAG_FROZEN)
+ return;
/*
* Service must be not be running elsewhere to consider for a
* local start.
Index: src/daemons/rg_state.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_state.c,v
retrieving revision 1.31
diff -u -b -B -p -r1.31 rg_state.c
--- src/daemons/rg_state.c 19 Apr 2007 17:59:36 -0000 1.31
+++ src/daemons/rg_state.c 25 Apr 2007 18:47:49 -0000
@@ -282,6 +282,7 @@ init_rg(char *name, rg_state_t *svcblk)
svcblk->rs_owner = 0;
svcblk->rs_last_owner = 0;
svcblk->rs_state = RG_STATE_STOPPED;
+ svcblk->rs_flags = 0;
svcblk->rs_restarts = 0;
svcblk->rs_transition = 0;
strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name));
@@ -418,6 +419,7 @@ get_rg_state_local(char *name, rg_state_
svcblk->rs_owner = 0;
svcblk->rs_last_owner = 0;
svcblk->rs_state = RG_STATE_UNINITIALIZED;
+ svcblk->rs_flags = 0;
svcblk->rs_restarts = 0;
svcblk->rs_transition = 0;
strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name));
@@ -446,6 +448,7 @@ get_rg_state_local(char *name, rg_state_
* 2 = DO NOT stop service, return 0 (success)
* 3 = DO NOT stop service, return RG_EFORWARD
* 4 = DO NOT stop service, return RG_EAGAIN
+ * 5 = DO NOT stop service, return RG_EFROZEN
*/
int
svc_advise_stop(rg_state_t *svcStatus, char *svcName, int req)
@@ -453,6 +456,11 @@ svc_advise_stop(rg_state_t *svcStatus, c
cluster_member_list_t *membership = member_list();
int ret = 0;
+ if (svcStatus->rs_flags & RG_FLAG_FROZEN) {
+ clulog(LOG_DEBUG, "Service %s frozen.\n", svcName);
+ return 5;
+ }
+
switch(svcStatus->rs_state) {
case RG_STATE_FAILED:
if (req == RG_DISABLE)
@@ -568,6 +576,7 @@ svc_advise_stop(rg_state_t *svcStatus, c
* 2 = DO NOT start service, return 0
* 3 = DO NOT start service, return RG_EAGAIN
* 4 = DO NOT start service, return RG_ERUN
+ * 5 = DO NOT start service, return RG_EFROZEN
*/
int
svc_advise_start(rg_state_t *svcStatus, char *svcName, int req)
@@ -575,6 +584,11 @@ svc_advise_start(rg_state_t *svcStatus,
cluster_member_list_t *membership = member_list();
int ret = 0;
+ if (svcStatus->rs_flags & RG_FLAG_FROZEN) {
+ clulog(LOG_DEBUG, "Service %s frozen.\n", svcName);
+ return 5;
+ }
+
switch(svcStatus->rs_state) {
case RG_STATE_FAILED:
clulog(LOG_ERR,
@@ -752,6 +766,9 @@ svc_start(char *svcName, int req)
case 4:
rg_unlock(&lockp);
return RG_ERUN;
+ case 5:
+ rg_unlock(&lockp);
+ return RG_EFROZEN;
default:
break;
}
@@ -914,6 +931,10 @@ svc_status(char *svcName)
}
rg_unlock(&lockp);
+ if (svcStatus.rs_flags & RG_FLAG_FROZEN)
+ /* Don't check status if the service is frozen */
+ return 0;
+
if (svcStatus.rs_owner != my_id())
/* Don't check status for anything not owned */
return 0;
@@ -961,6 +982,17 @@ svc_status(char *svcName)
int
svc_status_inquiry(char *svcName)
{
+ rg_state_t svcStatus;
+
+ if (get_rg_state_local(svcName, &svcStatus) != 0) {
+ clulog(LOG_ERR, "Failed getting local status for RG %s\n",
+ svcName);
+ return RG_EFAIL;
+ }
+
+ if (svcStatus.rs_flags & RG_FLAG_FROZEN)
+ return 0;
+
return group_op(svcName, RG_STATUS);
}
@@ -1015,6 +1047,9 @@ _svc_stop(char *svcName, int req, int re
case 4:
rg_unlock(&lockp);
return RG_EAGAIN;
+ case 5:
+ rg_unlock(&lockp);
+ return RG_EFROZEN;
default:
break;
}
@@ -1191,6 +1226,76 @@ svc_fail(char *svcName)
return 0;
}
+/**
+ * Flag/Unflag a cluster service as frozen.
+ *
+ * The service state is read and written back while holding the cluster
+ * lock for the service.  The flag is only changed when the service is
+ * stopped, started or disabled; in any other state the request fails.
+ *
+ * @param svcName	Service ID to flag/unflag as frozen.
+ * @param enabled	1 to set the frozen flag, any other value to clear it.
+ * @return		RG_EFAIL if the lock or the state could not be
+ *			obtained, the state could not be written, or the
+ *			service is in a state that cannot be frozen;
+ *			0 on success.
+ */
+int
+_svc_freeze(char *svcName, int enabled)
+{
+	struct dlm_lksb lockp;
+	rg_state_t svcStatus;
+	int freeze = (enabled == 1);
+	int ret = RG_EFAIL;
+
+	if (rg_lock(svcName, &lockp) == RG_EFAIL) {
+		clulog(LOG_ERR, "#55: Unable to obtain cluster lock: %s\n",
+		       strerror(errno));
+		return RG_EFAIL;
+	}
+
+	/* Argument order follows the format: request type, then name. */
+	clulog(LOG_DEBUG, "Handling %s request for RG %s\n",
+	       freeze ? "freeze" : "unfreeze", svcName);
+
+	if (get_rg_state(svcName, &svcStatus) != 0) {
+		clulog(LOG_ERR, "#56: Failed getting status for RG %s\n",
+		       svcName);
+		goto out_unlock;
+	}
+
+	switch (svcStatus.rs_state) {
+	case RG_STATE_STOPPED:
+	case RG_STATE_STARTED:
+	case RG_STATE_DISABLED:
+		break;
+	default:
+		/* Any other state: leave the flags untouched and fail. */
+		goto out_unlock;
+	}
+
+	if (freeze) {
+		clulog(LOG_DEBUG, "Freezing RG %s\n", svcName);
+		svcStatus.rs_flags |= RG_FLAG_FROZEN;
+	} else {
+		clulog(LOG_DEBUG, "Unfreezing RG %s\n", svcName);
+		svcStatus.rs_flags &= ~RG_FLAG_FROZEN;
+	}
+
+	if (set_rg_state(svcName, &svcStatus) != 0) {
+		clulog(LOG_ERR, "#57: Failed changing RG status\n");
+		goto out_unlock;
+	}
+
+	ret = 0;
+
+out_unlock:
+	rg_unlock(&lockp);
+	return ret;
+}
+
+/**
+ * Flag a cluster service as frozen.
+ *
+ * @param svcName	Service ID to freeze.
+ * @return		Whatever _svc_freeze() returns for a freeze request.
+ */
+int
+svc_freeze(char *svcName)
+{
+	const int enabled = 1;
+
+	return _svc_freeze(svcName, enabled);
+}
+
+/**
+ * Remove the frozen flag from a cluster service.
+ *
+ * @param svcName	Service ID to unfreeze.
+ * @return		Whatever _svc_freeze() returns for an unfreeze request.
+ */
+int
+svc_unfreeze(char *svcName)
+{
+	const int enabled = 0;
+
+	return _svc_freeze(svcName, enabled);
+}
+
/*
* Send a message to the target node to start the service.
@@ -1324,6 +1429,9 @@ handle_relocate_req(char *svcName, int r
svc_fail(svcName);
return RG_EFAIL;
}
+ if (ret == RG_EFROZEN) {
+ return RG_EFROZEN;
+ }
if (ret == RG_EFORWARD)
return RG_EFORWARD;
}
@@ -1531,7 +1639,7 @@ handle_start_req(char *svcName, int req,
/*
If services are locked, return the error
*/
- if (ret == RG_EAGAIN || ret == RG_ERUN)
+ if (ret == RG_EAGAIN || ret == RG_ERUN || ret == RG_EFROZEN)
return ret;
/*
Index: src/daemons/rg_thread.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_thread.c,v
retrieving revision 1.19
diff -u -b -B -p -r1.19 rg_thread.c
--- src/daemons/rg_thread.c 27 Mar 2007 19:33:20 -0000 1.19
+++ src/daemons/rg_thread.c 25 Apr 2007 18:47:49 -0000
@@ -422,6 +422,18 @@ resgroup_thread_main(void *arg)
break;
+ case RG_FREEZE:
+ error = svc_freeze(myname);
+ if (error != 0)
+ ret = RG_EFAIL;
+ break;
+
+ case RG_UNFREEZE:
+ error = svc_unfreeze(myname);
+ if (error != 0)
+ ret = RG_EFAIL;
+ break;
+
default:
printf("Unhandled request %d\n", req->rr_request);
ret = RG_NONE;
Index: src/utils/clustat.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/utils/clustat.c,v
retrieving revision 1.31
diff -u -b -B -p -r1.31 clustat.c
--- src/utils/clustat.c 6 Feb 2007 20:21:17 -0000 1.31
+++ src/utils/clustat.c 25 Apr 2007 18:47:50 -0000
@@ -416,7 +416,7 @@ void
_txt_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags)
{
char owner[31];
-
+ char flags_string[255] = "";
if (rs->rs_state == RG_STATE_STOPPED ||
rs->rs_state == RG_STATE_DISABLED ||
@@ -430,19 +430,34 @@ _txt_rg_state(rg_state_t *rs, cluster_me
snprintf(owner, sizeof(owner), "%-.30s",
my_memb_id_to_name(members, rs->rs_owner));
}
- printf(" %-20.20s %-30.30s %-16.16s\n",
+ rg_flags_str(flags_string, sizeof(flags_string), rs->rs_flags, ", ");
+ printf(" %-20.20s %-30.30s %-16.16s ",
rs->rs_name,
owner,
rg_state_str(rs->rs_state));
+ if(strlen(flags_string))
+ printf ("%-30.30s\n", flags_string);
+ else
+ printf("\n");
}
void
_txt_rg_state_v(rg_state_t *rs, cluster_member_list_t *members, int flags)
{
+ char flags_string[255] = "";
+
+ rg_flags_str(flags_string, sizeof(flags_string), rs->rs_flags, ", ");
+
printf("Service Name : %s\n", rs->rs_name);
printf(" Current State : %s (%d)\n",
rg_state_str(rs->rs_state), rs->rs_state);
+ if (rs->rs_flags)
+ printf(" Flags : %s (%d)\n",
+ flags_string, rs->rs_flags);
+ else
+ printf(" Flags : none (%d)\n",
+ rs->rs_flags);
printf(" Owner : %s\n",
my_memb_id_to_name(members, rs->rs_owner));
printf(" Last Owner : %s\n",
@@ -466,6 +481,7 @@ void
xml_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags)
{
char time_str[32];
+ char flags_string[255] = "";
int x;
/* Chop off newlines */
@@ -477,12 +493,15 @@ xml_rg_state(rg_state_t *rs, cluster_mem
}
}
- printf(" <group name=\"%s\" state=\"%d\" state_str=\"%s\" "
+ printf(" <group name=\"%s\" state=\"%d\" state_str=\"%s\""
+ " flags=\"%d\" flags_str=\"%s\""
" owner=\"%s\" last_owner=\"%s\" restarts=\"%d\""
" last_transition=\"%llu\" last_transition_str=\"%s\"/>\n",
rs->rs_name,
rs->rs_state,
rg_state_str(rs->rs_state),
+ rs->rs_flags,
+ rg_flags_str(flags_string, sizeof(flags_string), rs->rs_flags, " "),
my_memb_id_to_name(members, rs->rs_owner),
my_memb_id_to_name(members, rs->rs_last_owner),
rs->rs_restarts,
@@ -504,10 +523,10 @@ txt_rg_states(rg_state_list_t *rgl, clus
ret = -1;
if (!(flags & RG_VERBOSE)) {
- printf(" %-20.20s %-30.30s %-14.14s\n",
- "Service Name", "Owner (Last)", "State");
- printf(" %-20.20s %-30.30s %-14.14s\n",
- "------- ----", "----- ------", "-----");
+ printf(" %-20.20s %-30.30s %-16.16s %-30.30s\n",
+ "Service Name", "Owner (Last)", "State", "Flags");
+ printf(" %-20.20s %-30.30s %-16.16s %-30.30s\n",
+ "------- ----", "----- ------", "-----", "-----");
} else {
printf("Service Information\n"
"------- -----------\n\n");
Index: src/utils/clusvcadm.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/utils/clusvcadm.c,v
retrieving revision 1.18
diff -u -b -B -p -r1.18 clusvcadm.c
--- src/utils/clusvcadm.c 20 Mar 2007 17:09:57 -0000 1.18
+++ src/utils/clusvcadm.c 25 Apr 2007 18:47:50 -0000
@@ -240,7 +240,7 @@ main(int argc, char **argv)
return 1;
}
- while ((opt = getopt(argc, argv, "lSue:M:d:r:n:m:vR:s:qh?")) != EOF) {
+ while ((opt = getopt(argc, argv, "lSue:M:d:r:n:m:vR:s:F:U:qh?")) != EOF) {
switch (opt) {
case 'l':
return do_lock();
@@ -294,6 +294,16 @@ main(int argc, char **argv)
case 'v':
printf("%s\n",PACKAGE_VERSION);
return 0;
+ case 'F':
+ actionstr = "freezing";
+ action = RG_FREEZE;
+ svcname = optarg;
+ break;
+ case 'U':
+ actionstr = "unfreezing";
+ action = RG_UNFREEZE;
+ svcname = optarg;
+ break;
case 'q':
close(STDOUT_FILENO);
break;
--
Linux-cluster mailing list
Linux-cluster@redhat.com
https://www.redhat.com/mailman/listinfo/linux-cluster
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic