[prev in list] [next in list] [prev in thread] [next in thread]
List: linux-ha-dev
Subject: Re: [Linux-ha-dev] Patch: RA anything
From: Dejan Muhamedagic <dejanmm () fastmail ! fm>
Date: 2009-11-16 17:42:18
Message-ID: 20091116174218.GA16321 () rondo ! homenet
[Download RAW message or body]
Hi,
This patch is now applied with some modifications. Sorry about
the horrible delay :(
Cheers,
Dejan
On Wed, Feb 11, 2009 at 12:15:54PM +0100, Dominik Klein wrote:
> Hi
>
> I fixed most of the things Lars mentioned in
> http://hg.linux-ha.org/dev/rev/15bcf3491f9c and will explain why I did
> not fix some of them. ocf-tester runs fine with the RA.
>
> > # FIXME: This should use pidofproc
>
> pidofproc is not available everywhere and is not able to get down to
> command line options, eg could not tell the difference between "$process
> $option_a" and "$process $option_b" which I wanted to support with this
> agent.
>
> Example:
> dktest3:~/src/linuxha/hg/dev # sleep 200 &
> [1] 5799
> dktest3:~/src/linuxha/hg/dev # sleep 300 &
> [2] 5801
> dktest3:~/src/linuxha/hg/dev # pidofproc sleep
> 5801 5799
> dktest3:~/src/linuxha/hg/dev # pidofproc "sleep 300"
>
> > # FIXME: use start_daemon
>
> start_daemon is not available everywhere either.
>
> > # FIXME: What about daemons which can manage their own pidfiles?
>
> This agent is meant to be used for programs that are not actually
> daemons by design. It is meant to be able to run sth "stupid" in the
> cluster. Even like "/bin/sleep 10000000000000000000"
>
> > # FIXME: use killproc
>
> This is also a problem with "$process $option_a" and "$process
> $option_b". You can't just "killproc $process" then.
>
> > # FIXME: Attributes special meaning to the resource id
>
> I tried to, but couldn't understand what you meant here.
>
> I also talked to Dejan on IRC and we agreed that "anything" is a bad
> name for the RA and the changeset description was propably bad, too.
> This RA is not for (as the cs stated) "arbitrary daemons", it is more
> for daemonizing programs which were not meant to be daemons.
>
> If a proper name comes to anyone's mind - please share.
>
> Hopefully, now it is a bit clearer what I wanted to be able to do with
> this RA. I agree the "cmd=" lines and pid file creation are very very
> ugly, but I could not yet find a better way. Not that much of a shell
> genius I guess :( Please share if you can improve things.
>
> Regards
> Dominik
> exporting patch:
> # HG changeset patch
> # User Dominik Klein <dk@in-telegence.net>
> # Date 1234350091 -3600
> # Node ID 04533b37813c8be009814f52de7b14ff65bf9862
> # Parent 90ff997faa7288248ac57583b0c03df4c8e41bda
> RA: anything. Implement most of lmbs suggestions.
>
> diff -r 90ff997faa72 -r 04533b37813c resources/OCF/anything
> --- a/resources/OCF/anything Wed Feb 11 11:31:02 2009 +0100
> +++ b/resources/OCF/anything Wed Feb 11 12:01:31 2009 +0100
> @@ -32,6 +32,7 @@
> # OCF_RESKEY_errlogfile
> # OCF_RESKEY_user
> # OCF_RESKEY_monitor_hook
> +# OCF_RESKEY_stop_timeout
> #
> # This RA starts $binfile with $cmdline_options as $user and writes a $pidfile from \
> that. # If you want it to, it logs:
> @@ -47,18 +48,20 @@
> # Initialization:
> . ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
>
> -getpid() { # make sure that the file contains a number
> - # FIXME: pidfiles could contain spaces
> - grep '^[0-9][0-9]*$' $1
> +getpid() {
> + grep -o '[0-9]*' $1
> }
>
> anything_status() {
> - # FIXME: This should use pidofproc
> - # FIXME: pidfile w/o process means the process died, so should
> - # be ERR_GENERIC
> - if test -f "$pidfile" && pid=`getpid $pidfile` && kill -0 $pid
> + if test -f "$pidfile"
> then
> - return $OCF_RUNNING
> + if pid=`getpid $pidfile` && kill -0 $pid
> + then
> + return $OCF_RUNNING
> + else
> + # pidfile w/o process means the process died
> + return $OCF_ERR_GENERIC
> + fi
> else
> return $OCF_NOT_RUNNING
> fi
> @@ -66,8 +69,6 @@
>
> anything_start() {
> if ! anything_status
> - # FIXME: use start_daemon
> - # FIXME: What about daemons which can manage their own pidfiles?
> then
> if [ -n "$logfile" -a -n "$errlogfile" ]
> then
> @@ -101,29 +102,48 @@
> }
>
> anything_stop() {
> - # FIXME: use killproc
> + if [ -n "$OCF_RESKEY_stop_timeout" ]
> + then
> + stop_timeout=$OCF_RESKEY_stop_timeout
> + elif [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
> + # Allow 2/3 of the action timeout for the orderly shutdown
> + # (The origin unit is ms, hence the conversion)
> + stop_timeout=$((OCF_RESKEY_CRM_meta_timeout/1500))
> + else
> + stop_timeout=10
> + fi
> if anything_status
> then
> - pid=`getpid $pidfile`
> - kill $pid
> - i=0
> - # FIXME: escalate to kill -9 before timeout
> - while sleep 1
> - do
> - if ! anything_status
> - then
> - rm -f $pidfile > /dev/null 2>&1
> - return $OCF_SUCCESS
> - fi
> - let "i++"
> - done
> + pid=`getpid $pidfile`
> + kill $pid
> + rm -f $pidfile
> + i=0
> + while [ $i -lt $stop_timeout ]
> + do
> + while sleep 1
> + do
> + if ! anything_status
> + then
> + return $OCF_SUCCESS
> + fi
> + let "i++"
> + done
> + done
> + ocf_log warn "Stop with SIGTERM failed/timed out, now sending \
> SIGKILL." + kill -9 $pid
> + if ! anything_status
> + then
> + ocf_log warn "SIGKILL did the job."
> + return $OCF_SUCCESS
> + else
> + ocf_log err "Failed to stop - even with SIGKILL."
> + return $OCF_ERR_GENERIC
> + fi
> else
> # was not running, so stop can be considered successful
> rm -f $pidfile
> return $OCF_SUCCESS
> fi
> - # FIXME: Never reached.
> - return $OCF_ERR_GENERIC
> }
>
> anything_monitor() {
> @@ -131,12 +151,12 @@
> ret=$?
> if [ $ret -eq $OCF_SUCCESS ]
> then
> - # implement your deeper monitor operation here
> if [ -n "$OCF_RESKEY_monitor_hook" ]; then
> eval "$OCF_RESKEY_monitor_hook"
> - # FIXME: Implement a check that this doesn't
> - # accidentially return NOT_RUNNING?
> - return
> + if [ $? -ne $OCF_SUCCESS ]; then
> + return ${OCF_ERR_GENERIC}
> + fi
> + return $OCF_SUCCESS
> else
> true
> fi
> @@ -150,19 +170,16 @@
> binfile="$OCF_RESKEY_binfile"
> cmdline_options="$OCF_RESKEY_cmdline_options"
> pidfile="$OCF_RESKEY_pidfile"
> -# FIXME: Why test for $binfile here?
> -[ -z "$pidfile" -a -n "$binfile" ] && pidfile=${HA_VARRUN}/anything_${process}.pid
> +[ -z "$pidfile" ] && pidfile=${HA_VARRUN}/anything_${process}.pid
> logfile="$OCF_RESKEY_logfile"
> errlogfile="$OCF_RESKEY_errlogfile"
> user="$OCF_RESKEY_user"
> [ -z "$user" ] && user=root
>
> anything_validate() {
> - # FIXME: Actually this needs to test from the point of view of
> - # the user.
> - if [ ! -x "$binfile" ]
> + if ! su - $user -c "test -x $binfile"
> then
> - ocf_log err "binfile $binfile does not exist or is not executable."
> + ocf_log err "binfile $binfile does not exist or is not executable by $user."
> exit $OCF_ERR_INSTALLED
> fi
> if ! getent passwd $user >/dev/null 2>&1
> @@ -243,6 +260,14 @@
> <shortdesc lang="en">Command to run in monitor operation</shortdesc>
> <content type="string"/>
> </parameter>
> +<parameter name="stop_timeout">
> +<longdesc lang="en">
> +In the stop operation: Seconds to wait for kill -TERM to succeed
> +before sending kill -SIGKILL. Defaults to 2/3 of the stop operation timeout.
> +</longdesc>
> +<shortdesc lang="en">Seconds to wait after having sent SIGTERM before sending \
> SIGKILL in stop operation</shortdesc> +<content type="string" default=""/>
> +</parameter>
> </parameters>
> <actions>
> <action name="start" timeout="90" />
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic