[prev in list] [next in list] [prev in thread] [next in thread]
List: linux-ha-dev
Subject: Re: [Linux-ha-dev] [PATCH] oracle/oralsnr RA: Improvement of the
From: Dejan Muhamedagic <dejanmm () fastmail ! fm>
Date: 2010-01-22 10:07:13
Message-ID: 20100122100712.GB22406 () rondo ! homenet
[Download RAW message or body]
Hi Kazutomo-san,
On Fri, Jan 22, 2010 at 06:31:15PM +0900, NAKAHIRA Kazutomo wrote:
> Hi, all
>
> I improved the logging of the oracle/oralsnr RA.
> This patch aims to record the execution results of sqlplus
> (and other commands) and to output a detailed failure log using ocf_log
> when a problem occurs in an RA operation.
Thanks for the patch. There are a few spelling problems, but I'll
fix those. Also, the status operation must write its output to stdout;
that output is not meant for the logs.
Cheers,
Dejan
> Best Regards,
> NAKAHIRA Kazutomo
>
> --
> ----------------------------------------
> NAKAHIRA Kazutomo
> NTT DATA INTELLILINK CORPORATION
> Open Source Business Unit
> Software Services Integration Business Division
> # HG changeset patch
> # User root@prec370b
> # Date 1264145021 -32400
> # Node ID 0ecef9560522601936888ce168dae5f563662402
> # Parent 3024963150433960c51aa1bdccde39839efb09b7
> oracle: improve logging
>
> diff -r 302496315043 -r 0ecef9560522 heartbeat/oracle
> --- a/heartbeat/oracle Thu Jan 21 16:42:40 2010 +0100
> +++ b/heartbeat/oracle Fri Jan 22 16:23:41 2010 +0900
> @@ -231,11 +231,23 @@ ora_info() {
>
> testoraenv() {
> # Let's make sure a few important things are set...
> - [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
> + if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
> + ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
> return 1
> + fi
> # and some important things are there
> - [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
> + if [ ! -x "$sqlplus" ]; then
> + ocf_log err "Executeble sqlplus command($sqlplus) dose not exist."
> return 1
> + fi
> + if [ ! -x "$lsnrctl" ]; then
> + ocf_log err "Executeble lsnrctl command($lsnrctl) dose not exist."
> + return 1
> + fi
> + if [ ! -x "$tnsping" ]; then
> + ocf_log err "Executeble tnsping command($tnsping) dose not exist."
> + return 1
> + fi
> return 0
> }
>
> @@ -367,13 +379,20 @@ showdbstat() {
> # Part 1: Oracle
> dumpinstipc() {
> local dumpdest=`dbasql getdumpdest`
> - [ "x$dumpdest" != x -a -d "$dumpdest" ] || return 1
> + if [ "x$dumpdest" == x -o ! -d "$dumpdest" ]; then
> + ocf_log warn "dumpdest($dumpdest) is not a regular directory."
> + return 1
> + fi
> local -i fcount=`ls -rt $dumpdest | wc -l`
> - dbasql getipc >/dev/null 2>&1
> + output=`dbasql getipc`
> local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1`
> local -i fcount2=`ls -rt $dumpdest | wc -l`
> - [ $((fcount+1)) -eq $fcount2 ] || return 1 # more than one file created
> - echo $dumpdest/$lastf
> + if [ $((fcount+1)) -eq $fcount2 ]; then
> + echo $dumpdest/$lastf
> + else
> + ocf_log warn "dumpinstipc failed bacause the number of output files is wrong. before dump file count=$fcount, after dump file count=$fcount2, getipc result=$output"
> + return 1
> + fi
> }
> parseipc() {
> local inf=$1
> @@ -440,7 +459,13 @@ is_oracle_up() {
> }
> # instance in OPEN state?
> instance_live() {
> - [ "`dbasql dbstat`" = OPEN ]
> + output=`dbasql dbstat`
> + if [ "$output" = OPEN ]; then
> + return 0
> + else
> + ocf_log info "Instance state is not OPEN. dbstat result=$output"
> + return 1
> + fi
> }
>
> ora_cleanup() {
> @@ -498,6 +523,7 @@ oracle_start() {
> # try to cleanup in case of
> # ORA-01081: cannot start already-running ORACLE - shut it down first
> if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then
> + ocf_log info "ORA-01081 error was found. try to cleanup oracle. DB start output=$output"
> ora_cleanup
> output=`dbasql dbstart_mount`
> fi
> @@ -510,7 +536,7 @@ oracle_start() {
> ;;
> *)
> : error!!
> - ocf_log error "Oracle $ORACLE_SID can not mount."
> + ocf_log error "Oracle $ORACLE_SID can not mount. DB status=$status, DB start output=$output"
> return $OCF_ERR_GENERIC
> ;;
> esac
> @@ -523,13 +549,16 @@ oracle_start() {
> fi
> output=`dbasql dbopen`
>
> - if is_oracle_up && instance_live; then
> + if ! is_oracle_up; then
> + ocf_log err "Oracle process is not started: $output"
> + return $OCF_ERR_GENERIC
> + elif ! instance_live; then
> + ocf_log err "Oracle instance $ORACLE_SID not started: $output"
> + return $OCF_ERR_GENERIC
> + else
> : cool, we are up and running
> ocf_log info "Oracle instance $ORACLE_SID started: $output"
> return $OCF_SUCCESS
> - else
> - ocf_log err "Oracle instance $ORACLE_SID not started: $output"
> - return $OCF_ERR_GENERIC
> fi
> }
>
> @@ -562,13 +591,21 @@ killprocs() {
> killprocs() {
> local sig=$1
> shift 1
> - kill -$sig $* >/dev/null 2>&1
> + # Record stderr
> + kill -$sig $* >/dev/null
> }
> ora_kill() {
> - killprocs TERM `eval $procs | awk '{print $1}'`
> + oraprocs=`eval $procs | awk '{print $1}'`
> + if [ -z "$oraprocs" ]; then
> + ocf_log debug "All oracle processes are already stopped."
> + return
> + fi
> + killprocs TERM $oraprocs
> for i in 1 2 3 4 5; do
> - killprocs 0 `eval $procs | awk '{print $1}'` ||
> + if [ -z "`eval $procs | awk '{print $1}'`" ]; then
> + ocf_log debug "All oracle processes are killed."
> return
> + fi
> sleep 5
> done
> killprocs KILL `eval $procs | awk '{print $1}'`
> @@ -578,14 +615,16 @@ ora_kill() {
> # oracle_monitor: Can the Oracle instance do anything useful?
> #
> oracle_monitor() {
> - if is_oracle_up && instance_live
> - then
> - #ocf_log info "Oracle instance $ORACLE_SID is alive"
> - return $OCF_SUCCESS
> - else
> + if ! is_oracle_up; then
> + ocf_log info "Oracle process is down"
> + return $OCF_NOT_RUNNING
> + fi
> + if ! instance_live; then
> ocf_log info "Oracle instance $ORACLE_SID is down"
> return $OCF_NOT_RUNNING
> fi
> + #ocf_log info "Oracle instance $ORACLE_SID is alive"
> + return $OCF_SUCCESS
> }
>
> #
> @@ -675,10 +714,10 @@ case "$1" in
>
> status) if is_oracle_up
> then
> - echo Oracle instance $ORACLE_SID is running
> + ocf_log info "Oracle instance $ORACLE_SID is running"
> exit $OCF_SUCCESS
> else
> - echo Oracle instance $ORACLE_SID is stopped
> + ocf_log info "Oracle instance $ORACLE_SID is stopped"
> exit $OCF_NOT_RUNNING
> fi
> ;;
> # HG changeset patch
> # User root@prec370b
> # Date 1264145083 -32400
> # Node ID b36abb1554500f4bdf33858989205d02606c609d
> # Parent 0ecef9560522601936888ce168dae5f563662402
> oralsnr: improve logging
>
> diff -r 0ecef9560522 -r b36abb155450 heartbeat/oralsnr
> --- a/heartbeat/oralsnr Fri Jan 22 16:23:41 2010 +0900
> +++ b/heartbeat/oralsnr Fri Jan 22 16:24:43 2010 +0900
> @@ -158,11 +158,23 @@ ora_info() {
>
> testoraenv() {
> # Let's make sure a few important things are set...
> - [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
> + if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
> + ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
> return 1
> + fi
> # and some important things are there
> - [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
> + if [ ! -x "$sqlplus" ]; then
> + ocf_log err "Executeble sqlplus command($sqlplus) dose not exist."
> return 1
> + fi
> + if [ ! -x "$lsnrctl" ]; then
> + ocf_log err "Executeble lsnrctl command($lsnrctl) dose not exist."
> + return 1
> + fi
> + if [ ! -x "$tnsping" ]; then
> + ocf_log err "Executeble tnsping command($tnsping) dose not exist."
> + return 1
> + fi
> return 0
> }
>
> @@ -247,10 +259,17 @@ oralsnr_stop() {
> # kill the listener procs
> # give them 10 secs to exit cleanly (5 times 2)
> oralsnr_kill() {
> - killprocs TERM `eval $procs | awk '{print $1}'`
> + oraprocs=`eval $procs | awk '{print $1}'`
> + if [ -z "$oraprocs" ]; then
> + ocf_log debug "All oralsnr processes are already stopped."
> + return
> + fi
> + killprocs TERM $oraprocs
> for i in 1 2 3 4 5; do
> - killprocs 0 `eval $procs | awk '{print $1}'` ||
> + if [ -z "`eval $procs | awk '{print $1}'`" ]; then
> + ocf_log debug "All oralsnr processes are killed."
> return
> + fi
> sleep 2
> done
> killprocs KILL `eval $procs | awk '{print $1}'`
> @@ -258,7 +277,8 @@ killprocs() {
> killprocs() {
> sig=$1
> shift 1
> - kill -$sig $* >/dev/null 2>&1
> + # Record stderr
> + kill -$sig $* >/dev/null
> }
>
> #
> @@ -269,11 +289,23 @@ is_oralsnr_up() {
> [ x != "x`eval $procs`" ]
> }
> oralsnr_status() {
> - $lsnrctl status $listener | tail -1 | grep -qs 'completed successfully'
> + output=`$lsnrctl status $listener`
> + echo "$output" | tail -1 | grep -qs 'completed successfully'
> + RET=$?
> + if [ $RET -ne 0 ]; then
> + ocf_log info "$listener status failed: $output"
> + fi
> + return $RET
> }
> # and does it work?
> tnsping() {
> - $tnsping $ORACLE_SID | tail -1 | grep -qs '^OK'
> + output=`$tnsping $ORACLE_SID`
> + echo "$output" | tail -1 | grep -qs '^OK'
> + RET=$?
> + if [ $RET -ne 0 ]; then
> + ocf_log info "$tnsping $ORACLE_SID failed: $output"
> + fi
> + return $RET
> }
>
> #
> @@ -285,10 +317,10 @@ oralsnr_monitor() {
> : good
> #ocf_log info "Listener $listener running"
> return $OCF_SUCCESS
> - else
> + else
> ocf_log info "Listener $listener not running"
> return $OCF_NOT_RUNNING
> - fi
> + fi
> }
>
> #
> @@ -368,10 +400,10 @@ case "$1" in
>
> status) if oralsnr_status
> then
> - echo Listener $listener is running
> + ocf_log info "Listener $listener is running"
> exit $OCF_SUCCESS
> else
> - echo Listener $listener is stopped
> + ocf_log info "Listener $listener is stopped"
> exit $OCF_NOT_RUNNING
> fi
> ;;
> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic