[prev in list] [next in list] [prev in thread] [next in thread] 

List:       linux-ha-dev
Subject:    Re: [Linux-ha-dev] [PATCH] oracle/oralsnr RA: Improvement of the
From:       Dejan Muhamedagic <dejanmm () fastmail ! fm>
Date:       2010-01-22 10:07:13
Message-ID: 20100122100712.GB22406 () rondo ! homenet
[Download RAW message or body]

Hi Kazutomo-san,

On Fri, Jan 22, 2010 at 06:31:15PM +0900, NAKAHIRA Kazutomo wrote:
> Hi, all
> 
> I improved the logging of the oracle/oralsnr RA.
> This patch aims to record the execution results of sqlplus
> (and other commands) and to output a detailed failure log using
> ocf_log when problems occur during RA operation.

Thanks for the patch. There are a few spelling problems, but I'll
fix those. Also, the status operation must write to stdout; its
output is not meant for the logs.

Cheers,

Dejan

> Best Regards,
> NAKAHIRA Kazutomo
> 
> -- 
> ----------------------------------------
> NAKAHIRA Kazutomo
> NTT DATA INTELLILINK CORPORATION
> Open Source Business Unit
> Software Services Integration Business Division

> # HG changeset patch
> # User root@prec370b
> # Date 1264145021 -32400
> # Node ID 0ecef9560522601936888ce168dae5f563662402
> # Parent  3024963150433960c51aa1bdccde39839efb09b7
> oracle: improve logging
> 
> diff -r 302496315043 -r 0ecef9560522 heartbeat/oracle
> --- a/heartbeat/oracle	Thu Jan 21 16:42:40 2010 +0100
> +++ b/heartbeat/oracle	Fri Jan 22 16:23:41 2010 +0900
> @@ -231,11 +231,23 @@ ora_info() {
> 
> testoraenv() {
> 	#	Let's make sure a few important things are set...
> -	[ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
> +	if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
> +		ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
> 		return 1
> +	fi
> 	#	and some important things are there
> -	[ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
> +	if [ ! -x "$sqlplus" ]; then
> +		ocf_log err "Executeble sqlplus command($sqlplus) dose not exist."
> 		return 1
> +	fi
> +	if [ ! -x "$lsnrctl" ]; then
> +		ocf_log err "Executeble lsnrctl command($lsnrctl) dose not exist."
> +		return 1
> +	fi
> +	if [ ! -x "$tnsping" ]; then
> +		ocf_log err "Executeble tnsping command($tnsping) dose not exist."
> +		return 1
> +	fi
> 	return 0
> }
> 
> @@ -367,13 +379,20 @@ showdbstat() {
> # Part 1: Oracle
> dumpinstipc() {
> 	local dumpdest=`dbasql getdumpdest`
> -	[ "x$dumpdest" != x -a -d "$dumpdest" ] || return 1
> +	if [ "x$dumpdest" == x -o ! -d "$dumpdest" ]; then
> +		ocf_log warn "dumpdest($dumpdest) is not a regular directory."
> +		return 1
> +	fi
> 	local -i fcount=`ls -rt $dumpdest | wc -l`
> -	dbasql getipc >/dev/null 2>&1
> +	output=`dbasql getipc`
> 	local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1`
> 	local -i fcount2=`ls -rt $dumpdest | wc -l`
> -	[ $((fcount+1)) -eq $fcount2 ] || return 1  # more than one file created
> -	echo $dumpdest/$lastf
> +	if [ $((fcount+1)) -eq $fcount2 ]; then
> +		echo $dumpdest/$lastf
> +	else
> +		ocf_log warn "dumpinstipc failed bacause the number of output files is wrong. before dump file count=$fcount, after dump file count=$fcount2, getipc result=$output"
> +		return 1
> +	fi
> }
> parseipc() {
> 	local inf=$1
> @@ -440,7 +459,13 @@ is_oracle_up() {
> }
> # instance in OPEN state?
> instance_live() {
> -	[ "`dbasql dbstat`" = OPEN ]
> +	output=`dbasql dbstat`
> +	if [ "$output" = OPEN ]; then
> +		return 0
> +	else
> +		ocf_log info "Instance state is not OPEN. dbstat result=$output"
> +		return 1
> +	fi
> }
> 
> ora_cleanup() {
> @@ -498,6 +523,7 @@ oracle_start() {
> 		# try to cleanup in case of
> 		# ORA-01081: cannot start already-running ORACLE - shut it down first
> 		if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then
> +			ocf_log info "ORA-01081 error was found. try to cleanup oracle. DB start output=$output"
> 			ora_cleanup
> 			output=`dbasql dbstart_mount`
> 		fi
> @@ -510,7 +536,7 @@ oracle_start() {
> 		;;
> 	*)
> 		: error!!
> -		ocf_log error "Oracle $ORACLE_SID can not mount."
> +		ocf_log error "Oracle $ORACLE_SID can not mount. DB status=$status, DB start output=$output"
> 		return $OCF_ERR_GENERIC
> 		;;
> 	esac
> @@ -523,13 +549,16 @@ oracle_start() {
> 	fi
> 	output=`dbasql dbopen`
> 
> -	if is_oracle_up && instance_live; then
> +	if ! is_oracle_up; then
> +		ocf_log err "Oracle process is not started: $output"
> +		return $OCF_ERR_GENERIC
> +	elif ! instance_live; then
> +		ocf_log err "Oracle instance $ORACLE_SID not started: $output"
> +		return $OCF_ERR_GENERIC
> +	else
> 		: cool, we are up and running
> 		ocf_log info "Oracle instance $ORACLE_SID started: $output"
> 		return $OCF_SUCCESS
> -	else
> -		ocf_log err "Oracle instance $ORACLE_SID not started: $output"
> -		return $OCF_ERR_GENERIC
> 	fi
> }
> 
> @@ -562,13 +591,21 @@ killprocs() {
> killprocs() {
> 	local sig=$1
> 	shift 1
> -	kill -$sig $* >/dev/null 2>&1
> +	# Record stderr
> +	kill -$sig $* >/dev/null
> }
> ora_kill() {
> -	killprocs TERM `eval $procs | awk '{print $1}'`
> +	oraprocs=`eval $procs | awk '{print $1}'`
> +	if [ -z "$oraprocs" ]; then
> +		ocf_log debug "All oracle processes are already stopped."
> +		return
> +	fi
> +	killprocs TERM $oraprocs
> 	for i in 1 2 3 4 5; do
> -		killprocs 0 `eval $procs | awk '{print $1}'` ||
> +		if [ -z "`eval $procs | awk '{print $1}'`" ]; then
> +			ocf_log debug "All oracle processes are killed."
> 			return
> +		fi
> 		sleep 5
> 	done
> 	killprocs KILL `eval $procs | awk '{print $1}'`
> @@ -578,14 +615,16 @@ ora_kill() {
> # oracle_monitor: Can the Oracle instance do anything useful?
> #
> oracle_monitor() {
> -	if is_oracle_up && instance_live
> -	then
> -		#ocf_log info "Oracle instance $ORACLE_SID is alive"
> -		return $OCF_SUCCESS
> -	else
> +	if ! is_oracle_up; then
> +		ocf_log info "Oracle process is down"
> +		return $OCF_NOT_RUNNING
> +	fi
> +	if ! instance_live; then
> 		ocf_log info "Oracle instance $ORACLE_SID is down"
> 		return $OCF_NOT_RUNNING
> 	fi
> +	#ocf_log info "Oracle instance $ORACLE_SID is alive"
> +	return $OCF_SUCCESS
> }
> 
> #
> @@ -675,10 +714,10 @@ case "$1" in
> 
> status)	if is_oracle_up
> 		then
> -		  echo Oracle instance $ORACLE_SID is running
> +		  ocf_log info "Oracle instance $ORACLE_SID is running"
> 		  exit $OCF_SUCCESS
> 		else
> -		  echo Oracle instance $ORACLE_SID is stopped
> +		  ocf_log info "Oracle instance $ORACLE_SID is stopped"
> 		  exit $OCF_NOT_RUNNING
> 		fi
> 		;;

> # HG changeset patch
> # User root@prec370b
> # Date 1264145083 -32400
> # Node ID b36abb1554500f4bdf33858989205d02606c609d
> # Parent  0ecef9560522601936888ce168dae5f563662402
> oralsnr: improve logging
> 
> diff -r 0ecef9560522 -r b36abb155450 heartbeat/oralsnr
> --- a/heartbeat/oralsnr	Fri Jan 22 16:23:41 2010 +0900
> +++ b/heartbeat/oralsnr	Fri Jan 22 16:24:43 2010 +0900
> @@ -158,11 +158,23 @@ ora_info() {
> 
> testoraenv() {
> 	#	Let's make sure a few important things are set...
> -	[ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
> +	if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
> +		ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
> 		return 1
> +	fi
> 	#	and some important things are there
> -	[ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
> +	if [ ! -x "$sqlplus" ]; then
> +		ocf_log err "Executeble sqlplus command($sqlplus) dose not exist."
> 		return 1
> +	fi
> +	if [ ! -x "$lsnrctl" ]; then
> +		ocf_log err "Executeble lsnrctl command($lsnrctl) dose not exist."
> +		return 1
> +	fi
> +	if [ ! -x "$tnsping" ]; then
> +		ocf_log err "Executeble tnsping command($tnsping) dose not exist."
> +		return 1
> +	fi
> 	return 0
> }
> 
> @@ -247,10 +259,17 @@ oralsnr_stop() {
> # kill the listener procs
> # give them 10 secs to exit cleanly (5 times 2)
> oralsnr_kill() {
> -	killprocs TERM `eval $procs | awk '{print $1}'`
> +	oraprocs=`eval $procs | awk '{print $1}'`
> +	if [ -z "$oraprocs" ]; then
> +		ocf_log debug "All oralsnr processes are already stopped."
> +		return
> +	fi
> +	killprocs TERM $oraprocs
> 	for i in 1 2 3 4 5; do
> -		killprocs 0 `eval $procs | awk '{print $1}'` ||
> +		if [ -z "`eval $procs | awk '{print $1}'`" ]; then
> +			ocf_log debug "All oralsnr processes are killed."
> 			return
> +		fi
> 		sleep 2
> 	done
> 	killprocs KILL `eval $procs | awk '{print $1}'`
> @@ -258,7 +277,8 @@ killprocs() {
> killprocs() {
> 	sig=$1
> 	shift 1
> -	kill -$sig $* >/dev/null 2>&1
> +	# Record stderr
> +	kill -$sig $* >/dev/null
> }
> 
> #
> @@ -269,11 +289,23 @@ is_oralsnr_up() {
> 	[ x != "x`eval $procs`" ]
> }
> oralsnr_status() {
> -	$lsnrctl status $listener | tail -1 | grep -qs 'completed successfully'
> +	output=`$lsnrctl status $listener`
> +	echo "$output" | tail -1 | grep -qs 'completed successfully'
> +	RET=$?
> +	if [ $RET -ne 0 ]; then
> +		ocf_log info "$listener status failed: $output"
> +	fi
> +	return $RET
> }
> # and does it work?
> tnsping() {
> -	$tnsping $ORACLE_SID | tail -1 | grep -qs '^OK'
> +	output=`$tnsping $ORACLE_SID`
> +	echo "$output" | tail -1 | grep -qs '^OK'
> +	RET=$?
> +	if [ $RET -ne 0 ]; then
> +		ocf_log info "$tnsping $ORACLE_SID failed: $output"
> +	fi
> +	return $RET
> }
> 
> #
> @@ -285,10 +317,10 @@ oralsnr_monitor() {
> 		: good
> 		#ocf_log info "Listener $listener running"
> 		return $OCF_SUCCESS
> -    else
> +	else
> 		ocf_log info "Listener $listener not running"
> 		return $OCF_NOT_RUNNING
> -    fi
> +	fi
> }
> 
> #
> @@ -368,10 +400,10 @@ case "$1" in
> 
> status)	if oralsnr_status
> 		then
> -		  echo Listener $listener is running
> +		  ocf_log info "Listener $listener is running"
> 		  exit $OCF_SUCCESS
> 		else
> -		  echo Listener $listener is stopped
> +		  ocf_log info "Listener $listener is stopped"
> 		  exit $OCF_NOT_RUNNING
> 		fi
> 		;;

> _______________________________________________________
> Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
> Home Page: http://linux-ha.org/

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic