[prev in list] [next in list] [prev in thread] [next in thread] 

List:       postgresql-general
Subject:    [HACKERS] Hot Standby and deadlock detection
From:       Simon Riggs <simon () 2ndQuadrant ! com>
Date:       2010-01-31 21:54:05
Message-ID: 1264974845.13782.8940.camel () ebony
[Download RAW message or body]

Greg Stark has requested that I re-add max_standby_delay = -1.
I deferred that in favour of relation-specific conflict resolution,
though from the comments received that seems too major a change.

As discussed in various other posts, in order to re-add the -1 option we
need to add deadlock detection. I woke up today with a simplifying
assumption and have worked out a solution, the easy parts of which I
have committed earlier.

Part #2 is to make the Startup process do deadlock detection. I attach a
WIP patch for comments, since signal handling has been a much-discussed
area in recent weeks.

Normal deadlock detection waits for deadlock_timeout before doing the
detection. That is a simple performance tuning mechanism which I think
is probably unnecessary with hot standby, at least in the first
instance.

The way this would work is that if Startup waits on a buffer pin, we
immediately send out a request to all backends to cancel themselves if
they are holding the required buffer pin and are waiting on a lock. We
then sleep until max_standby_delay. When max_standby_delay = -1 we only
sleep until deadlock_timeout and then check (on the Startup process).

That keeps the signal handler code simple and reduces the number of test
cases required to confirm everything is solid.

This patch and the last commit together present everything we need to
re-enable max_standby_delay = -1, so that change is included here also.

?

-- 
 Simon Riggs           www.2ndQuadrant.com

["startup_deadlock_check.patch" (startup_deadlock_check.patch)]

*** a/src/backend/storage/ipc/procsignal.c
--- b/src/backend/storage/ipc/procsignal.c
***************
*** 272,277 **** procsignal_sigusr1_handler(SIGNAL_ARGS)
--- 272,280 ----
  	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT))
  		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT);
  
+ 	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK))
+ 		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ 
  	if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN))
  		RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
  
*** a/src/backend/storage/ipc/standby.c
--- b/src/backend/storage/ipc/standby.c
***************
*** 127,132 **** WaitExceedsMaxStandbyDelay(void)
--- 127,135 ----
  	long	delay_secs;
  	int		delay_usecs;
  
+ 	if (MaxStandbyDelay == -1)
+ 		return false;
+ 
  	/* Are we past max_standby_delay? */
  	TimestampDifference(GetLatestXLogTime(), GetCurrentTimestamp(),
  						&delay_secs, &delay_usecs);
***************
*** 372,378 **** ResolveRecoveryConflictWithBufferPin(void)
  	 * Signal immediately or set alarm for later.
  	 */
  	if (MaxStandbyDelay == 0)
! 		SendRecoveryConflictWithBufferPin();
  	else
  	{
  		TimestampTz now;
--- 375,391 ----
  	 * Signal immediately or set alarm for later.
  	 */
  	if (MaxStandbyDelay == 0)
! 		SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
! 	else if (MaxStandbyDelay == -1)
! 	{
! 		/*
! 		 * Set the deadlock timer only, we don't timeout in any other case
! 		 */
! 		if (enable_standby_deadlock_alarm())
! 			sig_alarm_enabled = true;
! 		else
! 			elog(FATAL, "could not set timer for process wakeup");
! 	}
  	else
  	{
  		TimestampTz now;
***************
*** 386,392 **** ResolveRecoveryConflictWithBufferPin(void)
  							&standby_delay_secs, &standby_delay_usecs);
  
  		if (standby_delay_secs >= MaxStandbyDelay)
! 			SendRecoveryConflictWithBufferPin();
  		else
  		{
  			TimestampTz fin_time;			/* Expected wake-up time by timer */
--- 399,405 ----
  							&standby_delay_secs, &standby_delay_usecs);
  
  		if (standby_delay_secs >= MaxStandbyDelay)
! 			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN);
  		else
  		{
  			TimestampTz fin_time;			/* Expected wake-up time by timer */
***************
*** 394,399 **** ResolveRecoveryConflictWithBufferPin(void)
--- 407,419 ----
  			int		timer_delay_usecs = 0;
  
  			/*
+ 			 * Send out a request to check for buffer pin deadlocks before we wait.
+ 			 * This is fairly cheap, so no need to wait for deadlock timeout before
+ 			 * trying to send it out.
+ 			 */
+ 			SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ 
+ 			/*
  			 * How much longer we should wait?
  			 */
  			timer_delay_secs = MaxStandbyDelay - standby_delay_secs;
***************
*** 435,449 **** ResolveRecoveryConflictWithBufferPin(void)
  }
  
  void
! SendRecoveryConflictWithBufferPin(void)
  {
  	/*
  	 * We send signal to all backends to ask them if they are holding
  	 * the buffer pin which is delaying the Startup process. We must
  	 * not set the conflict flag yet, since most backends will be innocent.
  	 * Let the SIGUSR1 handling in each backend decide their own fate.
  	 */
! 	CancelDBBackends(InvalidOid, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, false);
  }
  
  /*
--- 455,472 ----
  }
  
  void
! SendRecoveryConflictWithBufferPin(ProcSignalReason reason)
  {
+ 	Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN ||
+ 		   reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK);
+ 
  	/*
  	 * We send signal to all backends to ask them if they are holding
  	 * the buffer pin which is delaying the Startup process. We must
  	 * not set the conflict flag yet, since most backends will be innocent.
  	 * Let the SIGUSR1 handling in each backend decide their own fate.
  	 */
! 	CancelDBBackends(InvalidOid, reason, false);
  }
  
  /*
*** a/src/backend/storage/lmgr/proc.c
--- b/src/backend/storage/lmgr/proc.c
***************
*** 45,50 ****
--- 45,51 ----
  #include "storage/pmsignal.h"
  #include "storage/proc.h"
  #include "storage/procarray.h"
+ #include "storage/procsignal.h"
  #include "storage/spin.h"
  
  
***************
*** 77,82 **** static volatile bool standby_timeout_active = false;
--- 78,84 ----
  static volatile bool statement_timeout_active = false;
  static volatile bool deadlock_timeout_active = false;
  static volatile DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
+ static volatile ProcSignalReason action_at_standby_timeout;
  volatile bool cancel_from_timeout = false;
  
  /* timeout_start_time is set when log_lock_waits is true */
***************
*** 556,561 **** HaveNFreeProcs(int n)
--- 558,572 ----
  	return (n <= 0);
  }
  
+ bool
+ IsWaitingForLock(void)
+ {
+ 	if (lockAwaited == NULL)
+ 		return false;
+ 
+ 	return true;
+ }
+ 
  /*
   * Cancel any pending wait for lock, when aborting a transaction.
   *
***************
*** 1618,1624 **** enable_standby_sig_alarm(long delay_s, int delay_us, TimestampTz fin_time)
  	Assert(delay_s >= 0 && delay_us >= 0);
  
  	statement_fin_time = fin_time;
! 
  	standby_timeout_active = true;
  
  	MemSet(&timeval, 0, sizeof(struct itimerval));
--- 1629,1635 ----
  	Assert(delay_s >= 0 && delay_us >= 0);
  
  	statement_fin_time = fin_time;
! 	action_at_standby_timeout = PROCSIG_RECOVERY_CONFLICT_BUFFERPIN;
  	standby_timeout_active = true;
  
  	MemSet(&timeval, 0, sizeof(struct itimerval));
***************
*** 1630,1635 **** enable_standby_sig_alarm(long delay_s, int delay_us, TimestampTz fin_time)
--- 1641,1664 ----
  }
  
  bool
+ enable_standby_deadlock_alarm(void)
+ {
+ 	struct itimerval timeval;
+ 
+ 	statement_fin_time = TimestampTzPlusMilliseconds(GetCurrentTimestamp(),
+ 													(DeadlockTimeout * 1000));
+ 	action_at_standby_timeout = PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK;
+ 	standby_timeout_active = true;
+ 
+ 	MemSet(&timeval, 0, sizeof(struct itimerval));
+ 	timeval.it_value.tv_sec = DeadlockTimeout;
+ 	timeval.it_value.tv_usec = 0;
+ 	if (setitimer(ITIMER_REAL, &timeval, NULL))
+ 		return false;
+ 	return true;
+ }
+ 
+ bool
  disable_standby_sig_alarm(void)
  {
  	/*
***************
*** 1671,1677 **** CheckStandbyTimeout(void)
  	now = GetCurrentTimestamp();
  
  	if (now >= statement_fin_time)
! 		SendRecoveryConflictWithBufferPin();
  	else
  	{
  		/* Not time yet, so (re)schedule the interrupt */
--- 1700,1706 ----
  	now = GetCurrentTimestamp();
  
  	if (now >= statement_fin_time)
! 		SendRecoveryConflictWithBufferPin(action_at_standby_timeout);
  	else
  	{
  		/* Not time yet, so (re)schedule the interrupt */
*** a/src/backend/tcop/postgres.c
--- b/src/backend/tcop/postgres.c
***************
*** 2278,2283 **** errdetail_recovery_conflict(void)
--- 2278,2286 ----
  		case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
  				errdetail("User query might have needed to see row versions that must be removed.");
  				break;
+ 		case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ 				errdetail("User transaction caused buffer deadlock with recovery.");
+ 				break;
  		case PROCSIG_RECOVERY_CONFLICT_DATABASE:
  				errdetail("User was connected to a database that must be dropped.");
  				break;
***************
*** 2754,2759 **** RecoveryConflictInterrupt(ProcSignalReason reason)
--- 2757,2771 ----
  		RecoveryConflictReason = reason;
  		switch (reason)
  		{
+ 			case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ 					/*
+ 					 * If we aren't waiting for a lock we can never deadlock.
+ 					 */
+ 					if (!IsWaitingForLock())
+ 						return;
+ 
+ 					/* Intentional drop through to check wait for pin */
+ 
  			case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
  					/*
  					 * If we aren't blocking the Startup process there is
***************
*** 2819,2824 **** RecoveryConflictInterrupt(ProcSignalReason reason)
--- 2831,2838 ----
  					elog(FATAL, "Unknown conflict mode");
  		}
  
+ 		Assert(RecoveryConflictPending && (QueryCancelPending || ProcDiePending));
+ 
  		/*
  		 * If it's safe to interrupt, and we're waiting for input or a lock,
  		 * service the interrupt immediately
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 1392,1398 **** static struct config_int ConfigureNamesInt[] =
  			NULL
  		},
  		&MaxStandbyDelay,
! 		30, 0, INT_MAX, NULL, NULL
  	},
  
  	{
--- 1392,1398 ----
  			NULL
  		},
  		&MaxStandbyDelay,
! 		30, -1, INT_MAX, NULL, NULL
  	},
  
  	{
*** a/src/include/storage/proc.h
--- b/src/include/storage/proc.h
***************
*** 189,194 **** extern void ProcQueueInit(PROC_QUEUE *queue);
--- 189,195 ----
  extern int	ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
  extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
  extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
+ extern bool IsWaitingForLock(void);
  extern void LockWaitCancel(void);
  
  extern void ProcWaitForSignal(void);
***************
*** 199,204 **** extern bool disable_sig_alarm(bool is_statement_timeout);
--- 200,206 ----
  extern void handle_sig_alarm(SIGNAL_ARGS);
  
  extern bool enable_standby_sig_alarm(long delay_s, int delay_us, TimestampTz fin_time);
+ extern bool enable_standby_deadlock_alarm(void);
  extern bool disable_standby_sig_alarm(void);
  extern void handle_standby_sig_alarm(SIGNAL_ARGS);
  
*** a/src/include/storage/procsignal.h
--- b/src/include/storage/procsignal.h
***************
*** 38,43 **** typedef enum
--- 38,44 ----
  	PROCSIG_RECOVERY_CONFLICT_LOCK,
  	PROCSIG_RECOVERY_CONFLICT_SNAPSHOT,
  	PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
+ 	PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK,
  
  	NUM_PROCSIGNALS				/* Must be last! */
  } ProcSignalReason;
*** a/src/include/storage/standby.h
--- b/src/include/storage/standby.h
***************
*** 16,21 ****
--- 16,22 ----
  
  #include "access/xlog.h"
  #include "storage/lock.h"
+ #include "storage/procsignal.h"
  #include "storage/relfilenode.h"
  
  extern int	vacuum_defer_cleanup_age;
***************
*** 30,36 **** extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
  extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
  
  extern void ResolveRecoveryConflictWithBufferPin(void);
! extern void SendRecoveryConflictWithBufferPin(void);
  extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock);
  
  /*
--- 31,37 ----
  extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
  
  extern void ResolveRecoveryConflictWithBufferPin(void);
! extern void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
  extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock);
  
  /*


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic