[prev in list] [next in list] [prev in thread] [next in thread] 

List:       sanlock-devel
Subject:    [sanlock] 01/04: sanlock: fix detection of shared lease
From:       git () pagure ! io (git repository hosting)
Date:       2017-11-28 22:58:17
Message-ID: 20171128225816.E76C04CA5AEE () pagure01 ! fedoraproject ! org
[Download RAW message or body]

This is an automated email from the git hooks/post-receive script.

teigland pushed a commit to branch testing2
in repository sanlock.

commit 4523a63bb21983d64d4f526955850b40d4ec131b
Author: David Teigland <teigland@redhat.com>
Date:   Tue Nov 21 11:02:17 2017 -0600

    sanlock: fix detection of shared lease
    
    When a host is acquiring a lease and detects that another
    host holds it shared, it will check if the host with the
    shared lease is dead.  Before the dead-host check, the
    shared lease holder may have released its shared lease by
    clearing its mode_block.  The host checking the shared lease
    needs to check if the shared lease has been released.
---
 src/resource.c | 53 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 15 deletions(-)

diff --git a/src/resource.c b/src/resource.c
index 317fcb7..6309b9c 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -291,8 +291,9 @@ void check_mode_block(struct token *token, uint64_t next_lver, int q, char *dblo
 	if (mb.flags & MBLOCK_SHARED) {
 		set_id_bit(q + 1, token->shared_bitmap, NULL);
 		token->shared_count++;
-		log_token(token, "ballot %llu mode[%d] shared %d",
-			  (unsigned long long)next_lver, q, token->shared_count);
+		log_token(token, "ballot %llu mode[%d] shared %d gen %llu",
+			  (unsigned long long)next_lver, q, token->shared_count,
+			  (unsigned long long)mb.generation);
 	}
 }
 
@@ -370,14 +371,13 @@ static int write_host_block(struct task *task, struct token *token,
 }
 
 static int read_mode_block(struct task *task, struct token *token,
-			   uint64_t host_id, uint64_t *max_gen)
+			   uint64_t host_id, struct mode_block *mb_out)
 {
 	struct sync_disk *disk;
 	struct mode_block *mb_end;
 	struct mode_block mb;
 	char *iobuf, **p_iobuf;
 	uint64_t offset;
-	uint64_t max = 0;
 	int num_disks = token->r.num_disks;
 	int iobuf_len, rv, d;
 
@@ -406,24 +406,23 @@ static int read_mode_block(struct task *task, struct token *token,
 
 		mode_block_in(mb_end, &mb);
 
-		if (!(mb.flags & MBLOCK_SHARED))
-			continue;
+		memcpy(mb_out, &mb, sizeof(struct mode_block));
 
-		if (!max || mb.generation > max)
-			max = mb.generation;
+		/* FIXME: only the first mode_block copied is returned; combine results for multi-disk case */
+		break;
 	}
 
 	if (rv != SANLK_AIO_TIMEOUT)
 		free(iobuf);
 
-	*max_gen = max;
 	return rv;
 }
 
 static int clear_dead_shared(struct task *task, struct token *token,
 			     int num_hosts, int *live_count)
 {
-	uint64_t host_id, max_gen = 0;
+	struct mode_block mb;
+	uint64_t host_id;
 	int i, rv = 0, live = 0;
 
 	for (i = 0; i < num_hosts; i++) {
@@ -435,16 +434,36 @@ static int clear_dead_shared(struct task *task, struct token *token,
 		if (!test_id_bit(host_id, token->shared_bitmap))
 			continue;
 
-		rv = read_mode_block(task, token, host_id, &max_gen);
+		memset(&mb, 0, sizeof(mb));
+
+		rv = read_mode_block(task, token, host_id, &mb);
 		if (rv < 0) {
 			log_errot(token, "clear_dead_shared read_mode_block %llu %d",
 				  (unsigned long long)host_id, rv);
 			return rv;
 		}
 
-		if (host_live(token->r.lockspace_name, host_id, max_gen)) {
+		log_token(token, "clear_dead_shared host_id %llu mode_block: flags %x gen %llu",
+			  (unsigned long long)host_id, mb.flags, (unsigned long long)mb.generation);
+
+		/*
+		 * We get to this function because we saw the shared flag during
+		 * paxos, but the holder of the shared lease may have dropped their
+		 * shared lease and cleared the mode_block since then.
+		 */
+		if (!(mb.flags & MBLOCK_SHARED))
+			continue;
+
+		if (!mb.generation) {
+			/* shouldn't happen; if the shared flag is set, the generation should also be set. */
+			log_errot(token, "clear_dead_shared host_id %llu mode_block: flags %x gen %llu",
+				  (unsigned long long)host_id, mb.flags, (unsigned long long)mb.generation);
+			continue;
+		}
+
+		if (host_live(token->r.lockspace_name, host_id, mb.generation)) {
 			log_token(token, "clear_dead_shared host_id %llu gen %llu alive",
-				  (unsigned long long)host_id, (unsigned long long)max_gen);
+				  (unsigned long long)host_id, (unsigned long long)mb.generation);
 			live++;
 			continue;
 		}
@@ -456,8 +475,12 @@ static int clear_dead_shared(struct task *task, struct token *token,
 			return rv;
 		}
 
-		log_token(token, "clear_dead_shared host_id %llu gen %llu dead and cleared",
-			  (unsigned long long)host_id, (unsigned long long)max_gen);
+		/*
+		 * Not an error; logged with log_errot only so there is a record of
+		 * when we clear a shared lease that was left by a failed host.
+		 */
+		log_errot(token, "cleared shared lease for dead host_id %llu gen %llu",
+			  (unsigned long long)host_id, (unsigned long long)mb.generation);
 	}
 
 	*live_count = live;

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.
_______________________________________________
sanlock-devel mailing list -- sanlock-devel@lists.fedorahosted.org
To unsubscribe send an email to sanlock-devel-leave@lists.fedorahosted.org

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic