[prev in list] [next in list] [prev in thread] [next in thread] 

List:       drbd-cvs
Subject:    [DRBD-cvs] drbd by phil; [Patch by Lars] * we requested a "low me...
From:       drbd-cvs () linbit ! com
Date:       2004-04-29 14:43:32
Message-ID: 20040429144332.7374515DF5C () garcon ! linbit ! com
[Download RAW message or body]

DRBD CVS committal

Author  : phil
Module  : drbd

Dir     : drbd/drbd


Modified Files:
      Tag: rel-0_7-branch
	drbd_actlog.c drbd_compat_wrappers.h drbd_fs.c drbd_int.h 
	drbd_main.c drbd_receiver.c drbd_req-2.4.c 


Log Message:
[Patch by Lars]
* we requested a "low mem page" for md_io, so we do not need to kmap it.
* always plug the device, since if "they" no longer do blk_run_queues(),
  we need to pass the kick to the lo dev ourselves.
* do a "yield()" after thi->task = NULL, so there is not even a
  theoretical race anymore in wake_asender()

* several smp_mb(), without detailed understanding, not even whether
  they are useful there :(
  so you may want to drop them again.

* improvement to verify_ips() in drbdadm

===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_actlog.c,v
retrieving revision 1.1.2.93
retrieving revision 1.1.2.94
diff -u -3 -r1.1.2.93 -r1.1.2.94
--- drbd_actlog.c	28 Apr 2004 13:36:07 -0000	1.1.2.93
+++ drbd_actlog.c	29 Apr 2004 14:43:26 -0000	1.1.2.94
@@ -43,9 +43,8 @@
 
 #ifdef PARANOIA
 	if (rw != WRITE) {
-		void *b = kmap(mdev->md_io_page);
+		void *b = page_address(mdev->md_io_page);
 		memset(b,0,PAGE_SIZE);
-		kunmap(mdev->md_io_page);
 	}
 #endif
 	init_completion(&event);
@@ -73,9 +72,8 @@
 
 #ifdef PARANOIA
 	if (rw != WRITE) {
-		void *b = kmap(mdev->md_io_page);
+		void *b = page_address(mdev->md_io_page);
 		memset(b,0,PAGE_SIZE);
-		kunmap(mdev->md_io_page);
 	}
 #endif
 	bio_init(&bio);
@@ -231,7 +229,7 @@
 	u32 xor_sum=0;
 
 	down(&mdev->md_io_mutex); // protects md_io_buffer, al_tr_cycle, ...
-	buffer = (struct al_transaction*)kmap(mdev->md_io_page);
+	buffer = (struct al_transaction*)page_address(mdev->md_io_page);
 
 	buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
 	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
@@ -266,7 +264,6 @@
 
 	buffer->xor_sum = cpu_to_be32(xor_sum);
 
-	kunmap(mdev->md_io_page);
 
 	sector = drbd_md_ss(mdev) + MD_AL_OFFSET + mdev->al_tr_pos ;
 
@@ -324,7 +321,7 @@
 	 * and make sure the page is mapped.
 	 */
 	down(&mdev->md_io_mutex);
-	buffer = kmap(mdev->md_io_page);
+	buffer = page_address(mdev->md_io_page);
 
 	// Find the valid transaction in the log
 	for(i=0;i<=mx;i++) {
@@ -347,7 +344,6 @@
 	if(from == -1 || to == -1) {
 		WARN("No usable activity log found.\n");
 
-		kunmap(mdev->md_io_page);
 		up(&mdev->md_io_mutex);
 		return;
 	}
@@ -396,7 +392,6 @@
 	}
 
 	/* ok, we are done with it */
-	kunmap(mdev->md_io_page);
 	up(&mdev->md_io_mutex);
 
 	INFO("Found %d transactions (%d active extents) in activity log.\n",
@@ -525,7 +520,7 @@
 	bm = mdev->mbds_id->bm;
 
 	down(&mdev->md_io_mutex);
-	buffer = (unsigned long *)kmap(mdev->md_io_page);
+	buffer = (unsigned long *)page_address(mdev->md_io_page);
 
 	while (1) {
 		want=min_t(int,512/sizeof(long),bm_words-bm_i);
@@ -548,7 +543,6 @@
 		}
 	}
 
-	kunmap(mdev->md_io_page);
 	up(&mdev->md_io_mutex);
 
 	mdev->rs_total = (bits << (BM_BLOCK_SIZE_B - 9)) +
@@ -589,13 +583,11 @@
 	want=min_t(unsigned int,512/sizeof(long),bm_words-bm_i);
 
 	down(&mdev->md_io_mutex); // protects md_io_buffer
-	buffer = (unsigned long *)kmap(mdev->md_io_page);
+	buffer = (unsigned long *)page_address(mdev->md_io_page);
 
 	for(buf_i=0;buf_i<want;buf_i++) {
 		buffer[buf_i] = cpu_to_lel(bm[bm_i++]);
 	}
-
-	kunmap(mdev->md_io_page);
 
 	sector = drbd_md_ss(mdev) + MD_BM_OFFSET + enr/EXTENTS_PER_SECTOR;
 
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_compat_wrappers.h,v
retrieving revision 1.1.2.38
retrieving revision 1.1.2.39
diff -u -3 -r1.1.2.38 -r1.1.2.39
--- drbd_compat_wrappers.h	18 Apr 2004 10:33:56 -0000	1.1.2.38
+++ drbd_compat_wrappers.h	29 Apr 2004 14:43:26 -0000	1.1.2.39
@@ -245,7 +245,10 @@
 
 static inline void drbd_plug_device(drbd_dev *mdev)
 {
-	if(!test_and_set_bit(WRITE_HINT_QUEUED,&mdev->flags)) {
+	D_ASSERT(mdev->state == Primary);
+	if (mdev->cstate < Connected)
+		return;
+	if (!test_and_set_bit(WRITE_HINT_QUEUED,&mdev->flags)) {
 		queue_task(&mdev->write_hint_tq, &tq_disk); // IO HINT
 	}
 }
@@ -525,6 +528,10 @@
 	request_queue_t *q = bdev_get_queue(mdev->this_bdev);
 
 	spin_lock_irq(q->queue_lock);
+
+/* XXX the check on !blk_queue_plugged is redundant,
+ * implicitly checked in blk_plug_device */
+
 	if(!blk_queue_plugged(q)) {
 		blk_plug_device(q);
 		del_timer(&q->unplug_timer);
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_fs.c,v
retrieving revision 1.28.2.81
retrieving revision 1.28.2.82
diff -u -3 -r1.28.2.81 -r1.28.2.82
--- drbd_fs.c	27 Apr 2004 14:23:18 -0000	1.28.2.81
+++ drbd_fs.c	29 Apr 2004 14:43:26 -0000	1.28.2.82
@@ -173,6 +173,7 @@
 	minor=(int)(mdev-drbd_conf);
 
 	/* if you want to reconfigure, please tear down first */
+	smp_rmb();
 	if (!test_bit(DISKLESS,&mdev->flags))
 		return -EBUSY;
 
@@ -528,6 +529,7 @@
 		return -EACCES;
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+	smp_rmb();
 	if(newstate == Secondary &&
 	   (test_bit(WRITER_PRESENT,&mdev->flags) ||
 	    drbd_is_mounted(minor) == MountedRW))
@@ -692,6 +694,7 @@
 	D_ASSERT(disk == mdev->vdisk);
 );
 
+	smp_rmb();
 	switch (cmd) {
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
 /* see how sys_ioctl and blkdev_ioctl handle it in 2.6 .
@@ -743,8 +746,6 @@
 		mdev->lo_usize = (unsigned long)arg;
 		drbd_determin_dev_size(mdev);
 		drbd_md_write(mdev); // Write mdev->la_size to disk.
-		//#warning "yet an other reason to serialize all state changes on a rw_semaphore"
-		// PRE: Please explain the issue.
 		if (mdev->cstate == Connected) drbd_send_param(mdev,0);
 		break;
 
@@ -762,7 +763,15 @@
 		break;
 
 	case DRBD_IOCTL_UNCONFIG_NET:
-		if( mdev->cstate == Unconfigured) break;
+		if ( mdev->cstate == Unconfigured) break;
+		if (  (   mdev->state  == Primary
+		       && test_bit(DISKLESS,&mdev->flags) )
+		   || (   mdev->o_state == Primary
+		       && test_bit(PARTNER_DISKLESS,&mdev->flags) ) )
+		{
+			err=-ENODATA;
+			break;
+		}
 		/* FIXME what if fsync returns error */
 		drbd_sync_me(mdev);
 		set_bit(DO_NOT_INC_CONCNT,&mdev->flags);
@@ -800,7 +809,7 @@
 		drbd_sync_me(mdev);
 
 		set_bit(DISKLESS,&mdev->flags);
-		smp_mb__after_clear_bit();
+		smp_wmb();
 		if ( wait_event_interruptible(mdev->cstate_wait,
 					      atomic_read(&mdev->local_cnt)==0) ) {
 			clear_bit(DISKLESS,&mdev->flags);
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.58.2.148
retrieving revision 1.58.2.149
diff -u -3 -r1.58.2.148 -r1.58.2.149
--- drbd_int.h	27 Apr 2004 14:23:18 -0000	1.58.2.148
+++ drbd_int.h	29 Apr 2004 14:43:26 -0000	1.58.2.149
@@ -694,7 +694,7 @@
 	wait_queue_head_t ee_wait;
 	struct list_head busy_blocks;
 	NOT_IN_26(struct tq_struct write_hint_tq;)
-	struct page *md_io_page; // one page buffer for md_io
+	struct page *md_io_page;      // one page buffer for md_io
 	struct semaphore md_io_mutex; // protects the md_io_buffer
 	spinlock_t al_lock;
 	wait_queue_head_t al_wait;
@@ -752,6 +752,7 @@
 extern int drbd_md_read(drbd_dev *mdev);
 extern void drbd_md_inc(drbd_dev *mdev, enum MetaDataIndex order);
 extern int drbd_md_compare(drbd_dev *mdev,Drbd_Parameter_Packet *partner);
+extern void drbd_dump_md(drbd_dev *, Drbd_Parameter_Packet *, int );
 
 // drbd_bitmap.c (still in drbd_main.c)
 #define SS_OUT_OF_SYNC (1)
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.73.2.156
retrieving revision 1.73.2.157
diff -u -3 -r1.73.2.156 -r1.73.2.157
--- drbd_main.c	27 Apr 2004 13:49:23 -0000	1.73.2.156
+++ drbd_main.c	29 Apr 2004 14:43:26 -0000	1.73.2.157
@@ -399,11 +399,6 @@
 	}
 	if(test_bit(MD_IO_ALLOWED,&mdev->flags) &&
 	   test_bit(DISKLESS,&mdev->flags) && ns < Connected) {
-
-/* are you SURE you want this HERE ? */
-
-		clear_bit(DISKLESS,&mdev->flags);
-		smp_wmb();
 		clear_bit(MD_IO_ALLOWED,&mdev->flags);
 	}
 }
@@ -420,6 +415,11 @@
 
 	thi->task = 0;
 
+	/* propagate task == NULL to other CPUs */
+	smp_mb();         // necessary?
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule_timeout(1);
+
 	up(&thi->mutex); //allow thread_stop to proceed
 
 	return retval;
@@ -1050,21 +1050,25 @@
 #endif
 	drbd_dev *mdev = q->queuedata;
 
+	INFO("%s [%d]: unplug\n",current->comm, current->pid);
 	/* unplug FIRST */
 	spin_lock_irq(q->queue_lock);
 	blk_remove_plug(q);
 	spin_unlock_irq(q->queue_lock);
 
-	ERR_IF(mdev->state != Primary)
-		return;
-	/* add to the front of the data.work queue,
-         * unless already queued */
-	spin_lock_irq(&mdev->req_lock);
-	/* FIXME this might be a good addition to drbd_queu_work
-	 * anyways, to detect "double queuing" ... */
-	if (list_empty(&mdev->unplug_work.list))
-		_drbd_queue_work_front(&mdev->data.work,&mdev->unplug_work);
-	spin_unlock_irq(&mdev->req_lock);
+	/* only if connected */
+	if (mdev->cstate >= Connected) {
+		D_ASSERT(mdev->state == Primary);
+		spin_lock_irq(&mdev->req_lock);
+		/* add to the front of the data.work queue,
+		 * unless already queued.
+		 * XXX this might be a good addition to drbd_queue_work
+		 * anyways, to detect "double queuing" ... */
+		if (list_empty(&mdev->unplug_work.list))
+			_drbd_queue_work_front(&mdev->data.work,&mdev->unplug_work);
+		spin_unlock_irq(&mdev->req_lock);
+	}
+	/* allways */
 	drbd_kick_lo(mdev);
 }
 #endif
@@ -2032,7 +2036,7 @@
 	if(!inc_local_md_only(mdev)) return;
 
 	down(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)kmap(mdev->md_io_page);
+	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
 
 	flags=mdev->gen_cnt[Flags] & ~(MDF_PrimaryInd|MDF_ConnectedInd);
 	if(mdev->state==Primary) flags |= MDF_PrimaryInd;
@@ -2050,8 +2054,6 @@
 
 	buffer->bm_offset = __constant_cpu_to_be32(MD_BM_OFFSET);
 
-	kunmap(mdev->md_io_page);
-
 	sector = drbd_md_ss(mdev) + MD_GC_OFFSET;
 
 	/* FIXME what if this fails ?? */
@@ -2071,7 +2073,7 @@
 	if(!inc_local_md_only(mdev)) return -1;
 
 	down(&mdev->md_io_mutex);
-	buffer = (struct meta_data_on_disk *)kmap(mdev->md_io_page);
+	buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
 
 	sector = drbd_md_ss(mdev) + MD_GC_OFFSET;
 
@@ -2086,14 +2088,12 @@
 	mdev->la_size = be64_to_cpu(buffer->la_size);
 	mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
 
-	kunmap(mdev->md_io_page);
 	up(&mdev->md_io_mutex);
 	dec_local(mdev);
 
 	return 1;
 
  err:
-	kunmap(mdev->md_io_page);
 	up(&mdev->md_io_mutex);
 	dec_local(mdev);
 
@@ -2108,6 +2108,44 @@
 	return 0;
 }
 
+#if 0
+#define MeGC(x) mdev->gen_cnt[x]
+#define PeGC(x) be32_to_cpu(peer->gen_cnt[x])
+
+void drbd_dump_md(drbd_dev *mdev, Drbd_Parameter_Packet *peer, int verbose)
+{
+	INFO("MeGCs: %c:%08x:%08x:%08x:%08x:%c%c\n",
+		MeGC(Flags) & MDF_Consistent ? '1' : '0',
+		MeGC(HumanCnt),
+		MeGC(TimeoutCnt),
+		MeGC(ConnectedCnt),
+		MeGC(ArbitraryCnt),
+		MeGC(Flags) & MDF_PrimaryInd   ? '1' : '0',
+		MeGC(Flags) & MDF_ConnectedInd ? '1' : '0');
+	if (peer) {
+		INFO("PeGCs: %c:%08x:%08x:%08x:%08x:%c%c\n",
+			PeGC(Flags) & MDF_Consistent ? '1' : '0',
+			PeGC(HumanCnt),
+			PeGC(TimeoutCnt),
+			PeGC(ConnectedCnt),
+			PeGC(ArbitraryCnt),
+			PeGC(Flags) & MDF_PrimaryInd   ? '1' : '0',
+			PeGC(Flags) & MDF_ConnectedInd ? '1' : '0');
+	}
+	if (verbose) {
+		/* TODO
+		 * dump activity log and bitmap summary,
+		 * and maybe other statistics
+		 */
+	}
+}
+
+#undef MeGC
+#undef PeGC
+#else
+void drbd_dump_md(drbd_dev *mdev, Drbd_Parameter_Packet *peer, int verbose)
+{ /* do nothing */ }
+#endif
 
 //  Returns  1 if I have the good bits,
 //           0 if both are nice
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_receiver.c,v
retrieving revision 1.97.2.134
retrieving revision 1.97.2.135
diff -u -3 -r1.97.2.134 -r1.97.2.135
--- drbd_receiver.c	24 Apr 2004 16:35:21 -0000	1.97.2.134
+++ drbd_receiver.c	29 Apr 2004 14:43:27 -0000	1.97.2.135
@@ -606,7 +606,7 @@
 		if(mdev->cstate==Unconnected) return 0;
 		if(signal_pending(current)) {
 			drbd_flush_signals(current);
-			smp_mb();
+			smp_rmb();
 			if ((volatile int)mdev->receiver.t_state != Running)
 				return 0;
 
@@ -1156,7 +1156,7 @@
 		set_bit(PARTNER_DISKLESS, &mdev->flags);
 		if(mdev->cstate >= Connected ) {
 			if(mdev->state == Primary) tl_clear(mdev);
-			if(mdev->state == Primary || 
+			if(mdev->state == Primary ||
 			   be32_to_cpu(p->state) == Primary ) {
 				drbd_md_inc(mdev,ConnectedCnt);
 			}
@@ -1176,11 +1176,12 @@
 	if (consider_sync) {
 		int have_good,sync;
 
-		have_good=drbd_md_compare(mdev,p);
+		have_good = drbd_md_compare(mdev,p);
 
 		if(have_good==0) sync=0;
 		else sync=1;
 
+		drbd_dump_md(mdev,p,0);
 		//INFO("have_good=%d sync=%d\n", have_good, sync);
 
 		if ( mdev->sync_conf.skip && sync ) {
@@ -1196,14 +1197,20 @@
 				drbd_send_bitmap(mdev);
 				set_cstate(mdev,WFBitMapS);
 			} else { // have_good == -1
+				if (mdev->state == Primary) {
+/*
+	FIXME
+*/
+					WARN("Current Primary becomming sync TARGET! Data corruption in progress?\n");
+				}
 				mdev->gen_cnt[Flags] &= ~MDF_Consistent;
 				set_cstate(mdev,WFBitMapT);
-			} 
+			}
 		} else {
 			set_cstate(mdev,Connected);
 			if(mdev->rs_total) {
 				/* We are not going to do a resync but there
-				   are marks in the bitmap. 
+				   are marks in the bitmap.
 				   (Could be from the AL, or someone used
 				   the write_gc.pl program)
 				   Clean the bitmap...
@@ -1281,8 +1288,9 @@
 	if (mdev->cstate == WFBitMapS) {
 		drbd_start_resync(mdev,SyncSource);
 	} else if (mdev->cstate == WFBitMapT) {
-		drbd_send_bitmap(mdev);
-		drbd_start_resync(mdev,SyncTarget);
+		if (!drbd_send_bitmap(mdev))
+			goto out;
+		drbd_start_resync(mdev,SyncTarget); // XXX cannot fail ???
 	} else {
 		D_ASSERT(0);
 	}
@@ -1434,6 +1442,17 @@
 
 STATIC void drbd_disconnect(drbd_dev *mdev)
 {
+/*
+ * FIXME what if
+ * (state == Primary) && !(gen_cnt[Flags] & MDF_Consistent) ??
+ * or I am DISKLESS ?
+ * we need to *at least* block all IO
+ *
+ * maybe get a write mutex on mdev ?
+ * sort of "suspend" the device, untill either operator, or monitoring
+ * software, or load, or whatever, kills the box, OR connection to the
+ * good data copy is reestablished.
+ */
 	mdev->o_state = Unknown;
 	drbd_thread_stop_nowait(&mdev->worker);
 	drbd_thread_stop(&mdev->asender);
@@ -1554,6 +1573,7 @@
 	sector_t sector = be64_to_cpu(p->sector);
 	int blksize = be32_to_cpu(p->blksize);
 
+	smp_rmb();
 	if(likely(!test_bit(PARTNER_DISKLESS,&mdev->flags))) {
 		// test_bit(PARTNER_DISKLESS,&mdev->flags)
 		// This happens if one a few IO requests on the peer
@@ -1648,6 +1668,7 @@
 {
 	Drbd_BarrierAck_Packet *p = (Drbd_BarrierAck_Packet*)h;
 
+	smp_rmb();
 	if(unlikely(test_bit(PARTNER_DISKLESS,&mdev->flags))) return TRUE;
 
 	tl_release(mdev,p->barrier,be32_to_cpu(p->set_size));
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_req-2.4.c,v
retrieving revision 1.33.2.66
retrieving revision 1.33.2.67
diff -u -3 -r1.33.2.66 -r1.33.2.67
--- drbd_req-2.4.c	24 Apr 2004 16:35:22 -0000	1.33.2.66
+++ drbd_req-2.4.c	29 Apr 2004 14:43:27 -0000	1.33.2.67
@@ -164,10 +164,40 @@
 	int local, remote;
 	int target_area_out_of_sync = FALSE; // only relevant for reads
 
+	/* FIXME
+	 * not always true, e.g. someone trying to mount on Secondary
+	 * maybe error out immediately here?
+	 */
+	D_ASSERT(mdev->state == Primary);
+
+	/*
+	 * Paranoia: we might have been primary, but sync target, or
+	 * even diskless, then lost the connection.
+	 * This should have been handled (panic? suspend?) somehwere
+	 * else. But maybe it was not, so check again here.
+	 * Caution: as long as we do not have a read/write lock on mdev,
+	 * to serialize state changes, this is racy, since we may loose
+	 * the connection *after* we test for the cstate.
+	 */
+	if ( (    test_bit(DISKLESS,&mdev->flags)
+	      || !(mdev->gen_cnt[Flags] & MDF_Consistent)
+	     ) && mdev->cstate < Connected )
+	{
+		ERR("Sorry, I have no access to good data anymore.\n");
+/*
+	FIXME suspend, loop waiting on cstate wait? panic?
+*/
+		drbd_bio_IO_error(bio);
+		return 0;
+	}
+
 	/* allocate outside of all locks
 	 */
 	req = mempool_alloc(drbd_request_mempool, GFP_DRBD);
 	if (!req) {
+		/* THINK really only pass the error to the upper layers?
+		 * maybe we should rather panic reight here?
+		 */
 		ERR("could not kmalloc() req\n");
 		drbd_bio_IO_error(bio);
 		return 0;
@@ -237,6 +267,8 @@
 	 * right here already?
 	 */
 
+	/* we need to plug ALWAYS since we possibly need to kick lo_dev */
+	drbd_plug_device(mdev);
 	if (rw == WRITE && local)
 		drbd_al_begin_io(mdev, sector);
 
@@ -250,7 +282,6 @@
 		 * or READ, and no local disk,
 		 * or READ, but not in sync.
 		 */
-		drbd_plug_device(mdev);
 		if (rw == WRITE) {
 			/* Syncronization with the syncer is done
 			 * via drbd_[rs|al]_[begin|end]_io()



[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic