[prev in list] [next in list] [prev in thread] [next in thread]
List: drbd-cvs
Subject: [DRBD-cvs] drbd by phil; [Patch by Lars] * we requested a "low me...
From: drbd-cvs () linbit ! com
Date: 2004-04-29 14:43:32
Message-ID: 20040429144332.7374515DF5C () garcon ! linbit ! com
[Download RAW message or body]
DRBD CVS committal
Author : phil
Module : drbd
Dir : drbd/drbd
Modified Files:
Tag: rel-0_7-branch
drbd_actlog.c drbd_compat_wrappers.h drbd_fs.c drbd_int.h
drbd_main.c drbd_receiver.c drbd_req-2.4.c
Log Message:
[Patch by Lars]
* we requested a "low mem page" for md_io, so we do not need to kmap it.
* always plug the device, since if "they" no longer do blk_run_queues(),
we need to pass the kick to the lo dev ourselves.
* do a "yield()" after thi->task = NULL, so there is not even a
theoretical race anymore in wake_asender()
* several smp_mb(), without detailed understanding, not even whether
they are useful there :(
so you may want to drop them again.
* improvement to verify_ips() in drbdadm
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_actlog.c,v
retrieving revision 1.1.2.93
retrieving revision 1.1.2.94
diff -u -3 -r1.1.2.93 -r1.1.2.94
--- drbd_actlog.c 28 Apr 2004 13:36:07 -0000 1.1.2.93
+++ drbd_actlog.c 29 Apr 2004 14:43:26 -0000 1.1.2.94
@@ -43,9 +43,8 @@
#ifdef PARANOIA
if (rw != WRITE) {
- void *b = kmap(mdev->md_io_page);
+ void *b = page_address(mdev->md_io_page);
memset(b,0,PAGE_SIZE);
- kunmap(mdev->md_io_page);
}
#endif
init_completion(&event);
@@ -73,9 +72,8 @@
#ifdef PARANOIA
if (rw != WRITE) {
- void *b = kmap(mdev->md_io_page);
+ void *b = page_address(mdev->md_io_page);
memset(b,0,PAGE_SIZE);
- kunmap(mdev->md_io_page);
}
#endif
bio_init(&bio);
@@ -231,7 +229,7 @@
u32 xor_sum=0;
down(&mdev->md_io_mutex); // protects md_io_buffer, al_tr_cycle, ...
- buffer = (struct al_transaction*)kmap(mdev->md_io_page);
+ buffer = (struct al_transaction*)page_address(mdev->md_io_page);
buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
@@ -266,7 +264,6 @@
buffer->xor_sum = cpu_to_be32(xor_sum);
- kunmap(mdev->md_io_page);
sector = drbd_md_ss(mdev) + MD_AL_OFFSET + mdev->al_tr_pos ;
@@ -324,7 +321,7 @@
* and make sure the page is mapped.
*/
down(&mdev->md_io_mutex);
- buffer = kmap(mdev->md_io_page);
+ buffer = page_address(mdev->md_io_page);
// Find the valid transaction in the log
for(i=0;i<=mx;i++) {
@@ -347,7 +344,6 @@
if(from == -1 || to == -1) {
WARN("No usable activity log found.\n");
- kunmap(mdev->md_io_page);
up(&mdev->md_io_mutex);
return;
}
@@ -396,7 +392,6 @@
}
/* ok, we are done with it */
- kunmap(mdev->md_io_page);
up(&mdev->md_io_mutex);
INFO("Found %d transactions (%d active extents) in activity log.\n",
@@ -525,7 +520,7 @@
bm = mdev->mbds_id->bm;
down(&mdev->md_io_mutex);
- buffer = (unsigned long *)kmap(mdev->md_io_page);
+ buffer = (unsigned long *)page_address(mdev->md_io_page);
while (1) {
want=min_t(int,512/sizeof(long),bm_words-bm_i);
@@ -548,7 +543,6 @@
}
}
- kunmap(mdev->md_io_page);
up(&mdev->md_io_mutex);
mdev->rs_total = (bits << (BM_BLOCK_SIZE_B - 9)) +
@@ -589,13 +583,11 @@
want=min_t(unsigned int,512/sizeof(long),bm_words-bm_i);
down(&mdev->md_io_mutex); // protects md_io_buffer
- buffer = (unsigned long *)kmap(mdev->md_io_page);
+ buffer = (unsigned long *)page_address(mdev->md_io_page);
for(buf_i=0;buf_i<want;buf_i++) {
buffer[buf_i] = cpu_to_lel(bm[bm_i++]);
}
-
- kunmap(mdev->md_io_page);
sector = drbd_md_ss(mdev) + MD_BM_OFFSET + enr/EXTENTS_PER_SECTOR;
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/Attic/drbd_compat_wrappers.h,v
retrieving revision 1.1.2.38
retrieving revision 1.1.2.39
diff -u -3 -r1.1.2.38 -r1.1.2.39
--- drbd_compat_wrappers.h 18 Apr 2004 10:33:56 -0000 1.1.2.38
+++ drbd_compat_wrappers.h 29 Apr 2004 14:43:26 -0000 1.1.2.39
@@ -245,7 +245,10 @@
static inline void drbd_plug_device(drbd_dev *mdev)
{
- if(!test_and_set_bit(WRITE_HINT_QUEUED,&mdev->flags)) {
+ D_ASSERT(mdev->state == Primary);
+ if (mdev->cstate < Connected)
+ return;
+ if (!test_and_set_bit(WRITE_HINT_QUEUED,&mdev->flags)) {
queue_task(&mdev->write_hint_tq, &tq_disk); // IO HINT
}
}
@@ -525,6 +528,10 @@
request_queue_t *q = bdev_get_queue(mdev->this_bdev);
spin_lock_irq(q->queue_lock);
+
+/* XXX the check on !blk_queue_plugged is redundant,
+ * implicitly checked in blk_plug_device */
+
if(!blk_queue_plugged(q)) {
blk_plug_device(q);
del_timer(&q->unplug_timer);
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_fs.c,v
retrieving revision 1.28.2.81
retrieving revision 1.28.2.82
diff -u -3 -r1.28.2.81 -r1.28.2.82
--- drbd_fs.c 27 Apr 2004 14:23:18 -0000 1.28.2.81
+++ drbd_fs.c 29 Apr 2004 14:43:26 -0000 1.28.2.82
@@ -173,6 +173,7 @@
minor=(int)(mdev-drbd_conf);
/* if you want to reconfigure, please tear down first */
+ smp_rmb();
if (!test_bit(DISKLESS,&mdev->flags))
return -EBUSY;
@@ -528,6 +529,7 @@
return -EACCES;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+ smp_rmb();
if(newstate == Secondary &&
(test_bit(WRITER_PRESENT,&mdev->flags) ||
drbd_is_mounted(minor) == MountedRW))
@@ -692,6 +694,7 @@
D_ASSERT(disk == mdev->vdisk);
);
+ smp_rmb();
switch (cmd) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
/* see how sys_ioctl and blkdev_ioctl handle it in 2.6 .
@@ -743,8 +746,6 @@
mdev->lo_usize = (unsigned long)arg;
drbd_determin_dev_size(mdev);
drbd_md_write(mdev); // Write mdev->la_size to disk.
- //#warning "yet an other reason to serialize all state changes on a rw_semaphore"
- // PRE: Please explain the issue.
if (mdev->cstate == Connected) drbd_send_param(mdev,0);
break;
@@ -762,7 +763,15 @@
break;
case DRBD_IOCTL_UNCONFIG_NET:
- if( mdev->cstate == Unconfigured) break;
+ if ( mdev->cstate == Unconfigured) break;
+ if ( ( mdev->state == Primary
+ && test_bit(DISKLESS,&mdev->flags) )
+ || ( mdev->o_state == Primary
+ && test_bit(PARTNER_DISKLESS,&mdev->flags) ) )
+ {
+ err=-ENODATA;
+ break;
+ }
/* FIXME what if fsync returns error */
drbd_sync_me(mdev);
set_bit(DO_NOT_INC_CONCNT,&mdev->flags);
@@ -800,7 +809,7 @@
drbd_sync_me(mdev);
set_bit(DISKLESS,&mdev->flags);
- smp_mb__after_clear_bit();
+ smp_wmb();
if ( wait_event_interruptible(mdev->cstate_wait,
atomic_read(&mdev->local_cnt)==0) ) {
clear_bit(DISKLESS,&mdev->flags);
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_int.h,v
retrieving revision 1.58.2.148
retrieving revision 1.58.2.149
diff -u -3 -r1.58.2.148 -r1.58.2.149
--- drbd_int.h 27 Apr 2004 14:23:18 -0000 1.58.2.148
+++ drbd_int.h 29 Apr 2004 14:43:26 -0000 1.58.2.149
@@ -694,7 +694,7 @@
wait_queue_head_t ee_wait;
struct list_head busy_blocks;
NOT_IN_26(struct tq_struct write_hint_tq;)
- struct page *md_io_page; // one page buffer for md_io
+ struct page *md_io_page; // one page buffer for md_io
struct semaphore md_io_mutex; // protects the md_io_buffer
spinlock_t al_lock;
wait_queue_head_t al_wait;
@@ -752,6 +752,7 @@
extern int drbd_md_read(drbd_dev *mdev);
extern void drbd_md_inc(drbd_dev *mdev, enum MetaDataIndex order);
extern int drbd_md_compare(drbd_dev *mdev,Drbd_Parameter_Packet *partner);
+extern void drbd_dump_md(drbd_dev *, Drbd_Parameter_Packet *, int );
// drbd_bitmap.c (still in drbd_main.c)
#define SS_OUT_OF_SYNC (1)
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_main.c,v
retrieving revision 1.73.2.156
retrieving revision 1.73.2.157
diff -u -3 -r1.73.2.156 -r1.73.2.157
--- drbd_main.c 27 Apr 2004 13:49:23 -0000 1.73.2.156
+++ drbd_main.c 29 Apr 2004 14:43:26 -0000 1.73.2.157
@@ -399,11 +399,6 @@
}
if(test_bit(MD_IO_ALLOWED,&mdev->flags) &&
test_bit(DISKLESS,&mdev->flags) && ns < Connected) {
-
-/* are you SURE you want this HERE ? */
-
- clear_bit(DISKLESS,&mdev->flags);
- smp_wmb();
clear_bit(MD_IO_ALLOWED,&mdev->flags);
}
}
@@ -420,6 +415,11 @@
thi->task = 0;
+ /* propagate task == NULL to other CPUs */
+ smp_mb(); // necessary?
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(1);
+
up(&thi->mutex); //allow thread_stop to proceed
return retval;
@@ -1050,21 +1050,25 @@
#endif
drbd_dev *mdev = q->queuedata;
+ INFO("%s [%d]: unplug\n",current->comm, current->pid);
/* unplug FIRST */
spin_lock_irq(q->queue_lock);
blk_remove_plug(q);
spin_unlock_irq(q->queue_lock);
- ERR_IF(mdev->state != Primary)
- return;
- /* add to the front of the data.work queue,
- * unless already queued */
- spin_lock_irq(&mdev->req_lock);
- /* FIXME this might be a good addition to drbd_queu_work
- * anyways, to detect "double queuing" ... */
- if (list_empty(&mdev->unplug_work.list))
- _drbd_queue_work_front(&mdev->data.work,&mdev->unplug_work);
- spin_unlock_irq(&mdev->req_lock);
+ /* only if connected */
+ if (mdev->cstate >= Connected) {
+ D_ASSERT(mdev->state == Primary);
+ spin_lock_irq(&mdev->req_lock);
+ /* add to the front of the data.work queue,
+ * unless already queued.
+ * XXX this might be a good addition to drbd_queue_work
+ * anyways, to detect "double queuing" ... */
+ if (list_empty(&mdev->unplug_work.list))
+ _drbd_queue_work_front(&mdev->data.work,&mdev->unplug_work);
+ spin_unlock_irq(&mdev->req_lock);
+ }
+ /* allways */
drbd_kick_lo(mdev);
}
#endif
@@ -2032,7 +2036,7 @@
if(!inc_local_md_only(mdev)) return;
down(&mdev->md_io_mutex);
- buffer = (struct meta_data_on_disk *)kmap(mdev->md_io_page);
+ buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
flags=mdev->gen_cnt[Flags] & ~(MDF_PrimaryInd|MDF_ConnectedInd);
if(mdev->state==Primary) flags |= MDF_PrimaryInd;
@@ -2050,8 +2054,6 @@
buffer->bm_offset = __constant_cpu_to_be32(MD_BM_OFFSET);
- kunmap(mdev->md_io_page);
-
sector = drbd_md_ss(mdev) + MD_GC_OFFSET;
/* FIXME what if this fails ?? */
@@ -2071,7 +2073,7 @@
if(!inc_local_md_only(mdev)) return -1;
down(&mdev->md_io_mutex);
- buffer = (struct meta_data_on_disk *)kmap(mdev->md_io_page);
+ buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page);
sector = drbd_md_ss(mdev) + MD_GC_OFFSET;
@@ -2086,14 +2088,12 @@
mdev->la_size = be64_to_cpu(buffer->la_size);
mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
- kunmap(mdev->md_io_page);
up(&mdev->md_io_mutex);
dec_local(mdev);
return 1;
err:
- kunmap(mdev->md_io_page);
up(&mdev->md_io_mutex);
dec_local(mdev);
@@ -2108,6 +2108,44 @@
return 0;
}
+#if 0
+#define MeGC(x) mdev->gen_cnt[x]
+#define PeGC(x) be32_to_cpu(peer->gen_cnt[x])
+
+void drbd_dump_md(drbd_dev *mdev, Drbd_Parameter_Packet *peer, int verbose)
+{
+ INFO("MeGCs: %c:%08x:%08x:%08x:%08x:%c%c\n",
+ MeGC(Flags) & MDF_Consistent ? '1' : '0',
+ MeGC(HumanCnt),
+ MeGC(TimeoutCnt),
+ MeGC(ConnectedCnt),
+ MeGC(ArbitraryCnt),
+ MeGC(Flags) & MDF_PrimaryInd ? '1' : '0',
+ MeGC(Flags) & MDF_ConnectedInd ? '1' : '0');
+ if (peer) {
+ INFO("PeGCs: %c:%08x:%08x:%08x:%08x:%c%c\n",
+ PeGC(Flags) & MDF_Consistent ? '1' : '0',
+ PeGC(HumanCnt),
+ PeGC(TimeoutCnt),
+ PeGC(ConnectedCnt),
+ PeGC(ArbitraryCnt),
+ PeGC(Flags) & MDF_PrimaryInd ? '1' : '0',
+ PeGC(Flags) & MDF_ConnectedInd ? '1' : '0');
+ }
+ if (verbose) {
+ /* TODO
+ * dump activity log and bitmap summary,
+ * and maybe other statistics
+ */
+ }
+}
+
+#undef MeGC
+#undef PeGC
+#else
+void drbd_dump_md(drbd_dev *mdev, Drbd_Parameter_Packet *peer, int verbose)
+{ /* do nothing */ }
+#endif
// Returns 1 if I have the good bits,
// 0 if both are nice
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_receiver.c,v
retrieving revision 1.97.2.134
retrieving revision 1.97.2.135
diff -u -3 -r1.97.2.134 -r1.97.2.135
--- drbd_receiver.c 24 Apr 2004 16:35:21 -0000 1.97.2.134
+++ drbd_receiver.c 29 Apr 2004 14:43:27 -0000 1.97.2.135
@@ -606,7 +606,7 @@
if(mdev->cstate==Unconnected) return 0;
if(signal_pending(current)) {
drbd_flush_signals(current);
- smp_mb();
+ smp_rmb();
if ((volatile int)mdev->receiver.t_state != Running)
return 0;
@@ -1156,7 +1156,7 @@
set_bit(PARTNER_DISKLESS, &mdev->flags);
if(mdev->cstate >= Connected ) {
if(mdev->state == Primary) tl_clear(mdev);
- if(mdev->state == Primary ||
+ if(mdev->state == Primary ||
be32_to_cpu(p->state) == Primary ) {
drbd_md_inc(mdev,ConnectedCnt);
}
@@ -1176,11 +1176,12 @@
if (consider_sync) {
int have_good,sync;
- have_good=drbd_md_compare(mdev,p);
+ have_good = drbd_md_compare(mdev,p);
if(have_good==0) sync=0;
else sync=1;
+ drbd_dump_md(mdev,p,0);
//INFO("have_good=%d sync=%d\n", have_good, sync);
if ( mdev->sync_conf.skip && sync ) {
@@ -1196,14 +1197,20 @@
drbd_send_bitmap(mdev);
set_cstate(mdev,WFBitMapS);
} else { // have_good == -1
+ if (mdev->state == Primary) {
+/*
+ FIXME
+*/
+ WARN("Current Primary becomming sync TARGET! Data corruption in progress?\n");
+ }
mdev->gen_cnt[Flags] &= ~MDF_Consistent;
set_cstate(mdev,WFBitMapT);
- }
+ }
} else {
set_cstate(mdev,Connected);
if(mdev->rs_total) {
/* We are not going to do a resync but there
- are marks in the bitmap.
+ are marks in the bitmap.
(Could be from the AL, or someone used
the write_gc.pl program)
Clean the bitmap...
@@ -1281,8 +1288,9 @@
if (mdev->cstate == WFBitMapS) {
drbd_start_resync(mdev,SyncSource);
} else if (mdev->cstate == WFBitMapT) {
- drbd_send_bitmap(mdev);
- drbd_start_resync(mdev,SyncTarget);
+ if (!drbd_send_bitmap(mdev))
+ goto out;
+ drbd_start_resync(mdev,SyncTarget); // XXX cannot fail ???
} else {
D_ASSERT(0);
}
@@ -1434,6 +1442,17 @@
STATIC void drbd_disconnect(drbd_dev *mdev)
{
+/*
+ * FIXME what if
+ * (state == Primary) && !(gen_cnt[Flags] & MDF_Consistent) ??
+ * or I am DISKLESS ?
+ * we need to *at least* block all IO
+ *
+ * maybe get a write mutex on mdev ?
+ * sort of "suspend" the device, untill either operator, or monitoring
+ * software, or load, or whatever, kills the box, OR connection to the
+ * good data copy is reestablished.
+ */
mdev->o_state = Unknown;
drbd_thread_stop_nowait(&mdev->worker);
drbd_thread_stop(&mdev->asender);
@@ -1554,6 +1573,7 @@
sector_t sector = be64_to_cpu(p->sector);
int blksize = be32_to_cpu(p->blksize);
+ smp_rmb();
if(likely(!test_bit(PARTNER_DISKLESS,&mdev->flags))) {
// test_bit(PARTNER_DISKLESS,&mdev->flags)
// This happens if one a few IO requests on the peer
@@ -1648,6 +1668,7 @@
{
Drbd_BarrierAck_Packet *p = (Drbd_BarrierAck_Packet*)h;
+ smp_rmb();
if(unlikely(test_bit(PARTNER_DISKLESS,&mdev->flags))) return TRUE;
tl_release(mdev,p->barrier,be32_to_cpu(p->set_size));
===================================================================
RCS file: /var/lib/cvs/drbd/drbd/drbd/drbd_req-2.4.c,v
retrieving revision 1.33.2.66
retrieving revision 1.33.2.67
diff -u -3 -r1.33.2.66 -r1.33.2.67
--- drbd_req-2.4.c 24 Apr 2004 16:35:22 -0000 1.33.2.66
+++ drbd_req-2.4.c 29 Apr 2004 14:43:27 -0000 1.33.2.67
@@ -164,10 +164,40 @@
int local, remote;
int target_area_out_of_sync = FALSE; // only relevant for reads
+ /* FIXME
+ * not always true, e.g. someone trying to mount on Secondary
+ * maybe error out immediately here?
+ */
+ D_ASSERT(mdev->state == Primary);
+
+ /*
+ * Paranoia: we might have been primary, but sync target, or
+ * even diskless, then lost the connection.
+ * This should have been handled (panic? suspend?) somehwere
+ * else. But maybe it was not, so check again here.
+ * Caution: as long as we do not have a read/write lock on mdev,
+ * to serialize state changes, this is racy, since we may loose
+ * the connection *after* we test for the cstate.
+ */
+ if ( ( test_bit(DISKLESS,&mdev->flags)
+ || !(mdev->gen_cnt[Flags] & MDF_Consistent)
+ ) && mdev->cstate < Connected )
+ {
+ ERR("Sorry, I have no access to good data anymore.\n");
+/*
+ FIXME suspend, loop waiting on cstate wait? panic?
+*/
+ drbd_bio_IO_error(bio);
+ return 0;
+ }
+
/* allocate outside of all locks
*/
req = mempool_alloc(drbd_request_mempool, GFP_DRBD);
if (!req) {
+ /* THINK really only pass the error to the upper layers?
+ * maybe we should rather panic reight here?
+ */
ERR("could not kmalloc() req\n");
drbd_bio_IO_error(bio);
return 0;
@@ -237,6 +267,8 @@
* right here already?
*/
+ /* we need to plug ALWAYS since we possibly need to kick lo_dev */
+ drbd_plug_device(mdev);
if (rw == WRITE && local)
drbd_al_begin_io(mdev, sector);
@@ -250,7 +282,6 @@
* or READ, and no local disk,
* or READ, but not in sync.
*/
- drbd_plug_device(mdev);
if (rw == WRITE) {
/* Syncronization with the syncer is done
* via drbd_[rs|al]_[begin|end]_io()
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic