[prev in list] [next in list] [prev in thread] [next in thread]
List: linux-nfs
Subject: Re: [NFS]
From: Bernd Schubert <bernd-schubert () gmx ! de>
Date: 2007-09-03 20:02:37
Message-ID: 200709032202.37927.bernd-schubert () gmx ! de
[Download RAW message or body]
On Friday 31 August 2007, J. Bruce Fields wrote:
>
> Hm. Any chance this is the same problem?:
>
> http://marc.info/?l=linux-nfs&m=112289652218095&w=2
>
I have slightly modified Olaf's patch; now the last page is saved and restored.
Well, there's a problem with it: if there's more than one thread, files over
NFS become corrupted; if there's only one thread, everything is fine. So far I
have no idea what's going on.
In principle we could do the same in do_readv_writev(); the iov vector there
should hopefully only belong to one thread.
Anyway, using this patch, the nfs-write speed on lustre is about 200 MB/s.
Cheers,
Bernd
["nfs_align.patch_racy" (text/x-diff)]
Index: linux-2.6.20.3/fs/nfsd/vfs.c
===================================================================
--- linux-2.6.20.3.orig/fs/nfsd/vfs.c 2007-09-03 14:41:54.000000000 +0200
+++ linux-2.6.20.3/fs/nfsd/vfs.c 2007-09-03 17:41:36.000000000 +0200
@@ -900,6 +900,48 @@
mutex_unlock(&dentry->d_inode->i_mutex);
}
+/* Page-align the write payload: if vec[] has the layout checked below, shift
+ * the data up by vec[0].iov_len bytes so vec[1..vlen-1] each get a full page.
+ * Returns 1 if the data was shifted, 0 (nothing modified) otherwise. */
+static int
+nfsd_page_align_payload(struct kvec *vec, int vlen)
+{
+ unsigned char *this_page, *prev_page;
+ int i, chunk0, chunk1;
+
+ /* Bail out unless the vector has exactly the layout the shifting loop below relies on */
+ if (vlen < 2)
+ return 0;
+
+ if (vec[0].iov_len + vec[vlen-1].iov_len != PAGE_CACHE_SIZE) /* first + last must add up to one page */
+ return 0;
+ for (i = 1; i < vlen - 1; ++i) { /* every middle element must be exactly one page */
+ if (vec[i].iov_len != PAGE_CACHE_SIZE)
+ return 0;
+ }
+
+ chunk0 = vec[0].iov_len; /* bytes held by the first element */
+ chunk1 = PAGE_CACHE_SIZE - chunk0; /* equals vec[vlen-1].iov_len, per the check above */
+
+ this_page = (unsigned char *) vec[vlen-1].iov_base;
+ for (i = vlen-1; i; --i) { /* walk from the last element down to vec[1] */
+ prev_page = (unsigned char *) vec[i-1].iov_base;
+
+ /* Move this buffer's first chunk1 bytes up so they end at the
+ * page boundary, then fill the freed chunk0 bytes at the front from
+ * offset chunk1 of the previous buffer. NOTE(review): for i == 1 that
+ * source is vec[0], whose iov_len is only chunk0 - confirm the offset. */
+ memmove(this_page + chunk0, this_page, chunk1);
+ memcpy(this_page, prev_page + chunk1, chunk0);
+ vec[i].iov_len = PAGE_CACHE_SIZE; /* element now describes a whole page */
+ this_page = prev_page;
+ }
+
+ return 1; /* vec[0] itself is left unchanged */
+}
+
+
+
static __be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
@@ -912,6 +954,8 @@
__be32 err = 0;
int host_err;
int stable = *stablep;
+ char *tmp_page; /* used for page alignement */
+ int did_align = 0;
#ifdef MSNFS
err = nfserr_perm;
@@ -944,6 +988,26 @@
if (stable && !EX_WGATHER(exp))
file->f_flags |= O_SYNC;
+ /* save the last vector element, besides file data it may have
+ * nfsv4 data, which will be overwritten by the alignement hack */
+ tmp_page = (char *) __get_free_page(GFP_KERNEL);
+ if (tmp_page
+ && (offset < inode->i_size)
+ && vec->iov_len != PAGE_CACHE_SIZE) {
+ memcpy(tmp_page, vec[vlen - 1].iov_base, PAGE_SIZE);
+
+ /* Hack: if we're rewriting the file, make sure
+ * we align the iovec properly to avoid costly
+ * read-modify-write operations on the block devices.
+ * This hack can go away once we have generic_file_writev.
+ */
+
+ if (nfsd_page_align_payload(vec, vlen)) {
+ did_align = 1;
+ vec++, vlen--;
+ }
+ }
+
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);
host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
@@ -953,6 +1017,12 @@
fsnotify_modify(file->f_path.dentry);
}
+ /* restore the last vector element */
+ if (did_align) {
+ memcpy(vec[vlen - 1].iov_base, tmp_page, PAGE_SIZE);
+ free_page((unsigned long) tmp_page);
+ }
+
/* clear setuid/setgid flag after write */
if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
kill_suid(dentry);
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic