[prev in list] [next in list] [prev in thread] [next in thread] 

List:       linux-nfs
Subject:    Re: [PATCH 3/4] NFSD: Add WRITE_PLUS support for hole punches
From:       "J. Bruce Fields" <bfields () fieldses ! org>
Date:       2013-10-29 13:34:12
Message-ID: 20131029133412.GE29606 () fieldses ! org
[Download RAW message or body]

On Tue, Oct 29, 2013 at 08:49:06AM -0400, Anna Schumaker wrote:
> On Mon 28 Oct 2013 05:40:30 PM EDT, J. Bruce Fields wrote:
> > On Mon, Oct 28, 2013 at 10:57:25AM -0400, Anna Schumaker wrote:
> > > This patch only implements DATA_CONTENT_HOLE support, but I tried to
> > > write it so other arms of the discriminated union can be added easily
> > > later.
> > 
> > And there's an NFS4ERR_UNION_NOTSUPP to handle that case, OK.  (Though
> > the spec language is a little ambiguous:
> > 
> > 	If the client asks for a hole and the server does not support
> > 	that arm of the discriminated union, but does support one or
> > 	more additional arms, it can signal to the client that it
> > 	supports the operation, but not the arm with
> > 	NFS4ERR_UNION_NOTSUPP.
> > 
> > So it's unclear whether we can return this in the case the client is
> > *not* asking for a hole.  Are we sure the NFS4_CONTENT_DATA case is
> > optional?  The language in 5.3 ("Instead a client should utlize
> > READ_PLUS and WRITE_PLUS") also suggests the CONTENT_DATA case might be
> > meant to be mandatory.)
> 
> Fair enough.  I wasn't planning on implementing the NFS4_CONTENT_DATA 
> arm on the client right now, but I might be able to hack something 
> together to test this on the server!

OK.  Either way, could you also submit a patch to the spec to make it
explicit whether CONTENT_DATA is mandatory or not?

--b.

> 
> > 
> > > This patch is missing support for decoding multiple requests on the same
> > > file.  The way it's written now only the last range provided by the
> > > client will be written to.
> > 
> > But that doesn't seem to be a limitation anticipated by the spec, so I
> > guess we need to fix that.
> > 
> > (Why is there an array, anyway?  Wouldn't it work just as well to send
> > multiple RPC's, or a single RPC with multiple WRITE_PLUS ops?)
> 
> Yeah, I'll fix that up.  The Linux client will send requests one at a 
> time, so that's what I tested against.
> 
> > 
> > > 
> > > Signed-off-by: Anna Schumaker <bjschuma@netapp.com>
> > > ---
> > > fs/nfsd/nfs4proc.c   | 45 ++++++++++++++++++++++++++++++++++++++
> > > fs/nfsd/nfs4xdr.c    | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++--
> > > fs/nfsd/vfs.c        | 14 ++++++++++++
> > > fs/nfsd/vfs.h        |  1 +
> > > fs/nfsd/xdr4.h       | 24 ++++++++++++++++++++
> > > fs/open.c            |  1 +
> > > include/linux/nfs4.h |  8 ++++++-
> > > 7 files changed, 152 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > index 419572f..3210c6f 100644
> > > --- a/fs/nfsd/nfs4proc.c
> > > +++ b/fs/nfsd/nfs4proc.c
> > > @@ -1028,6 +1028,42 @@ nfsd4_write(struct svc_rqst *rqstp, struct \
> > > nfsd4_compound_state *cstate,  return status;
> > > }
> > > 
> > > +static __be32
> > > +nfsd4_write_plus_hole(struct file *file, struct nfsd4_write_plus *writeplus,
> > > +		      struct net *net)
> > > +{
> > > +	__be32 status;
> > > +
> > > +	status = nfsd4_vfs_fallocate(file, writeplus->wp_allocated,
> > > +				writeplus->wp_offset, writeplus->wp_length);
> > > +	if (status == nfs_ok) {
> > > +		writeplus->wp_res.wr_stid = NULL;
> > > +		writeplus->wp_res.wr_bytes_written = writeplus->wp_length;
> > > +		writeplus->wp_res.wr_stable_how = NFS_FILE_SYNC;
> > 
> > Do we need to sync?
> > 
> > > +		gen_boot_verifier(&writeplus->wp_res.wr_verifier, net);
> > > +	}
> > > +
> > > +	return status;
> > 
> > Nit, but I like the usual idiom better in most cases:
> > 
> > 	if (status)
> > 		return status;
> > 	normal case here...
> > 	return 0;
> > 
> > --b.
> > 
> > > +}
> > > +
> > > +static __be32
> > > +nfsd4_write_plus(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > > +		struct nfsd4_write_plus *writeplus)
> > > +{
> > > +	struct file *file;
> > > +	__be32 status;
> > > +	struct net *net = SVC_NET(rqstp);
> > > +
> > > +	status = nfs4_preprocess_stateid_op(net, cstate, &writeplus->wp_stateid,
> > > +					    WR_STATE, &file);
> > > +	if (status)
> > > +		return status;
> > > +
> > > +	if (writeplus->wp_data_content == NFS4_CONTENT_HOLE)
> > > +		return nfsd4_write_plus_hole(file, writeplus, net);
> > > +	return nfserr_union_notsupp;
> > > +}
> > > +
> > > /* This routine never returns NFS_OK!  If there are no other errors, it
> > > * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the
> > > * attributes matched.  VERIFY is implemented by mapping NFSERR_SAME
> > > @@ -1840,6 +1876,15 @@ static struct nfsd4_operation nfsd4_ops[] = {
> > > 		.op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
> > > 		.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> > > 	},
> > > +
> > > +	/* NFSv4.2 operations */
> > > +	[OP_WRITE_PLUS] = {
> > > +		.op_func = (nfsd4op_func)nfsd4_write_plus,
> > > +		.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
> > > +		.op_name = "OP_WRITE_PLUS",
> > > +		.op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
> > > +		.op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid,
> > > +	},
> > > };
> > > 
> > > #ifdef NFSD_DEBUG
> > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > > index 60f5a1f..1e4e9af 100644
> > > --- a/fs/nfsd/nfs4xdr.c
> > > +++ b/fs/nfsd/nfs4xdr.c
> > > @@ -1485,6 +1485,32 @@ static __be32 nfsd4_decode_reclaim_complete(struct \
> > > nfsd4_compoundargs *argp, str }
> > > 
> > > static __be32
> > > +nfsd4_decode_write_plus(struct nfsd4_compoundargs *argp,
> > > +			struct nfsd4_write_plus *writeplus)
> > > +{
> > > +	DECODE_HEAD;
> > > +	unsigned int i;
> > > +
> > > +	status = nfsd4_decode_stateid(argp, &writeplus->wp_stateid);
> > > +	if (status)
> > > +		return status;
> > > +
> > > +	READ_BUF(8);
> > > +	READ32(writeplus->wp_stable_how);
> > > +	READ32(writeplus->wp_data_length);
> > > +
> > > +	for (i = 0; i < writeplus->wp_data_length; i++) {
> > > +		READ_BUF(24);
> > > +		READ32(writeplus->wp_data_content);
> > > +		READ64(writeplus->wp_offset);
> > > +		READ64(writeplus->wp_length);
> > > +		READ32(writeplus->wp_allocated);
> > > +	}
> > > +
> > > +	DECODE_TAIL;
> > > +}
> > > +
> > > +static __be32
> > > nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
> > > {
> > > 	return nfs_ok;
> > > @@ -1665,7 +1691,7 @@ static nfsd4_dec nfsd42_dec_ops[] = {
> > > 	[OP_COPY_NOTIFY]	= (nfsd4_dec)nfsd4_decode_notsupp,
> > > 	[OP_OFFLOAD_REVOKE]	= (nfsd4_dec)nfsd4_decode_notsupp,
> > > 	[OP_OFFLOAD_STATUS]	= (nfsd4_dec)nfsd4_decode_notsupp,
> > > -	[OP_WRITE_PLUS]		= (nfsd4_dec)nfsd4_decode_notsupp,
> > > +	[OP_WRITE_PLUS]		= (nfsd4_dec)nfsd4_decode_write_plus,
> > > 	[OP_READ_PLUS]		= (nfsd4_dec)nfsd4_decode_notsupp,
> > > 	[OP_SEEK]		= (nfsd4_dec)nfsd4_decode_notsupp,
> > > 	[OP_IO_ADVISE]		= (nfsd4_dec)nfsd4_decode_notsupp,
> > > @@ -3591,6 +3617,38 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres \
> > > *resp, __be32 nfserr,  return nfserr;
> > > }
> > > 
> > > +static void
> > > +nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct \
> > > nfsd42_write_res *write) +{
> > > +	__be32 *p;
> > > +
> > > +	RESERVE_SPACE(4);
> > > +
> > > +	if (write->wr_stid == NULL) {
> > > +		WRITE32(0);
> > > +		ADJUST_ARGS();
> > > +	} else {
> > > +		WRITE32(1);
> > > +		ADJUST_ARGS();
> > > +		nfsd4_encode_stateid(resp, &write->wr_stid->sc_stateid);
> > > +	}
> > > +
> > > +	RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE);
> > > +	WRITE64(write->wr_bytes_written);
> > > +	WRITE32(write->wr_stable_how);
> > > +	WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE);
> > > +	ADJUST_ARGS();
> > > +}
> > > +
> > > +static __be32
> > > +nfsd4_encode_write_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > > +		        struct nfsd4_write_plus *writeplus)
> > > +{
> > > +	if (!nfserr)
> > > +		nfsd42_encode_write_res(resp, &writeplus->wp_res);
> > > +	return nfserr;
> > > +}
> > > +
> > > static __be32
> > > nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
> > > {
> > > @@ -3670,7 +3728,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
> > > 	[OP_COPY_NOTIFY]	= (nfsd4_enc)nfsd4_encode_noop,
> > > 	[OP_OFFLOAD_REVOKE]	= (nfsd4_enc)nfsd4_encode_noop,
> > > 	[OP_OFFLOAD_STATUS]	= (nfsd4_enc)nfsd4_encode_noop,
> > > -	[OP_WRITE_PLUS]		= (nfsd4_enc)nfsd4_encode_noop,
> > > +	[OP_WRITE_PLUS]		= (nfsd4_enc)nfsd4_encode_write_plus,
> > > 	[OP_READ_PLUS]		= (nfsd4_enc)nfsd4_encode_noop,
> > > 	[OP_SEEK]		= (nfsd4_enc)nfsd4_encode_noop,
> > > 	[OP_IO_ADVISE]		= (nfsd4_enc)nfsd4_encode_noop,
> > > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> > > index c827acb..7fec087 100644
> > > --- a/fs/nfsd/vfs.c
> > > +++ b/fs/nfsd/vfs.c
> > > @@ -16,6 +16,7 @@
> > > #include <linux/fs.h>
> > > #include <linux/file.h>
> > > #include <linux/splice.h>
> > > +#include <linux/falloc.h>
> > > #include <linux/fcntl.h>
> > > #include <linux/namei.h>
> > > #include <linux/delay.h>
> > > @@ -649,6 +650,19 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct \
> > > svc_fh *fhp, }
> > > #endif
> > > 
> > > +__be32 nfsd4_vfs_fallocate(struct file *file, bool allocated, loff_t offset, \
> > > loff_t len) +{
> > > +	int error, mode = 0;
> > > +
> > > +	if (allocated == false)
> > > +		mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
> > > +
> > > +	error = do_fallocate(file, mode, offset, len);
> > > +	if (error == 0)
> > > +		error = vfs_fsync_range(file, offset, offset + len, 0);
> > > +	return nfserrno(error);
> > > +}
> > > +
> > > #endif /* defined(CONFIG_NFSD_V4) */
> > > 
> > > #ifdef CONFIG_NFSD_V3
> > > diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
> > > index a4be2e3..187eb95 100644
> > > --- a/fs/nfsd/vfs.h
> > > +++ b/fs/nfsd/vfs.h
> > > @@ -57,6 +57,7 @@ __be32          nfsd4_set_nfs4_acl(struct svc_rqst *, struct \
> > > svc_fh *, int             nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry \
> > > *, struct nfs4_acl **); __be32          nfsd4_set_nfs4_label(struct svc_rqst *, \
> > > struct svc_fh *,  struct xdr_netobj *);
> > > +__be32		nfsd4_vfs_fallocate(struct file *, bool, loff_t, loff_t);
> > > #endif /* CONFIG_NFSD_V4 */
> > > __be32		nfsd_create(struct svc_rqst *, struct svc_fh *,
> > > 				char *name, int len, struct iattr *attrs,
> > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > > index b3ed644..aaef9ab 100644
> > > --- a/fs/nfsd/xdr4.h
> > > +++ b/fs/nfsd/xdr4.h
> > > @@ -430,6 +430,27 @@ struct nfsd4_reclaim_complete {
> > > 	u32 rca_one_fs;
> > > };
> > > 
> > > +struct nfsd42_write_res {
> > > +	struct nfs4_stid	*wr_stid;
> > > +	u64			wr_bytes_written;
> > > +	u32			wr_stable_how;
> > > +	nfs4_verifier		wr_verifier;
> > > +};
> > > +
> > > +struct nfsd4_write_plus {
> > > +	/* request */
> > > +	stateid_t	wp_stateid;
> > > +	u32		wp_stable_how;
> > > +	u32		wp_data_length;
> > > +	u32		wp_data_content;
> > > +	u64		wp_offset;
> > > +	u64		wp_length;
> > > +	u32		wp_allocated;
> > > +
> > > +	/* response */
> > > +	struct nfsd42_write_res	wp_res;
> > > +};
> > > +
> > > struct nfsd4_op {
> > > 	int					opnum;
> > > 	__be32					status;
> > > @@ -475,6 +496,9 @@ struct nfsd4_op {
> > > 		struct nfsd4_reclaim_complete	reclaim_complete;
> > > 		struct nfsd4_test_stateid	test_stateid;
> > > 		struct nfsd4_free_stateid	free_stateid;
> > > +
> > > +		/* NFSv4.2 */
> > > +		struct nfsd4_write_plus		write_plus;
> > > 	} u;
> > > 	struct nfs4_replay *			replay;
> > > };
> > > diff --git a/fs/open.c b/fs/open.c
> > > index d420331..09db2d5 100644
> > > --- a/fs/open.c
> > > +++ b/fs/open.c
> > > @@ -278,6 +278,7 @@ int do_fallocate(struct file *file, int mode, loff_t \
> > > offset, loff_t len)  sb_end_write(inode->i_sb);
> > > 	return ret;
> > > }
> > > +EXPORT_SYMBOL_GPL(do_fallocate);
> > > 
> > > SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
> > > {
> > > diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
> > > index 2bc5217..55ed991 100644
> > > --- a/include/linux/nfs4.h
> > > +++ b/include/linux/nfs4.h
> > > @@ -128,7 +128,7 @@ enum nfs_opnum4 {
> > > Needs to be updated if more operations are defined in future.*/
> > > 
> > > #define FIRST_NFS4_OP	OP_ACCESS
> > > -#define LAST_NFS4_OP 	OP_RECLAIM_COMPLETE
> > > +#define LAST_NFS4_OP 	OP_WRITE_PLUS
> > > 
> > > enum nfsstat4 {
> > > 	NFS4_OK = 0,
> > > @@ -552,4 +552,10 @@ struct nfs4_deviceid {
> > > 	char data[NFS4_DEVICEID4_SIZE];
> > > };
> > > 
> > > +enum data_content4 {
> > > +	NFS4_CONTENT_DATA		= 0,
> > > +	NFS4_CONTENT_APP_DATA_HOLE	= 1,
> > > +	NFS4_CONTENT_HOLE		= 2,
> > > +};
> > > +
> > > #endif
> > > --
> > > 1.8.4.1
> > > 
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic