[prev in list] [next in list] [prev in thread] [next in thread] 

List:       linux-nfs
Subject:    [NFS] PATCH: NFS over TCP support for 2.2.16
From:       saadia () khan ! engr ! sgi ! com (Saadia Khan)
Date:       2000-06-19 23:18:25
[Download RAW message or body]

--%--multipart-mixed-boundary-1.345193.961456705--%
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Hi Alan,

The following patch is against 2.2.16 with linux-2.2.16-nfsv3-0.21.3.dif
and dhiggen-over-0.21.3 patches applied. It passes the connectathon
tests and fixes the following bugs in the nfs tcp code:

1. svc_tcp_accept rejects connections from unprivileged ports (>= 1024).
   Instead of rejecting such connections, it now prints a warning.
2. svc_tcp_data_ready adds 1 to sk_data, implying that one message has
   arrived at the socket, so svc_tcp_recvfrom reads only one
   record at a time, ignoring any other messages that might be waiting
   in the recv queue.
3. If there is an incomplete record at the socket, instead of waiting
   for the rest of the data to arrive, svc_tcp_recvfrom sets the
   received length to -EAGAIN and carries on as if the complete record
   had been read. The fix is to return -EAGAIN to the calling routine
   so that svc_recv waits for the rest of the data to arrive.
4. The server matches an incoming request with a cached one in the reply
   cache by checking only the xid and client IP address; it should
   also check the protocol and client port number.

There is one other issue that I have looked at: if the server is
sending back a reply and the send buffer fills up before the complete
reply goes through, the output stream will not make sense to the client.
I haven't seen this happen, but that doesn't mean it shouldn't
be fixed. I'm looking into it and will put out the fix soon.

thanks,
Saadia



--%--multipart-mixed-boundary-1.345193.961456705--%
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Description: ascii text
Content-Disposition: attachment; filename="nfs_tcp.dif"

--- linux-2.2.16-patched/include/linux/nfsd/export.h	Mon Jun 19 13:21:12 2000
+++ linux-2.2.16-tcp/include/linux/nfsd/export.h	Mon Jun 19 12:16:54 2000
@@ -95,13 +95,14 @@
 
 
 extern __inline__ int
-exp_checkaddr(struct svc_client *clp, struct in_addr addr)
+exp_checkaddr(struct svc_client *clp, struct sockaddr_in addr)
 {
-	struct in_addr	*ap = clp->cl_addr;
+	struct in_addr	*ap = clp->cl_addr,
+			ad  = addr.sin_addr;
 	int		i;
 
 	for (i = clp->cl_naddr; i--; ap++)
-		if (ap->s_addr == addr.s_addr)
+		if (ap->s_addr == ad.s_addr)
 			return 1;
 	return 0;
 }
--- linux-2.2.16-patched/include/linux/nfsd/cache.h	Mon Dec 28 14:09:59 1998
+++ linux-2.2.16-tcp/include/linux/nfsd/cache.h	Mon Jun 19 14:11:55 2000
@@ -25,9 +25,12 @@
 	unsigned char		c_state,	/* unused, inprog, done */
 				c_type,		/* status, buffer */
 				c_secure : 1;	/* req came from port < 1024 */
-	struct in_addr		c_client;
-	u32			c_xid;
+	struct sockaddr_in	c_client;	/* changing from in_addr to
+						 * sockaddr_in to check for 
+						 * port no. */
+        u32                     c_xid;
 	u32			c_proc;
+	u32 			c_prot;
 	unsigned long		c_timestamp;
 	union {
 		struct svc_buf	u_buffer;
--- linux-2.2.16-patched/net/sunrpc/svcsock.c	Tue Jan  4 10:12:27 2000
+++ linux-2.2.16-tcp/net/sunrpc/svcsock.c	Mon Jun 19 14:12:59 2000
@@ -548,15 +548,18 @@
 
 	/* Ideally, we would want to reject connections from unauthorized
 	 * hosts here, but we have no generic client tables. For now,
-	 * we just punt connects from unprivileged ports. */
-	if (ntohs(sin.sin_port) >= 1024) {
-		if (net_ratelimit())
-			printk(KERN_WARNING
-				   "%s: connect from unprivileged port: %s:%d",
-				   serv->sv_name, 
-				   in_ntoa(sin.sin_addr.s_addr), ntohs(sin.sin_port));
-		goto failed;
-	}
+	 * we just punt connects from unprivileged ports. 
+         * hosts here, but when we get encryption, the IP of the host won't
+         * tell us anything. For now just warn about unpriv connections.
+         */
+        if (ntohs(sin.sin_port) >= 1024) {
+                if (net_ratelimit())
+                        printk(KERN_WARNING
+                                   "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n",
+                                   serv->sv_name, 
+                                   NIPQUAD(sin.sin_addr.s_addr), 
+			           ntohs(sin.sin_port));
+        }
 
 	dprintk("%s: connect from %s:%04x\n", serv->sv_name,
 			in_ntoa(sin.sin_addr.s_addr), ntohs(sin.sin_port));
@@ -590,7 +593,7 @@
 	struct svc_sock	*svsk = rqstp->rq_sock;
 	struct svc_serv	*serv = svsk->sk_server;
 	struct svc_buf	*bufp = &rqstp->rq_argbuf;
-	int		len, ready;
+	int		len, ready, used;
 
 	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
 			svsk, svsk->sk_data, svsk->sk_conn, svsk->sk_close);
@@ -624,10 +627,11 @@
 
 		svsk->sk_reclen = ntohl(svsk->sk_reclen);
 		if (!(svsk->sk_reclen & 0x80000000)) {
-			/* FIXME: shutdown socket */
-			printk(KERN_NOTICE "RPC: bad TCP reclen %08lx",
-			       (unsigned long) svsk->sk_reclen);
-			return -EIO;
+			if (net_ratelimit())
+				printk(KERN_NOTICE "RPC: bad TCP reclen %08lx",
+			       		(unsigned long) svsk->sk_reclen);
+			svc_delete_socket(svsk);
+			return 0;
 		}
 		svsk->sk_reclen &= 0x7fffffff;
 		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
@@ -642,8 +646,19 @@
 		dprintk("svc: incomplete TCP record (%d of %d)\n",
 			len, svsk->sk_reclen);
 		svc_sock_received(svsk, ready);
-		len = -EAGAIN;	/* record not complete */
+		return -EAGAIN;	/* record not complete */
+	}
+
+        /* if we think there is only one more record to read, but
+         * it is bigger than we expect, then two records must have arrived
+         * together, so pretend we aren't using the record.. */
+        if (len > svsk->sk_reclen && ready == 1){
+                used = 0;
+		dprintk("svc_recv: more data at hte socket len %d > svsk->sk_reclen %d",
+                        len, svsk->sk_reclen);
 	}
+        else    used = 1;
+ 
 
 	/* Frob argbuf */
 	bufp->iov[0].iov_base += 4;
@@ -670,7 +685,7 @@
 	svsk->sk_reclen = 0;
 	svsk->sk_tcplen = 0;
 
-	svc_sock_received(svsk, 1);
+	svc_sock_received(svsk, used);
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpcnt++;
 
--- linux-2.2.16-patched/fs/nfsd/nfssvc.c	Mon Jun 19 13:21:12 2000
+++ linux-2.2.16-tcp/fs/nfsd/nfssvc.c	Mon Jun 19 14:13:57 2000
@@ -76,11 +76,9 @@
 	if (error < 0)
 		goto failure;
 
-#if 0	/* Don't even pretend that TCP works. It doesn't. */
 	error = svc_makesock(serv, IPPROTO_TCP, port);
 	if (error < 0)
 		goto failure;
-#endif
 
 	nfsd_racache_init();	/* Readahead param cache */
 
@@ -142,8 +140,10 @@
 		while ((err = svc_recv(serv, rqstp,
 				       MAX_SCHEDULE_TIMEOUT)) == -EAGAIN)
 		    ;
-		if (err < 0)
-			break;
+
+                if (err < 0)
+                        break;
+
 
 		/* Lock the export hash tables for reading. */
 		exp_readlock();
--- linux-2.2.16-patched/fs/nfsd/nfscache.c	Sun Jan 24 21:54:35 1999
+++ linux-2.2.16-tcp/fs/nfsd/nfscache.c	Mon Jun 19 14:14:34 2000
@@ -145,7 +145,9 @@
 	struct svc_cacherep	*rh, *rp;
 	struct svc_client	*clp = rqstp->rq_client;
 	u32			xid = rqstp->rq_xid,
-				proc = rqstp->rq_proc;
+				proc = rqstp->rq_proc,
+				proto =  rqstp->rq_prot,
+				port = rqstp->rq_addr.sin_port;
 	unsigned long		age;
 
 	rqstp->rq_cacherep = NULL;
@@ -158,6 +160,7 @@
 	while ((rp = rp->c_hash_next) != rh) {
 		if (rp->c_state != RC_UNUSED &&
 		    xid == rp->c_xid && proc == rp->c_proc &&
+		    proto == rp->c_prot && port == rp->c_client.sin_port &&		
 		    exp_checkaddr(clp, rp->c_client)) {
 			nfsdstats.rchits++;
 			goto found_entry;
@@ -195,7 +198,8 @@
 	rp->c_state = RC_INPROG;
 	rp->c_xid = xid;
 	rp->c_proc = proc;
-	rp->c_client = rqstp->rq_addr.sin_addr;
+	rp->c_client = rqstp->rq_addr;
+	rp->c_prot = rqstp->rq_prot;
 	hash_refile(rp);
 
 	/* release any buffer */
--- linux-2.2.16-patched/net/sunrpc/xprt.c	Mon Jun 19 13:21:04 2000
+++ linux-2.2.16-tcp/net/sunrpc/xprt.c	Mon Jun 19 11:56:38 2000
@@ -1100,8 +1100,8 @@
 	struct rpc_rqst	*req = task->tk_rqstp;
 	struct rpc_xprt	*xprt = req->rq_xprt;
 
-	dprintk("RPC: %4d xprt_transmit(%x)\n", task->tk_pid, 
-				*(u32 *)(req->rq_svec[0].iov_base));
+	dprintk("RPC: %4d xprt_transmit(%x)\n", task->tk_pid, req->rq_xid);
+				
 
 	if (xprt->shutdown)
 		task->tk_status = -EIO;

--%--multipart-mixed-boundary-1.345193.961456705--%--

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic