[prev in list] [next in list] [prev in thread] [next in thread] 

List:       lustre-cvs
Subject:    [Lustre-cvs] CVS: obd/rpc connmgr.c,NONE,1.1.2.1 Makefile.am,1.5,1.5.2.1 client.c,1.27.2.1,1.27.2.2
From:       Phil Schwan <pschwan () users ! sourceforge ! net>
Date:       2002-04-29 21:49:06
[Download RAW message or body]

Update of /cvsroot/lustre/obd/rpc
In directory usw-pr-cvs1:/tmp/cvs-serv28223/rpc

Modified Files:
      Tag: ldlm_testing
	Makefile.am client.c connection.c niobuf.c pack_generic.c 
	recovd.c rpc.c service.c 
Added Files:
      Tag: ldlm_testing
	connmgr.c 
Log Message:
Moving the last few days' changes to the ldlm_testing branch.


--- NEW FILE ---
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 *  obd/rpc/connmgr.c
 *
 *  Lustre High Availability Daemon
 *
 *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
 *
 *  This code is issued under the GNU General Public License.
 *  See the file COPYING in this distribution
 *
 *  by Peter Braam <braam@clusterfs.com>
 *
 */

#define EXPORT_SYMTAB
#define DEBUG_SUBSYSTEM S_RPC

#include <linux/kmod.h>
#include <linux/lustre_lite.h>
#include <linux/lustre_ha.h>

static int connmgr_unpack_body(struct ptlrpc_request *req)
{
        struct connmgr_body *b = lustre_msg_buf(req->rq_repmsg, 0);
        if (b == NULL) {
                LBUG();
                RETURN(-EINVAL);
        }

        b->generation = NTOH__u32(b->generation);

        return 0;
}

int connmgr_connect(struct recovd_obd *recovd, struct ptlrpc_connection *conn)
{
        struct ptlrpc_request *req;
        struct ptlrpc_client *cl;
        struct connmgr_body *body;
        int rc, size = sizeof(*body);
        ENTRY;

        if (!recovd) {
                CERROR("no manager\n");
                LBUG();
        }
        cl = recovd->recovd_client;

        req = ptlrpc_prep_req(cl, conn, CONNMGR_CONNECT, 1, &size, NULL);
        if (!req)
                GOTO(out, rc = -ENOMEM);

        body = lustre_msg_buf(req->rq_reqmsg, 0);
        body->generation = HTON__u32(conn->c_generation);
        body->conn = (__u64)(unsigned long)conn;
        body->conn_token = conn->c_token;
        strncpy(body->conn_uuid, conn->c_local_uuid, sizeof(body->conn_uuid));

        req->rq_replen = lustre_msg_size(1, &size);

        rc = ptlrpc_queue_wait(req);
        rc = ptlrpc_check_status(req, rc);
        if (!rc) {
                rc = connmgr_unpack_body(req);
                if (rc)
                        GOTO(out_free, rc);
                body = lustre_msg_buf(req->rq_repmsg, 0);
                CDEBUG(D_NET, "remote generation: %o\n", body->generation);
                conn->c_level = LUSTRE_CONN_CON;
                conn->c_remote_conn = body->conn;
                conn->c_remote_token = body->conn_token;
                strncpy(conn->c_remote_uuid, body->conn_uuid,
                        sizeof(conn->c_remote_uuid));
        }

        EXIT;
 out_free:
        ptlrpc_free_req(req);
 out:
        return rc;
}

static int connmgr_handle_connect(struct ptlrpc_request *req)
{
        struct connmgr_body *body;
        int rc, size = sizeof(*body);
        ENTRY;

        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
        if (rc) {
                CERROR("connmgr: out of memory\n");
                req->rq_status = -ENOMEM;
                RETURN(0);
        }

        body = lustre_msg_buf(req->rq_reqmsg, 0);
        connmgr_unpack_body(req);

        req->rq_connection->c_remote_conn = body->conn;
        req->rq_connection->c_remote_token = body->conn_token;
        strncpy(req->rq_connection->c_remote_uuid, body->conn_uuid,
                sizeof(req->rq_connection->c_remote_uuid));

        CERROR("incoming generation %d\n", body->generation);
        body = lustre_msg_buf(req->rq_repmsg, 0);
        body->generation = 4711;
        body->conn = (__u64)(unsigned long)req->rq_connection;
        body->conn_token = req->rq_connection->c_token;

        req->rq_connection->c_level = LUSTRE_CONN_CON;
        RETURN(0);
}

/*
 * Service entry point for the connection manager portal.
 *
 * Unpacks the incoming message, verifies that it is a request, and
 * dispatches on the opcode.  CONNMGR_CONNECT is the only opcode handled;
 * anything else is answered with ptlrpc_error() and that call's result is
 * returned directly.  For every other path the outcome is sent to the peer
 * (ptlrpc_reply() on success, ptlrpc_error() on failure) and 0 is returned.
 *
 * dev: not referenced by this function.
 * svc: service used to send the reply or the error.
 * req: the incoming request.
 */
int connmgr_handle(struct obd_device *dev, struct ptlrpc_service *svc,
                   struct ptlrpc_request *req)
{
        int rc;
        ENTRY;

        rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
        if (rc != 0) {
                CERROR("Invalid request\n");
                GOTO(out, rc);
        }

        /* NOTE(review): the header has already been through
         * lustre_unpack_msg() at this point; confirm that comparing against
         * a byte-swapped constant is really what is intended here. */
        if (NTOH__u32(PTL_RPC_MSG_REQUEST) != req->rq_reqmsg->type) {
                CERROR("wrong packet type sent %d\n",
                       req->rq_reqmsg->type);
                GOTO(out, rc = -EINVAL);
        }

        /* Unknown opcode: report the error and leave immediately, without
         * going through the common reply path below. */
        if (req->rq_reqmsg->opc != CONNMGR_CONNECT) {
                rc = ptlrpc_error(svc, req);
                RETURN(rc);
        }

        CDEBUG(D_INODE, "connmgr connect\n");
        rc = connmgr_handle_connect(req);

        EXIT;
out:
        if (rc == 0) {
                CDEBUG(D_NET, "sending reply\n");
                ptlrpc_reply(svc, req);
        } else {
                ptlrpc_error(svc, req);
        }

        return 0;
}

Index: Makefile.am
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/Makefile.am,v
retrieving revision 1.5
retrieving revision 1.5.2.1
diff -u -r1.5 -r1.5.2.1
--- Makefile.am	15 Apr 2002 08:13:58 -0000	1.5
+++ Makefile.am	29 Apr 2002 21:49:04 -0000	1.5.2.1
@@ -9,6 +9,6 @@
 modulefs_DATA = ptlrpc.o
 EXTRA_PROGRAMS = ptlrpc
 
-ptlrpc_SOURCES = recovd.c connection.c rpc.c events.c service.c client.c niobuf.c pack_generic.c
+ptlrpc_SOURCES = connmgr.c recovd.c connection.c rpc.c events.c service.c client.c niobuf.c pack_generic.c
 
 include $(top_srcdir)/Rules

Index: client.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/client.c,v
retrieving revision 1.27.2.1
retrieving revision 1.27.2.2
diff -u -r1.27.2.1 -r1.27.2.2
--- client.c	22 Apr 2002 15:13:09 -0000	1.27.2.1
+++ client.c	29 Apr 2002 21:49:04 -0000	1.27.2.2
@@ -26,22 +26,32 @@
 
 #include <linux/lustre_ha.h>
 
-void ptlrpc_init_client(struct recovd_obd *recovd, int req_portal,
+void ptlrpc_init_client(struct recovd_obd *recovd, 
+                        void (*recover)(struct ptlrpc_client *recover),
+                        int req_portal,
                         int rep_portal, struct ptlrpc_client *cl)
 {
         memset(cl, 0, sizeof(*cl));
         cl->cli_recovd = recovd;
+        cl->cli_recover = recover;
         if (recovd)
-                connmgr_cli_manage(recovd, cl);
+                recovd_cli_manage(recovd, cl);
         cl->cli_obd = NULL;
         cl->cli_request_portal = req_portal;
         cl->cli_reply_portal = rep_portal;
         INIT_LIST_HEAD(&cl->cli_sending_head);
         INIT_LIST_HEAD(&cl->cli_sent_head);
+        INIT_LIST_HEAD(&cl->cli_replied_head);
+        INIT_LIST_HEAD(&cl->cli_replay_head);
         spin_lock_init(&cl->cli_lock);
         sema_init(&cl->cli_rpc_sem, 32);
 }
 
+__u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
+{
+        return req->rq_connection->c_remote_uuid;
+}
+
 struct ptlrpc_connection *ptlrpc_uuid_to_connection(char *uuid)
 {
         struct ptlrpc_connection *c;
@@ -76,12 +86,16 @@
 
 void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk)
 {
-        if (bulk == NULL)
+        ENTRY;
+        if (bulk == NULL) {
+                EXIT;
                 return;
+        }
 
         ptlrpc_put_connection(bulk->b_connection);
 
         OBD_FREE(bulk, sizeof(*bulk));
+        EXIT;
 }
 
 struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
@@ -106,7 +120,6 @@
                 RETURN(NULL);
         }
 
-        request->rq_time = CURRENT_TIME;
         request->rq_type = PTL_RPC_TYPE_REQUEST;
         request->rq_connection = ptlrpc_connection_addref(conn);
 
@@ -116,6 +129,9 @@
         request->rq_reqmsg->type = HTON__u32(PTL_RPC_MSG_REQUEST);
         INIT_LIST_HEAD(&request->rq_list);
 
+        /* this will be dec()d once in req_finished, once in free_committed */
+        atomic_set(&request->rq_refcount, 2);
+
         spin_lock(&conn->c_lock);
         request->rq_reqmsg->xid = HTON__u32(++conn->c_xid_out);
         spin_unlock(&conn->c_lock);
@@ -125,6 +141,20 @@
         RETURN(request);
 }
 
+void ptlrpc_req_finished(struct ptlrpc_request *request)
+{
+        if (request == NULL)
+                return;
+
+        if (request->rq_repmsg != NULL) { 
+                OBD_FREE(request->rq_repmsg, request->rq_replen);
+                request->rq_repmsg = NULL;
+        }
+
+        if (atomic_dec_and_test(&request->rq_refcount))
+                ptlrpc_free_req(request);
+}
+
 void ptlrpc_free_req(struct ptlrpc_request *request)
 {
         if (request == NULL)
@@ -132,6 +162,8 @@
 
         if (request->rq_repmsg != NULL)
                 OBD_FREE(request->rq_repmsg, request->rq_replen);
+        if (request->rq_reqmsg != NULL)
+                OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
 
         if (request->rq_client) {
                 spin_lock(&request->rq_client->cli_lock);
@@ -148,18 +180,30 @@
 {
         int rc = 0;
 
-        schedule_timeout(3 * HZ);  /* 3 second timeout */
         if (req->rq_repmsg != NULL) {
+                req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
                 req->rq_flags |= PTL_RPC_FL_REPLY;
                 GOTO(out, rc = 1);
         }
 
-        if (CURRENT_TIME - req->rq_time >= 3) {
+        if (req->rq_flags & PTL_RPC_FL_RESEND) { 
+                CERROR("-- RESEND --\n");
+                req->rq_status = -EAGAIN;
+                GOTO(out, rc = 1);
+        }
+
+        if (CURRENT_TIME - req->rq_time >= req->rq_timeout) {
                 CERROR("-- REQ TIMEOUT --\n");
+                /* clear the timeout */
+                req->rq_timeout = 0;
                 req->rq_flags |= PTL_RPC_FL_TIMEOUT;
                 if (req->rq_client && req->rq_client->cli_recovd)
-                        connmgr_cli_fail(req->rq_client);
-                return 0;
+                        recovd_cli_fail(req->rq_client);
+                GOTO(out, rc = 0);
+        }
+
+        if (req->rq_timeout) { 
+                schedule_timeout(req->rq_timeout * HZ);
         }
 
         if (sigismember(&(current->pending.signal), SIGKILL) ||
@@ -226,13 +270,86 @@
         return 0;
 }
 
+/* caller must lock cli */
+void ptlrpc_free_committed(struct ptlrpc_client *cli)
+{
+        struct list_head *tmp, *saved;
+        struct ptlrpc_request *req;
+
+        list_for_each_safe(tmp, saved, &cli->cli_replied_head) {
+                req = list_entry(tmp, struct ptlrpc_request, rq_list);
+
+                /* not yet committed */ 
+                if (req->rq_transno > cli->cli_last_committed)
+                        break; 
+
+                /* retain for replay if flagged */
+                if (req->rq_flags & PTL_RPC_FL_RETAIN) {
+                        list_del(&req->rq_list); 
+                        list_add(&req->rq_list, &cli->cli_replay_head);
+                } else {
+                        CDEBUG(D_INFO, "Marking request %p as committed ("
+                               "transno=%Lu, last_committed=%Lu\n", req,
+                               req->rq_transno, cli->cli_last_committed);
+                        if (atomic_dec_and_test(&req->rq_refcount))
+                                ptlrpc_free_req(req);
+                }
+        }
+
+        EXIT;
+        return;
+}
+
+void ptlrpc_cleanup_client(struct ptlrpc_client *cli)
+{
+        struct list_head *tmp, *saved;
+        struct ptlrpc_request *req;
+        ENTRY;
+
+        spin_lock(&cli->cli_lock);
+        list_for_each_safe(tmp, saved, &cli->cli_replied_head) {
+                req = list_entry(tmp, struct ptlrpc_request, rq_list);
+                /* We do this to prevent ptlrpc_free_req from taking cli_lock */
+                CDEBUG(D_INFO, "Cleaning req %p from replied head.\n", req);
+                list_del(&req->rq_list);
+                req->rq_client = NULL;
+                ptlrpc_free_req(req); 
+        }
+        list_for_each_safe(tmp, saved, &cli->cli_sent_head) {
+                req = list_entry(tmp, struct ptlrpc_request, rq_list);
+                CDEBUG(D_INFO, "Cleaning req %p from sent head.\n", req);
+                list_del(&req->rq_list);
+                req->rq_client = NULL;
+                ptlrpc_free_req(req); 
+        }
+        list_for_each_safe(tmp, saved, &cli->cli_replay_head) {
+                req = list_entry(tmp, struct ptlrpc_request, rq_list);
+                CERROR("Request %p is on the replay head at cleanup!\n", req);
+                list_del(&req->rq_list);
+                req->rq_client = NULL;
+                ptlrpc_free_req(req); 
+        }
+        list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
+                req = list_entry(tmp, struct ptlrpc_request, rq_list);
+                CDEBUG(D_INFO, "Cleaning req %p from sending head.\n", req);
+                list_del(&req->rq_list);
+                req->rq_client = NULL;
+                ptlrpc_free_req(req); 
+        }
+        spin_unlock(&cli->cli_lock);
+        EXIT;
+        return;
+}
+
 int ptlrpc_queue_wait(struct ptlrpc_request *req)
 {
         int rc = 0;
         ENTRY;
 
         init_waitqueue_head(&req->rq_wait_for_rep);
-
+ resend:
+        req->rq_time = CURRENT_TIME;
+        req->rq_timeout = 3;
         rc = ptl_send_rpc(req);
         if (rc) {
                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
@@ -244,7 +361,13 @@
         CDEBUG(D_OTHER, "-- sleeping\n");
         wait_event_interruptible(req->rq_wait_for_rep, ptlrpc_check_reply(req));
         CDEBUG(D_OTHER, "-- done\n");
-        ptlrpc_cleanup_request_buf(req);
+
+        if (req->rq_flags & PTL_RPC_FL_RESEND) {
+                req->rq_flags &= ~PTL_RPC_FL_RESEND;
+                goto resend;
+        }
+
+        //ptlrpc_cleanup_request_buf(req);
         up(&req->rq_client->cli_rpc_sem);
         if (req->rq_flags & PTL_RPC_FL_INTR) {
                 /* Clean up the dangling reply buffers */
@@ -265,10 +388,19 @@
                 GOTO(out, rc);
         }
         CDEBUG(D_NET, "got rep %d\n", req->rq_repmsg->xid);
-
         if (req->rq_repmsg->status == 0)
                 CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
                        req->rq_replen, req->rq_repmsg->status);
+
+        spin_lock(&req->rq_client->cli_lock);
+        /* add to the tail of the replied head */
+        list_del(&req->rq_list);
+        list_add(&req->rq_list, req->rq_client->cli_replied_head.prev); 
+
+        req->rq_client->cli_last_rcvd = req->rq_repmsg->last_rcvd;
+        req->rq_client->cli_last_committed = req->rq_repmsg->last_committed;
+        ptlrpc_free_committed(req->rq_client); 
+        spin_unlock(&req->rq_client->cli_lock);
 
         EXIT;
  out:

Index: connection.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/connection.c,v
retrieving revision 1.2.2.1
retrieving revision 1.2.2.2
diff -u -r1.2.2.1 -r1.2.2.2
--- connection.c	22 Apr 2002 15:13:09 -0000	1.2.2.1
+++ connection.c	29 Apr 2002 21:49:04 -0000	1.2.2.2
@@ -82,6 +82,7 @@
 int ptlrpc_put_connection(struct ptlrpc_connection *c)
 {
         int rc = 0;
+        ENTRY;
 
         if (atomic_dec_and_test(&c->c_refcount)) {
                 spin_lock(&conn_lock);
@@ -91,13 +92,14 @@
                 rc = 1;
         }
 
-        return rc;
+        RETURN(rc);
 }
 
 struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *c)
 {
+        ENTRY;
         atomic_inc(&c->c_refcount);
-        return c;
+        RETURN(c);
 }
 
 void ptlrpc_init_connection(void)

Index: niobuf.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/niobuf.c,v
retrieving revision 1.26.2.1
retrieving revision 1.26.2.2
diff -u -r1.26.2.1 -r1.26.2.2
--- niobuf.c	22 Apr 2002 15:13:09 -0000	1.26.2.1
+++ niobuf.c	29 Apr 2002 21:49:04 -0000	1.26.2.2
@@ -228,6 +228,16 @@
         RETURN(rc);
 }
 
+void ptlrpc_resend_req(struct ptlrpc_request *req)
+{
+        ENTRY;
+        req->rq_flags |= PTL_RPC_FL_RESEND;
+        req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
+        wake_up_interruptible(&req->rq_wait_for_rep);
+        EXIT;
+        return; 
+}
+
 int ptl_send_rpc(struct ptlrpc_request *request)
 {
         ptl_process_id_t local_id;
@@ -286,7 +296,7 @@
 
         CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid %u, portal %u\n",
                request->rq_replen, request->rq_reqmsg->xid,
-               request->rq_client->cli_request_portal);
+               request->rq_client->cli_reply_portal);
 
         spin_lock(&request->rq_client->cli_lock);
         list_add(&request->rq_list, &request->rq_client->cli_sending_head);

Index: pack_generic.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/pack_generic.c,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -u -r1.2 -r1.2.2.1
--- pack_generic.c	13 Apr 2002 17:01:01 -0000	1.2
+++ pack_generic.c	29 Apr 2002 21:49:04 -0000	1.2.2.1
@@ -88,6 +88,8 @@
         m->type = NTOH__u32(m->type);
         m->connid = NTOH__u32(m->connid);
         m->bufcount = NTOH__u32(m->bufcount);
+        m->last_rcvd = NTOH__u32(m->last_rcvd);
+        m->last_committed = NTOH__u32(m->last_committed);
 
         required_len += m->bufcount * sizeof(__u32);
         if (len < required_len)

Index: recovd.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/recovd.c,v
retrieving revision 1.2.2.2
retrieving revision 1.2.2.3
diff -u -r1.2.2.2 -r1.2.2.3
--- recovd.c	24 Apr 2002 20:00:44 -0000	1.2.2.2
+++ recovd.c	29 Apr 2002 21:49:04 -0000	1.2.2.3
@@ -23,21 +23,22 @@
 
 struct recovd_obd *ptlrpc_connmgr;
 
-void connmgr_cli_manage(struct recovd_obd *recovd, struct ptlrpc_client *cli)
+void recovd_cli_manage(struct recovd_obd *recovd, struct ptlrpc_client *cli)
 {
         ENTRY;
         cli->cli_recovd = recovd;
         spin_lock(&recovd->recovd_lock);
-        list_add(&cli->cli_ha_item, &recovd->recovd_connections_lh);
+        list_add(&cli->cli_ha_item, &recovd->recovd_clients_lh);
         spin_unlock(&recovd->recovd_lock);
         EXIT;
 }
 
-void connmgr_cli_fail(struct ptlrpc_client *cli)
+void recovd_cli_fail(struct ptlrpc_client *cli)
 {
         ENTRY;
         spin_lock(&cli->cli_recovd->recovd_lock);
-        cli->cli_recovd->recovd_flags |= SVC_HA_EVENT;
+        cli->cli_recovd->recovd_flags |= RECOVD_FAIL;
+        cli->cli_recovd->recovd_wakeup_flag = 1;
         list_del(&cli->cli_ha_item);
         list_add(&cli->cli_ha_item, &cli->cli_recovd->recovd_troubled_lh);
         spin_unlock(&cli->cli_recovd->recovd_lock);
@@ -45,7 +46,16 @@
         EXIT;
 }
 
-static int connmgr_upcall(void)
+void recovd_cli_fixed(struct ptlrpc_client *cli)
+{
+        ENTRY;
+        list_del(&cli->cli_ha_item);
+        list_add(&cli->cli_ha_item, &cli->cli_recovd->recovd_clients_lh);
+        EXIT;
+}
+
+
+static int recovd_upcall(void)
 {
         char *argv[2];
         char *envp[3];
@@ -60,138 +70,6 @@
         return call_usermodehelper(argv[0], argv, envp);
 }
 
-static int connmgr_unpack_body(struct ptlrpc_request *req)
-{
-        struct connmgr_body *b = lustre_msg_buf(req->rq_repmsg, 0);
-        if (b == NULL) {
-                LBUG();
-                RETURN(-EINVAL);
-        }
-
-        b->generation = NTOH__u32(b->generation);
-
-        return 0;
-}
-
-int connmgr_connect(struct recovd_obd *recovd, struct ptlrpc_connection *conn)
-{
-        struct ptlrpc_request *req;
-        struct ptlrpc_client *cl;
-        struct connmgr_body *body;
-        int rc, size = sizeof(*body);
-        ENTRY;
-
-        if (!recovd) {
-                CERROR("no manager\n");
-                LBUG();
-                GOTO(out, rc = -EINVAL);
-        }
-        cl = recovd->recovd_client;
-
-        req = ptlrpc_prep_req(cl, conn, CONNMGR_CONNECT, 1, &size, NULL);
-        if (!req)
-                GOTO(out, rc = -ENOMEM);
-
-        body = lustre_msg_buf(req->rq_reqmsg, 0);
-        body->generation = HTON__u32(conn->c_generation);
-        body->conn = (__u64)(unsigned long)conn;
-        body->conn_token = conn->c_token;
-
-        req->rq_replen = lustre_msg_size(1, &size);
-
-        rc = ptlrpc_queue_wait(req);
-        rc = ptlrpc_check_status(req, rc);
-        if (!rc) {
-                rc = connmgr_unpack_body(req);
-                if (rc)
-                        GOTO(out_free, rc);
-                body = lustre_msg_buf(req->rq_repmsg, 0);
-                CDEBUG(D_NET, "remote generation: %o\n", body->generation);
-                conn->c_level = LUSTRE_CONN_CON;
-                conn->c_remote_conn = body->conn;
-                conn->c_remote_token = body->conn_token;
-        }
-
-out_free:
-        ptlrpc_free_req(req);
-out:
-        RETURN(rc);
-}
-
-static int connmgr_handle_connect(struct ptlrpc_request *req)
-{
-        struct connmgr_body *body;
-        int rc, size = sizeof(*body);
-        ENTRY;
-
-        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
-        if (rc) {
-                CERROR("connmgr: out of memory\n");
-                req->rq_status = -ENOMEM;
-                RETURN(0);
-        }
-
-        body = lustre_msg_buf(req->rq_reqmsg, 0);
-        rc = connmgr_unpack_body(req);
-        if (rc) {
-                req->rq_status = rc;
-                RETURN(0);
-        }
-
-        req->rq_connection->c_remote_conn = body->conn;
-        req->rq_connection->c_remote_token = body->conn_token;
-
-        CERROR("incoming generation %d\n", body->generation);
-        body = lustre_msg_buf(req->rq_repmsg, 0);
-        body->generation = 4711;
-        body->conn = (__u64)(unsigned long)req->rq_connection;
-        body->conn_token = req->rq_connection->c_token;
-
-        req->rq_connection->c_level = LUSTRE_CONN_CON;
-        RETURN(0);
-}
-
-int connmgr_handle(struct obd_device *dev, struct ptlrpc_service *svc,
-                   struct ptlrpc_request *req)
-{
-        int rc;
-        ENTRY;
-
-        rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
-        if (rc) {
-                CERROR("Invalid request\n");
-                GOTO(out, rc);
-        }
-
-        if (req->rq_reqmsg->type != NTOH__u32(PTL_RPC_MSG_REQUEST)) {
-                CERROR("wrong packet type sent %d\n",
-                       req->rq_reqmsg->type);
-                GOTO(out, rc = -EINVAL);
-        }
-
-        switch (req->rq_reqmsg->opc) {
-        case CONNMGR_CONNECT:
-                CDEBUG(D_INODE, "connmgr connect\n");
-                rc = connmgr_handle_connect(req);
-                break;
-
-        default:
-                rc = ptlrpc_error(svc, req);
-                RETURN(rc);
-        }
-
-        EXIT;
-out:
-        if (rc) {
-                ptlrpc_error(svc, req);
-        } else {
-                CDEBUG(D_NET, "sending reply\n");
-                ptlrpc_reply(svc, req);
-        }
-
-        return 0;
-}
-
 static int recovd_check_event(struct recovd_obd *recovd)
 {
         int rc = 0;
@@ -199,61 +77,77 @@
 
         spin_lock(&recovd->recovd_lock);
 
-        if (!(recovd->recovd_flags & MGR_WORKING) &&
-            !list_empty(&recovd->recovd_troubled_lh)) {
-
-                CERROR("connection in trouble - state: WORKING, upcall\n");
-                recovd->recovd_flags = MGR_WORKING;
-
-                recovd->recovd_waketime = CURRENT_TIME;
-                recovd->recovd_timeout = 5 * HZ;
+        recovd->recovd_waketime = CURRENT_TIME;
+        if (recovd->recovd_timeout) 
                 schedule_timeout(recovd->recovd_timeout);
-        }
 
-        if (recovd->recovd_flags & MGR_WORKING &&
-            CURRENT_TIME <= recovd->recovd_waketime + recovd->recovd_timeout) {
-                CERROR("WORKING: new event\n");
+        if (recovd->recovd_wakeup_flag) {
+                CERROR("service woken\n"); 
+                GOTO(out, rc = 1);
+        }
 
-                recovd->recovd_waketime = CURRENT_TIME;
-                schedule_timeout(recovd->recovd_timeout);
+        if (recovd->recovd_timeout && 
+            CURRENT_TIME > recovd->recovd_waketime + recovd->recovd_timeout) {
+                recovd->recovd_flags |= RECOVD_TIMEOUT;
+                CERROR("timeout\n");
+                GOTO(out, rc = 1);
         }
 
-        if (recovd->recovd_flags & MGR_STOPPING) {
-                CERROR("ha mgr stopping\n");
+        if (recovd->recovd_flags & RECOVD_STOPPING) {
+                CERROR("recovd stopping\n");
                 rc = 1;
         }
 
+ out:
+        recovd->recovd_wakeup_flag = 0;
         spin_unlock(&recovd->recovd_lock);
         RETURN(rc);
 }
 
 static int recovd_handle_event(struct recovd_obd *recovd)
 {
+        ENTRY;
         spin_lock(&recovd->recovd_lock);
 
-        if (!(recovd->recovd_flags & MGR_WORKING) &&
-            !list_empty(&recovd->recovd_troubled_lh)) {
-
-                CERROR("connection in trouble - state: WORKING, upcall\n");
-                recovd->recovd_flags = MGR_WORKING;
+        if (!(recovd->recovd_flags & RECOVD_UPCALL_WAIT) &&
+            recovd->recovd_flags & RECOVD_FAIL) { 
 
+                CERROR("client in trouble: flags -> UPCALL_WAITING\n");
+                recovd->recovd_flags |= RECOVD_UPCALL_WAIT;
 
-                connmgr_upcall();
+                recovd_upcall();
                 recovd->recovd_waketime = CURRENT_TIME;
-                recovd->recovd_timeout = 5 * HZ;
+                recovd->recovd_timeout = 10 * HZ;
                 schedule_timeout(recovd->recovd_timeout);
         }
 
-        if (recovd->recovd_flags & MGR_WORKING &&
-            CURRENT_TIME <= recovd->recovd_waketime + recovd->recovd_timeout) {
-                CERROR("WORKING: new event\n");
+        if (recovd->recovd_flags & RECOVD_TIMEOUT) { 
+                CERROR("timeout - no news from upcall?\n");
+                recovd->recovd_flags &= ~RECOVD_TIMEOUT;
+        }
 
-                recovd->recovd_waketime = CURRENT_TIME;
-                schedule_timeout(recovd->recovd_timeout);
+        if (recovd->recovd_flags & RECOVD_UPCALL_ANSWER) { 
+                struct list_head *tmp, *pos;
+                CERROR("UPCALL_WAITING: upcall answer\n");
+                CERROR("** fill me in with recovery\n");
+
+                list_for_each_safe(tmp, pos, &recovd->recovd_troubled_lh) { 
+                        struct ptlrpc_client *cli = list_entry
+                                (tmp, struct ptlrpc_client, cli_ha_item);
+
+                        list_del(&cli->cli_ha_item); 
+                        spin_unlock(&recovd->recovd_lock);
+                        if (cli->cli_recover)
+                                cli->cli_recover(cli); 
+                        spin_lock(&recovd->recovd_lock);
+                }
+
+                recovd->recovd_timeout = 0;
+                recovd->recovd_flags = RECOVD_IDLE; 
         }
 
         spin_unlock(&recovd->recovd_lock);
-        return 0;
+        RETURN(0);
 }
 
 static int recovd_main(void *arg)
@@ -273,7 +167,7 @@
 
         /* Record that the  thread is running */
         recovd->recovd_thread = current;
-        recovd->recovd_flags = MGR_RUNNING;
+        recovd->recovd_flags = RECOVD_IDLE;
         wake_up(&recovd->recovd_ctl_waitq);
 
         /* And now, loop forever on requests */
@@ -282,9 +176,9 @@
                                          recovd_check_event(recovd));
 
                 spin_lock(&recovd->recovd_lock);
-                if (recovd->recovd_flags & MGR_STOPPING) {
+                if (recovd->recovd_flags & RECOVD_STOPPING) {
                         spin_unlock(&recovd->recovd_lock);
-                        CERROR("lustre_hamgr quitting\n");
+                        CERROR("lustre_recovd stopping\n");
                         EXIT;
                         break;
                 }
@@ -294,7 +188,7 @@
         }
 
         recovd->recovd_thread = NULL;
-        recovd->recovd_flags = MGR_STOPPED;
+        recovd->recovd_flags = RECOVD_STOPPED;
         wake_up(&recovd->recovd_ctl_waitq);
         CDEBUG(D_NET, "mgr exiting process %d\n", current->pid);
         RETURN(0);
@@ -305,7 +199,7 @@
         int rc;
         ENTRY;
 
-        INIT_LIST_HEAD(&recovd->recovd_connections_lh);
+        INIT_LIST_HEAD(&recovd->recovd_clients_lh);
         INIT_LIST_HEAD(&recovd->recovd_troubled_lh);
         spin_lock_init(&recovd->recovd_lock);
 
@@ -319,17 +213,19 @@
                 CERROR("cannot start thread\n");
                 RETURN(-EINVAL);
         }
-        wait_event(recovd->recovd_ctl_waitq, recovd->recovd_flags & MGR_RUNNING);
+        wait_event(recovd->recovd_ctl_waitq, recovd->recovd_flags & RECOVD_IDLE);
 
         RETURN(0);
 }
 
 int recovd_cleanup(struct recovd_obd *recovd)
 {
-        recovd->recovd_flags = MGR_STOPPING;
-
+        spin_lock(&recovd->recovd_lock);
+        recovd->recovd_flags = RECOVD_STOPPING;
         wake_up(&recovd->recovd_waitq);
+        spin_unlock(&recovd->recovd_lock);
+
         wait_event_interruptible(recovd->recovd_ctl_waitq,
-                                 (recovd->recovd_flags & MGR_STOPPED));
+                                 (recovd->recovd_flags & RECOVD_STOPPED));
         RETURN(0);
 }

Index: rpc.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/rpc.c,v
retrieving revision 1.26.2.1
retrieving revision 1.26.2.2
diff -u -r1.26.2.1 -r1.26.2.2
--- rpc.c	22 Apr 2002 15:13:09 -0000	1.26.2.1
+++ rpc.c	29 Apr 2002 21:49:04 -0000	1.26.2.2
@@ -55,7 +55,7 @@
                 GOTO(err_recovd, err = -EINVAL);
         }
 
-        ptlrpc_init_client(NULL, CONNMGR_REQUEST_PORTAL, 
+        ptlrpc_init_client(NULL, NULL, CONNMGR_REQUEST_PORTAL, 
                            CONNMGR_REPLY_PORTAL, recovd->recovd_client);
 
         err = ptlrpc_start_thread(obddev, recovd->recovd_service, "lustre_connmgr");
@@ -95,17 +95,36 @@
         }
 
         OBD_FREE(recovd->recovd_service, sizeof(*recovd->recovd_service));
-        recovd->recovd_flags = MGR_STOPPING;
-
+        ptlrpc_cleanup_client(recovd->recovd_client);
         OBD_FREE(recovd->recovd_client, sizeof(*recovd->recovd_client));
         MOD_DEC_USE_COUNT;
         RETURN(0);
 }
 
+
+int connmgr_iocontrol(int cmd, struct obd_conn *conn, int len, void *karg,
+                         void *uarg)
+{
+        struct recovd_obd *recovd = &conn->oc_dev->u.recovd;
+
+        ENTRY;
+        if (cmd == OBD_RECOVD_NEWCONN) { 
+                spin_lock(&recovd->recovd_lock);
+                recovd->recovd_flags |= RECOVD_UPCALL_ANSWER;
+                recovd->recovd_wakeup_flag = 1;
+                wake_up(&recovd->recovd_waitq);
+                spin_unlock(&recovd->recovd_lock);
+                EXIT;
+        }
+        return 0;
+}
+
+
 /* use obd ops to offer management infrastructure */
 static struct obd_ops recovd_obd_ops = {
         o_setup:       connmgr_setup,
         o_cleanup:     connmgr_cleanup,
+        o_iocontrol:     connmgr_iocontrol,
 };
 
 static int __init ptlrpc_init(void)

Index: service.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/service.c,v
retrieving revision 1.25.2.3
retrieving revision 1.25.2.4
diff -u -r1.25.2.3 -r1.25.2.4
--- service.c	29 Apr 2002 21:40:43 -0000	1.25.2.3
+++ service.c	29 Apr 2002 21:49:04 -0000	1.25.2.4
@@ -97,7 +97,7 @@
 
         err = kportal_uuid_to_peer(uuid, &service->srv_self);
         if (err) {
-                CERROR("cannot get peer for uuid %s", uuid);
+                CERROR("cannot get peer for uuid '%s'", uuid);
                 GOTO(err_free, NULL);
         }
 


_______________________________________________
Lustre-cvs mailing list
Lustre-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/lustre-cvs


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic