[prev in list] [next in list] [prev in thread] [next in thread]
List: lustre-cvs
Subject: [Lustre-cvs] CVS: obd/rpc connmgr.c,NONE,1.1.2.1 Makefile.am,1.5,1.5.2.1 client.c,1.27.2.1,1.27.2.2
From: Phil Schwan <pschwan () users ! sourceforge ! net>
Date: 2002-04-29 21:49:06
[Download RAW message or body]
Update of /cvsroot/lustre/obd/rpc
In directory usw-pr-cvs1:/tmp/cvs-serv28223/rpc
Modified Files:
Tag: ldlm_testing
Makefile.am client.c connection.c niobuf.c pack_generic.c
recovd.c rpc.c service.c
Added Files:
Tag: ldlm_testing
connmgr.c
Log Message:
Moving the last few days' changes to the ldlm_testing branch.
--- NEW FILE ---
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* obd/rpc/connmgr.c
*
* Lustre High Availability Daemon
*
* Copyright (C) 2001, 2002 Cluster File Systems, Inc.
*
* This code is issued under the GNU General Public License.
* See the file COPYING in this distribution
*
* by Peter Braam <braam@clusterfs.com>
*
*/
#define EXPORT_SYMTAB
#define DEBUG_SUBSYSTEM S_RPC
#include <linux/kmod.h>
#include <linux/lustre_lite.h>
#include <linux/lustre_ha.h>
/*
 * Unpack the connmgr body carried in the reply message of @req, in place.
 *
 * Only buffer 0 of req->rq_repmsg is looked at, and the generation field is
 * the only member passed through NTOH__u32().
 *
 * Returns 0 on success, or -EINVAL (after LBUG()) when the reply message has
 * no body buffer.
 */
static int connmgr_unpack_body(struct ptlrpc_request *req)
{
        struct connmgr_body *body;

        body = lustre_msg_buf(req->rq_repmsg, 0);
        if (!body) {
                LBUG();
                RETURN(-EINVAL);
        }

        body->generation = NTOH__u32(body->generation);
        return 0;
}
int connmgr_connect(struct recovd_obd *recovd, struct ptlrpc_connection *conn)
{
struct ptlrpc_request *req;
struct ptlrpc_client *cl;
struct connmgr_body *body;
int rc, size = sizeof(*body);
ENTRY;
if (!recovd) {
CERROR("no manager\n");
LBUG();
}
cl = recovd->recovd_client;
req = ptlrpc_prep_req(cl, conn, CONNMGR_CONNECT, 1, &size, NULL);
if (!req)
GOTO(out, rc = -ENOMEM);
body = lustre_msg_buf(req->rq_reqmsg, 0);
body->generation = HTON__u32(conn->c_generation);
body->conn = (__u64)(unsigned long)conn;
body->conn_token = conn->c_token;
strncpy(body->conn_uuid, conn->c_local_uuid, sizeof(body->conn_uuid));
req->rq_replen = lustre_msg_size(1, &size);
rc = ptlrpc_queue_wait(req);
rc = ptlrpc_check_status(req, rc);
if (!rc) {
rc = connmgr_unpack_body(req);
if (rc)
GOTO(out_free, rc);
body = lustre_msg_buf(req->rq_repmsg, 0);
CDEBUG(D_NET, "remote generation: %o\n", body->generation);
conn->c_level = LUSTRE_CONN_CON;
conn->c_remote_conn = body->conn;
conn->c_remote_token = body->conn_token;
strncpy(conn->c_remote_uuid, body->conn_uuid,
sizeof(conn->c_remote_uuid));
}
EXIT;
out_free:
ptlrpc_free_req(req);
out:
return rc;
}
static int connmgr_handle_connect(struct ptlrpc_request *req)
{
struct connmgr_body *body;
int rc, size = sizeof(*body);
ENTRY;
rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
if (rc) {
CERROR("connmgr: out of memory\n");
req->rq_status = -ENOMEM;
RETURN(0);
}
body = lustre_msg_buf(req->rq_reqmsg, 0);
connmgr_unpack_body(req);
req->rq_connection->c_remote_conn = body->conn;
req->rq_connection->c_remote_token = body->conn_token;
strncpy(req->rq_connection->c_remote_uuid, body->conn_uuid,
sizeof(req->rq_connection->c_remote_uuid));
CERROR("incoming generation %d\n", body->generation);
body = lustre_msg_buf(req->rq_repmsg, 0);
body->generation = 4711;
body->conn = (__u64)(unsigned long)req->rq_connection;
body->conn_token = req->rq_connection->c_token;
req->rq_connection->c_level = LUSTRE_CONN_CON;
RETURN(0);
}
/*
 * Request handler for the connection-manager service.
 *
 * Unpacks the incoming message, checks that it is a request, and dispatches
 * on the opcode.  CONNMGR_CONNECT is the only opcode handled; any other
 * opcode is answered with ptlrpc_error() and that call's result is returned
 * directly.  For every other path the function sends either an error or a
 * normal reply, depending on the handler's result, and returns 0.
 *
 * @dev is not used by this function.
 */
int connmgr_handle(struct obd_device *dev, struct ptlrpc_service *svc,
                   struct ptlrpc_request *req)
{
        int rc;
        ENTRY;

        rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
        if (rc != 0) {
                CERROR("Invalid request\n");
                GOTO(out, rc);
        }

        if (req->rq_reqmsg->type != NTOH__u32(PTL_RPC_MSG_REQUEST)) {
                CERROR("wrong packet type sent %d\n", req->rq_reqmsg->type);
                GOTO(out, rc = -EINVAL);
        }

        switch (req->rq_reqmsg->opc) {
        case CONNMGR_CONNECT:
                CDEBUG(D_INODE, "connmgr connect\n");
                rc = connmgr_handle_connect(req);
                break;

        default:
                /* Unknown opcode: answer with an error and leave at once. */
                rc = ptlrpc_error(svc, req);
                RETURN(rc);
        }

        EXIT;
out:
        if (rc == 0) {
                CDEBUG(D_NET, "sending reply\n");
                ptlrpc_reply(svc, req);
        } else {
                ptlrpc_error(svc, req);
        }

        return 0;
}
Index: Makefile.am
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/Makefile.am,v
retrieving revision 1.5
retrieving revision 1.5.2.1
diff -u -r1.5 -r1.5.2.1
--- Makefile.am 15 Apr 2002 08:13:58 -0000 1.5
+++ Makefile.am 29 Apr 2002 21:49:04 -0000 1.5.2.1
@@ -9,6 +9,6 @@
modulefs_DATA = ptlrpc.o
EXTRA_PROGRAMS = ptlrpc
-ptlrpc_SOURCES = recovd.c connection.c rpc.c events.c service.c client.c niobuf.c \
-	pack_generic.c
+ptlrpc_SOURCES = connmgr.c recovd.c connection.c rpc.c events.c \
+	service.c client.c niobuf.c pack_generic.c
include $(top_srcdir)/Rules
Index: client.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/client.c,v
retrieving revision 1.27.2.1
retrieving revision 1.27.2.2
diff -u -r1.27.2.1 -r1.27.2.2
--- client.c 22 Apr 2002 15:13:09 -0000 1.27.2.1
+++ client.c 29 Apr 2002 21:49:04 -0000 1.27.2.2
@@ -26,22 +26,32 @@
#include <linux/lustre_ha.h>
-void ptlrpc_init_client(struct recovd_obd *recovd, int req_portal,
+void ptlrpc_init_client(struct recovd_obd *recovd,
+ void (*recover)(struct ptlrpc_client *recover),
+ int req_portal,
int rep_portal, struct ptlrpc_client *cl)
{
memset(cl, 0, sizeof(*cl));
cl->cli_recovd = recovd;
+ cl->cli_recover = recover;
if (recovd)
- connmgr_cli_manage(recovd, cl);
+ recovd_cli_manage(recovd, cl);
cl->cli_obd = NULL;
cl->cli_request_portal = req_portal;
cl->cli_reply_portal = rep_portal;
INIT_LIST_HEAD(&cl->cli_sending_head);
INIT_LIST_HEAD(&cl->cli_sent_head);
+ INIT_LIST_HEAD(&cl->cli_replied_head);
+ INIT_LIST_HEAD(&cl->cli_replay_head);
spin_lock_init(&cl->cli_lock);
sema_init(&cl->cli_rpc_sem, 32);
}
+__u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
+{
+ return req->rq_connection->c_remote_uuid;
+}
+
struct ptlrpc_connection *ptlrpc_uuid_to_connection(char *uuid)
{
struct ptlrpc_connection *c;
@@ -76,12 +86,16 @@
void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk)
{
- if (bulk == NULL)
+ ENTRY;
+ if (bulk == NULL) {
+ EXIT;
return;
+ }
ptlrpc_put_connection(bulk->b_connection);
OBD_FREE(bulk, sizeof(*bulk));
+ EXIT;
}
struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
@@ -106,7 +120,6 @@
RETURN(NULL);
}
- request->rq_time = CURRENT_TIME;
request->rq_type = PTL_RPC_TYPE_REQUEST;
request->rq_connection = ptlrpc_connection_addref(conn);
@@ -116,6 +129,9 @@
request->rq_reqmsg->type = HTON__u32(PTL_RPC_MSG_REQUEST);
INIT_LIST_HEAD(&request->rq_list);
+ /* this will be dec()d once in req_finished, once in free_committed */
+ atomic_set(&request->rq_refcount, 2);
+
spin_lock(&conn->c_lock);
request->rq_reqmsg->xid = HTON__u32(++conn->c_xid_out);
spin_unlock(&conn->c_lock);
@@ -125,6 +141,20 @@
RETURN(request);
}
+void ptlrpc_req_finished(struct ptlrpc_request *request)
+{
+ if (request == NULL)
+ return;
+
+ if (request->rq_repmsg != NULL) {
+ OBD_FREE(request->rq_repmsg, request->rq_replen);
+ request->rq_repmsg = NULL;
+ }
+
+ if (atomic_dec_and_test(&request->rq_refcount))
+ ptlrpc_free_req(request);
+}
+
void ptlrpc_free_req(struct ptlrpc_request *request)
{
if (request == NULL)
@@ -132,6 +162,8 @@
if (request->rq_repmsg != NULL)
OBD_FREE(request->rq_repmsg, request->rq_replen);
+ if (request->rq_reqmsg != NULL)
+ OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
if (request->rq_client) {
spin_lock(&request->rq_client->cli_lock);
@@ -148,18 +180,30 @@
{
int rc = 0;
- schedule_timeout(3 * HZ); /* 3 second timeout */
if (req->rq_repmsg != NULL) {
+ req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
req->rq_flags |= PTL_RPC_FL_REPLY;
GOTO(out, rc = 1);
}
- if (CURRENT_TIME - req->rq_time >= 3) {
+ if (req->rq_flags & PTL_RPC_FL_RESEND) {
+ CERROR("-- RESEND --\n");
+ req->rq_status = -EAGAIN;
+ GOTO(out, rc = 1);
+ }
+
+ if (CURRENT_TIME - req->rq_time >= req->rq_timeout) {
CERROR("-- REQ TIMEOUT --\n");
+ /* clear the timeout */
+ req->rq_timeout = 0;
req->rq_flags |= PTL_RPC_FL_TIMEOUT;
if (req->rq_client && req->rq_client->cli_recovd)
- connmgr_cli_fail(req->rq_client);
- return 0;
+ recovd_cli_fail(req->rq_client);
+ GOTO(out, rc = 0);
+ }
+
+ if (req->rq_timeout) {
+ schedule_timeout(req->rq_timeout * HZ);
}
if (sigismember(&(current->pending.signal), SIGKILL) ||
@@ -226,13 +270,86 @@
return 0;
}
+/* caller must lock cli */
+void ptlrpc_free_committed(struct ptlrpc_client *cli)
+{
+ struct list_head *tmp, *saved;
+ struct ptlrpc_request *req;
+
+ list_for_each_safe(tmp, saved, &cli->cli_replied_head) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+
+ /* not yet committed */
+ if (req->rq_transno > cli->cli_last_committed)
+ break;
+
+ /* retain for replay if flagged */
+ if (req->rq_flags & PTL_RPC_FL_RETAIN) {
+ list_del(&req->rq_list);
+ list_add(&req->rq_list, &cli->cli_replay_head);
+ } else {
+ CDEBUG(D_INFO, "Marking request %p as committed ("
+ "transno=%Lu, last_committed=%Lu\n", req,
+ req->rq_transno, cli->cli_last_committed);
+ if (atomic_dec_and_test(&req->rq_refcount))
+ ptlrpc_free_req(req);
+ }
+ }
+
+ EXIT;
+ return;
+}
+
+void ptlrpc_cleanup_client(struct ptlrpc_client *cli)
+{
+ struct list_head *tmp, *saved;
+ struct ptlrpc_request *req;
+ ENTRY;
+
+ spin_lock(&cli->cli_lock);
+ list_for_each_safe(tmp, saved, &cli->cli_replied_head) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ /* We do this to prevent ptlrpc_free_req from taking cli_lock */
+ CDEBUG(D_INFO, "Cleaning req %p from replied head.\n", req);
+ list_del(&req->rq_list);
+ req->rq_client = NULL;
+ ptlrpc_free_req(req);
+ }
+ list_for_each_safe(tmp, saved, &cli->cli_sent_head) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ CDEBUG(D_INFO, "Cleaning req %p from sent head.\n", req);
+ list_del(&req->rq_list);
+ req->rq_client = NULL;
+ ptlrpc_free_req(req);
+ }
+ list_for_each_safe(tmp, saved, &cli->cli_replay_head) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ CERROR("Request %p is on the replay head at cleanup!\n", req);
+ list_del(&req->rq_list);
+ req->rq_client = NULL;
+ ptlrpc_free_req(req);
+ }
+ list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ CDEBUG(D_INFO, "Cleaning req %p from sending head.\n", req);
+ list_del(&req->rq_list);
+ req->rq_client = NULL;
+ ptlrpc_free_req(req);
+ }
+ spin_unlock(&cli->cli_lock);
+ EXIT;
+ return;
+}
+
int ptlrpc_queue_wait(struct ptlrpc_request *req)
{
int rc = 0;
ENTRY;
init_waitqueue_head(&req->rq_wait_for_rep);
-
+ resend:
+ req->rq_time = CURRENT_TIME;
+ req->rq_timeout = 3;
rc = ptl_send_rpc(req);
if (rc) {
CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
@@ -244,7 +361,13 @@
CDEBUG(D_OTHER, "-- sleeping\n");
wait_event_interruptible(req->rq_wait_for_rep, ptlrpc_check_reply(req));
CDEBUG(D_OTHER, "-- done\n");
- ptlrpc_cleanup_request_buf(req);
+
+ if (req->rq_flags & PTL_RPC_FL_RESEND) {
+ req->rq_flags &= ~PTL_RPC_FL_RESEND;
+ goto resend;
+ }
+
+ //ptlrpc_cleanup_request_buf(req);
up(&req->rq_client->cli_rpc_sem);
if (req->rq_flags & PTL_RPC_FL_INTR) {
/* Clean up the dangling reply buffers */
@@ -265,10 +388,19 @@
GOTO(out, rc);
}
CDEBUG(D_NET, "got rep %d\n", req->rq_repmsg->xid);
-
if (req->rq_repmsg->status == 0)
CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
req->rq_replen, req->rq_repmsg->status);
+
+ spin_lock(&req->rq_client->cli_lock);
+ /* add to the tail of the replied head */
+ list_del(&req->rq_list);
+ list_add(&req->rq_list, req->rq_client->cli_replied_head.prev);
+
+ req->rq_client->cli_last_rcvd = req->rq_repmsg->last_rcvd;
+ req->rq_client->cli_last_committed = req->rq_repmsg->last_committed;
+ ptlrpc_free_committed(req->rq_client);
+ spin_unlock(&req->rq_client->cli_lock);
EXIT;
out:
Index: connection.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/connection.c,v
retrieving revision 1.2.2.1
retrieving revision 1.2.2.2
diff -u -r1.2.2.1 -r1.2.2.2
--- connection.c 22 Apr 2002 15:13:09 -0000 1.2.2.1
+++ connection.c 29 Apr 2002 21:49:04 -0000 1.2.2.2
@@ -82,6 +82,7 @@
int ptlrpc_put_connection(struct ptlrpc_connection *c)
{
int rc = 0;
+ ENTRY;
if (atomic_dec_and_test(&c->c_refcount)) {
spin_lock(&conn_lock);
@@ -91,13 +92,14 @@
rc = 1;
}
- return rc;
+ RETURN(rc);
}
struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *c)
{
+ ENTRY;
atomic_inc(&c->c_refcount);
- return c;
+ RETURN(c);
}
void ptlrpc_init_connection(void)
Index: niobuf.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/niobuf.c,v
retrieving revision 1.26.2.1
retrieving revision 1.26.2.2
diff -u -r1.26.2.1 -r1.26.2.2
--- niobuf.c 22 Apr 2002 15:13:09 -0000 1.26.2.1
+++ niobuf.c 29 Apr 2002 21:49:04 -0000 1.26.2.2
@@ -228,6 +228,16 @@
RETURN(rc);
}
+void ptlrpc_resend_req(struct ptlrpc_request *req)
+{
+ ENTRY;
+ req->rq_flags |= PTL_RPC_FL_RESEND;
+ req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
+ wake_up_interruptible(&req->rq_wait_for_rep);
+ EXIT;
+ return;
+}
+
int ptl_send_rpc(struct ptlrpc_request *request)
{
ptl_process_id_t local_id;
@@ -286,7 +296,7 @@
CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid %u, portal %u\n",
request->rq_replen, request->rq_reqmsg->xid,
- request->rq_client->cli_request_portal);
+ request->rq_client->cli_reply_portal);
spin_lock(&request->rq_client->cli_lock);
list_add(&request->rq_list, &request->rq_client->cli_sending_head);
Index: pack_generic.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/pack_generic.c,v
retrieving revision 1.2
retrieving revision 1.2.2.1
diff -u -r1.2 -r1.2.2.1
--- pack_generic.c 13 Apr 2002 17:01:01 -0000 1.2
+++ pack_generic.c 29 Apr 2002 21:49:04 -0000 1.2.2.1
@@ -88,6 +88,8 @@
m->type = NTOH__u32(m->type);
m->connid = NTOH__u32(m->connid);
m->bufcount = NTOH__u32(m->bufcount);
+ m->last_rcvd = NTOH__u32(m->last_rcvd);
+ m->last_committed = NTOH__u32(m->last_committed);
required_len += m->bufcount * sizeof(__u32);
if (len < required_len)
Index: recovd.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/recovd.c,v
retrieving revision 1.2.2.2
retrieving revision 1.2.2.3
diff -u -r1.2.2.2 -r1.2.2.3
--- recovd.c 24 Apr 2002 20:00:44 -0000 1.2.2.2
+++ recovd.c 29 Apr 2002 21:49:04 -0000 1.2.2.3
@@ -23,21 +23,22 @@
struct recovd_obd *ptlrpc_connmgr;
-void connmgr_cli_manage(struct recovd_obd *recovd, struct ptlrpc_client *cli)
+void recovd_cli_manage(struct recovd_obd *recovd, struct ptlrpc_client *cli)
{
ENTRY;
cli->cli_recovd = recovd;
spin_lock(&recovd->recovd_lock);
- list_add(&cli->cli_ha_item, &recovd->recovd_connections_lh);
+ list_add(&cli->cli_ha_item, &recovd->recovd_clients_lh);
spin_unlock(&recovd->recovd_lock);
EXIT;
}
-void connmgr_cli_fail(struct ptlrpc_client *cli)
+void recovd_cli_fail(struct ptlrpc_client *cli)
{
ENTRY;
spin_lock(&cli->cli_recovd->recovd_lock);
- cli->cli_recovd->recovd_flags |= SVC_HA_EVENT;
+ cli->cli_recovd->recovd_flags |= RECOVD_FAIL;
+ cli->cli_recovd->recovd_wakeup_flag = 1;
list_del(&cli->cli_ha_item);
list_add(&cli->cli_ha_item, &cli->cli_recovd->recovd_troubled_lh);
spin_unlock(&cli->cli_recovd->recovd_lock);
@@ -45,7 +46,16 @@
EXIT;
}
-static int connmgr_upcall(void)
+void recovd_cli_fixed(struct ptlrpc_client *cli)
+{
+ ENTRY;
+ list_del(&cli->cli_ha_item);
+ list_add(&cli->cli_ha_item, &cli->cli_recovd->recovd_clients_lh);
+ EXIT;
+}
+
+
+static int recovd_upcall(void)
{
char *argv[2];
char *envp[3];
@@ -60,138 +70,6 @@
return call_usermodehelper(argv[0], argv, envp);
}
-static int connmgr_unpack_body(struct ptlrpc_request *req)
-{
- struct connmgr_body *b = lustre_msg_buf(req->rq_repmsg, 0);
- if (b == NULL) {
- LBUG();
- RETURN(-EINVAL);
- }
-
- b->generation = NTOH__u32(b->generation);
-
- return 0;
-}
-
-int connmgr_connect(struct recovd_obd *recovd, struct ptlrpc_connection *conn)
-{
- struct ptlrpc_request *req;
- struct ptlrpc_client *cl;
- struct connmgr_body *body;
- int rc, size = sizeof(*body);
- ENTRY;
-
- if (!recovd) {
- CERROR("no manager\n");
- LBUG();
- GOTO(out, rc = -EINVAL);
- }
- cl = recovd->recovd_client;
-
- req = ptlrpc_prep_req(cl, conn, CONNMGR_CONNECT, 1, &size, NULL);
- if (!req)
- GOTO(out, rc = -ENOMEM);
-
- body = lustre_msg_buf(req->rq_reqmsg, 0);
- body->generation = HTON__u32(conn->c_generation);
- body->conn = (__u64)(unsigned long)conn;
- body->conn_token = conn->c_token;
-
- req->rq_replen = lustre_msg_size(1, &size);
-
- rc = ptlrpc_queue_wait(req);
- rc = ptlrpc_check_status(req, rc);
- if (!rc) {
- rc = connmgr_unpack_body(req);
- if (rc)
- GOTO(out_free, rc);
- body = lustre_msg_buf(req->rq_repmsg, 0);
- CDEBUG(D_NET, "remote generation: %o\n", body->generation);
- conn->c_level = LUSTRE_CONN_CON;
- conn->c_remote_conn = body->conn;
- conn->c_remote_token = body->conn_token;
- }
-
-out_free:
- ptlrpc_free_req(req);
-out:
- RETURN(rc);
-}
-
-static int connmgr_handle_connect(struct ptlrpc_request *req)
-{
- struct connmgr_body *body;
- int rc, size = sizeof(*body);
- ENTRY;
-
- rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
- if (rc) {
- CERROR("connmgr: out of memory\n");
- req->rq_status = -ENOMEM;
- RETURN(0);
- }
-
- body = lustre_msg_buf(req->rq_reqmsg, 0);
- rc = connmgr_unpack_body(req);
- if (rc) {
- req->rq_status = rc;
- RETURN(0);
- }
-
- req->rq_connection->c_remote_conn = body->conn;
- req->rq_connection->c_remote_token = body->conn_token;
-
- CERROR("incoming generation %d\n", body->generation);
- body = lustre_msg_buf(req->rq_repmsg, 0);
- body->generation = 4711;
- body->conn = (__u64)(unsigned long)req->rq_connection;
- body->conn_token = req->rq_connection->c_token;
-
- req->rq_connection->c_level = LUSTRE_CONN_CON;
- RETURN(0);
-}
-
-int connmgr_handle(struct obd_device *dev, struct ptlrpc_service *svc,
- struct ptlrpc_request *req)
-{
- int rc;
- ENTRY;
-
- rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
- if (rc) {
- CERROR("Invalid request\n");
- GOTO(out, rc);
- }
-
- if (req->rq_reqmsg->type != NTOH__u32(PTL_RPC_MSG_REQUEST)) {
- CERROR("wrong packet type sent %d\n",
- req->rq_reqmsg->type);
- GOTO(out, rc = -EINVAL);
- }
-
- switch (req->rq_reqmsg->opc) {
- case CONNMGR_CONNECT:
- CDEBUG(D_INODE, "connmgr connect\n");
- rc = connmgr_handle_connect(req);
- break;
-
- default:
- rc = ptlrpc_error(svc, req);
- RETURN(rc);
- }
-
- EXIT;
-out:
- if (rc) {
- ptlrpc_error(svc, req);
- } else {
- CDEBUG(D_NET, "sending reply\n");
- ptlrpc_reply(svc, req);
- }
-
- return 0;
-}
-
static int recovd_check_event(struct recovd_obd *recovd)
{
int rc = 0;
@@ -199,61 +77,77 @@
spin_lock(&recovd->recovd_lock);
- if (!(recovd->recovd_flags & MGR_WORKING) &&
- !list_empty(&recovd->recovd_troubled_lh)) {
-
- CERROR("connection in trouble - state: WORKING, upcall\n");
- recovd->recovd_flags = MGR_WORKING;
-
- recovd->recovd_waketime = CURRENT_TIME;
- recovd->recovd_timeout = 5 * HZ;
+ recovd->recovd_waketime = CURRENT_TIME;
+ if (recovd->recovd_timeout)
schedule_timeout(recovd->recovd_timeout);
- }
- if (recovd->recovd_flags & MGR_WORKING &&
- CURRENT_TIME <= recovd->recovd_waketime + recovd->recovd_timeout) {
- CERROR("WORKING: new event\n");
+ if (recovd->recovd_wakeup_flag) {
+ CERROR("service woken\n");
+ GOTO(out, rc = 1);
+ }
- recovd->recovd_waketime = CURRENT_TIME;
- schedule_timeout(recovd->recovd_timeout);
+ if (recovd->recovd_timeout &&
+ CURRENT_TIME > recovd->recovd_waketime + recovd->recovd_timeout) {
+ recovd->recovd_flags |= RECOVD_TIMEOUT;
+ CERROR("timeout\n");
+ GOTO(out, rc = 1);
}
- if (recovd->recovd_flags & MGR_STOPPING) {
- CERROR("ha mgr stopping\n");
+ if (recovd->recovd_flags & RECOVD_STOPPING) {
+ CERROR("recovd stopping\n");
rc = 1;
}
+ out:
+ recovd->recovd_wakeup_flag = 0;
spin_unlock(&recovd->recovd_lock);
RETURN(rc);
}
static int recovd_handle_event(struct recovd_obd *recovd)
{
+ ENTRY;
spin_lock(&recovd->recovd_lock);
- if (!(recovd->recovd_flags & MGR_WORKING) &&
- !list_empty(&recovd->recovd_troubled_lh)) {
-
- CERROR("connection in trouble - state: WORKING, upcall\n");
- recovd->recovd_flags = MGR_WORKING;
+ if (!(recovd->recovd_flags & RECOVD_UPCALL_WAIT) &&
+ recovd->recovd_flags & RECOVD_FAIL) {
+ CERROR("client in trouble: flags -> UPCALL_WAITING\n");
+ recovd->recovd_flags |= RECOVD_UPCALL_WAIT;
- connmgr_upcall();
+ recovd_upcall();
recovd->recovd_waketime = CURRENT_TIME;
- recovd->recovd_timeout = 5 * HZ;
+ recovd->recovd_timeout = 10 * HZ;
schedule_timeout(recovd->recovd_timeout);
}
- if (recovd->recovd_flags & MGR_WORKING &&
- CURRENT_TIME <= recovd->recovd_waketime + recovd->recovd_timeout) {
- CERROR("WORKING: new event\n");
+ if (recovd->recovd_flags & RECOVD_TIMEOUT) {
+ CERROR("timeout - no news from upcall?\n");
+ recovd->recovd_flags &= ~RECOVD_TIMEOUT;
+ }
- recovd->recovd_waketime = CURRENT_TIME;
- schedule_timeout(recovd->recovd_timeout);
+ if (recovd->recovd_flags & RECOVD_UPCALL_ANSWER) {
+ struct list_head *tmp, *pos;
+ CERROR("UPCALL_WAITING: upcall answer\n");
+ CERROR("** fill me in with recovery\n");
+
+ list_for_each_safe(tmp, pos, &recovd->recovd_troubled_lh) {
+ struct ptlrpc_client *cli = list_entry
+ (tmp, struct ptlrpc_client, cli_ha_item);
+
+ list_del(&cli->cli_ha_item);
+ spin_unlock(&recovd->recovd_lock);
+ if (cli->cli_recover)
+ cli->cli_recover(cli);
+ spin_lock(&recovd->recovd_lock);
+ }
+
+ recovd->recovd_timeout = 0;
+ recovd->recovd_flags = RECOVD_IDLE;
}
spin_unlock(&recovd->recovd_lock);
- return 0;
+ RETURN(0);
}
static int recovd_main(void *arg)
@@ -273,7 +167,7 @@
/* Record that the thread is running */
recovd->recovd_thread = current;
- recovd->recovd_flags = MGR_RUNNING;
+ recovd->recovd_flags = RECOVD_IDLE;
wake_up(&recovd->recovd_ctl_waitq);
/* And now, loop forever on requests */
@@ -282,9 +176,9 @@
recovd_check_event(recovd));
spin_lock(&recovd->recovd_lock);
- if (recovd->recovd_flags & MGR_STOPPING) {
+ if (recovd->recovd_flags & RECOVD_STOPPING) {
spin_unlock(&recovd->recovd_lock);
- CERROR("lustre_hamgr quitting\n");
+ CERROR("lustre_recovd stopping\n");
EXIT;
break;
}
@@ -294,7 +188,7 @@
}
recovd->recovd_thread = NULL;
- recovd->recovd_flags = MGR_STOPPED;
+ recovd->recovd_flags = RECOVD_STOPPED;
wake_up(&recovd->recovd_ctl_waitq);
CDEBUG(D_NET, "mgr exiting process %d\n", current->pid);
RETURN(0);
@@ -305,7 +199,7 @@
int rc;
ENTRY;
- INIT_LIST_HEAD(&recovd->recovd_connections_lh);
+ INIT_LIST_HEAD(&recovd->recovd_clients_lh);
INIT_LIST_HEAD(&recovd->recovd_troubled_lh);
spin_lock_init(&recovd->recovd_lock);
@@ -319,17 +213,19 @@
CERROR("cannot start thread\n");
RETURN(-EINVAL);
}
- wait_event(recovd->recovd_ctl_waitq, recovd->recovd_flags & MGR_RUNNING);
+ wait_event(recovd->recovd_ctl_waitq, recovd->recovd_flags & RECOVD_IDLE);
RETURN(0);
}
int recovd_cleanup(struct recovd_obd *recovd)
{
- recovd->recovd_flags = MGR_STOPPING;
-
+ spin_lock(&recovd->recovd_lock);
+ recovd->recovd_flags = RECOVD_STOPPING;
wake_up(&recovd->recovd_waitq);
+ spin_unlock(&recovd->recovd_lock);
+
wait_event_interruptible(recovd->recovd_ctl_waitq,
- (recovd->recovd_flags & MGR_STOPPED));
+ (recovd->recovd_flags & RECOVD_STOPPED));
RETURN(0);
}
Index: rpc.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/rpc.c,v
retrieving revision 1.26.2.1
retrieving revision 1.26.2.2
diff -u -r1.26.2.1 -r1.26.2.2
--- rpc.c 22 Apr 2002 15:13:09 -0000 1.26.2.1
+++ rpc.c 29 Apr 2002 21:49:04 -0000 1.26.2.2
@@ -55,7 +55,7 @@
GOTO(err_recovd, err = -EINVAL);
}
- ptlrpc_init_client(NULL, CONNMGR_REQUEST_PORTAL,
+ ptlrpc_init_client(NULL, NULL, CONNMGR_REQUEST_PORTAL,
CONNMGR_REPLY_PORTAL, recovd->recovd_client);
err = ptlrpc_start_thread(obddev, recovd->recovd_service, "lustre_connmgr");
@@ -95,17 +95,36 @@
}
OBD_FREE(recovd->recovd_service, sizeof(*recovd->recovd_service));
- recovd->recovd_flags = MGR_STOPPING;
-
+ ptlrpc_cleanup_client(recovd->recovd_client);
OBD_FREE(recovd->recovd_client, sizeof(*recovd->recovd_client));
MOD_DEC_USE_COUNT;
RETURN(0);
}
+
+int connmgr_iocontrol(int cmd, struct obd_conn *conn, int len, void *karg,
+ void *uarg)
+{
+ struct recovd_obd *recovd = &conn->oc_dev->u.recovd;
+
+ ENTRY;
+ if (cmd == OBD_RECOVD_NEWCONN) {
+ spin_lock(&recovd->recovd_lock);
+ recovd->recovd_flags |= RECOVD_UPCALL_ANSWER;
+ recovd->recovd_wakeup_flag = 1;
+ wake_up(&recovd->recovd_waitq);
+ spin_unlock(&recovd->recovd_lock);
+ EXIT;
+ }
+ return 0;
+}
+
+
/* use obd ops to offer management infrastructure */
static struct obd_ops recovd_obd_ops = {
o_setup: connmgr_setup,
o_cleanup: connmgr_cleanup,
+ o_iocontrol: connmgr_iocontrol,
};
static int __init ptlrpc_init(void)
Index: service.c
===================================================================
RCS file: /cvsroot/lustre/obd/rpc/service.c,v
retrieving revision 1.25.2.3
retrieving revision 1.25.2.4
diff -u -r1.25.2.3 -r1.25.2.4
--- service.c 29 Apr 2002 21:40:43 -0000 1.25.2.3
+++ service.c 29 Apr 2002 21:49:04 -0000 1.25.2.4
@@ -97,7 +97,7 @@
err = kportal_uuid_to_peer(uuid, &service->srv_self);
if (err) {
- CERROR("cannot get peer for uuid %s", uuid);
+ CERROR("cannot get peer for uuid '%s'", uuid);
GOTO(err_free, NULL);
}
_______________________________________________
Lustre-cvs mailing list
Lustre-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/lustre-cvs
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic