[prev in list] [next in list] [prev in thread] [next in thread]
List: linux-rdma
Subject: Re: [PATCH RFC 05/12] xprtrdma: Remove fr_state
From: Chuck Lever <chuck.lever () oracle ! com>
Date: 2019-05-31 13:36:53
Message-ID: 635F6490-7635-4E17-8967-3766FA03D613 () oracle ! com
[Download RAW message or body]
> On May 30, 2019, at 10:05 AM, Anna Schumaker <schumaker.anna@gmail.com> wrote:
>
> Hi Chuck,
>
> On Tue, 2019-05-28 at 14:21 -0400, Chuck Lever wrote:
>> Since both the Send and Receive completion queues are processed in
>> a workqueue context, it should be safe to DMA unmap and return MRs
>> to the free or recycle lists directly in the completion handlers.
>>
>> Doing this means rpcrdma_frwr no longer needs to track the state
>> of each MR... a VALID or FLUSHED MR can no longer appear on an
>> xprt's MR free list.
>>
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> include/trace/events/rpcrdma.h | 19 ----
>> net/sunrpc/xprtrdma/frwr_ops.c | 202 ++++++++++++++++++------------------
>> ---
>> net/sunrpc/xprtrdma/rpc_rdma.c | 2
>> net/sunrpc/xprtrdma/xprt_rdma.h | 11 --
>> 4 files changed, 95 insertions(+), 139 deletions(-)
>>
>> diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
>> index a4ab3a2..7fe21ba 100644
>> --- a/include/trace/events/rpcrdma.h
>> +++ b/include/trace/events/rpcrdma.h
>> @@ -181,18 +181,6 @@
>> ), \
>> TP_ARGS(task, mr, nsegs))
>>
>> -TRACE_DEFINE_ENUM(FRWR_IS_INVALID);
>> -TRACE_DEFINE_ENUM(FRWR_IS_VALID);
>> -TRACE_DEFINE_ENUM(FRWR_FLUSHED_FR);
>> -TRACE_DEFINE_ENUM(FRWR_FLUSHED_LI);
>> -
>> -#define xprtrdma_show_frwr_state(x) \
>> - __print_symbolic(x, \
>> - { FRWR_IS_INVALID, "INVALID" }, \
>> - { FRWR_IS_VALID, "VALID" }, \
>> - { FRWR_FLUSHED_FR, "FLUSHED_FR" }, \
>> - { FRWR_FLUSHED_LI, "FLUSHED_LI" })
>> -
>> DECLARE_EVENT_CLASS(xprtrdma_frwr_done,
>> TP_PROTO(
>> const struct ib_wc *wc,
>> @@ -203,22 +191,19 @@
>>
>> TP_STRUCT__entry(
>> __field(const void *, mr)
>> - __field(unsigned int, state)
>> __field(unsigned int, status)
>> __field(unsigned int, vendor_err)
>> ),
>>
>> TP_fast_assign(
>> __entry->mr = container_of(frwr, struct rpcrdma_mr, frwr);
>> - __entry->state = frwr->fr_state;
>> __entry->status = wc->status;
>> __entry->vendor_err = __entry->status ? wc->vendor_err : 0;
>> ),
>>
>> TP_printk(
>> - "mr=%p state=%s: %s (%u/0x%x)",
>> - __entry->mr, xprtrdma_show_frwr_state(__entry->state),
>> - rdma_show_wc_status(__entry->status),
>> + "mr=%p: %s (%u/0x%x)",
>> + __entry->mr, rdma_show_wc_status(__entry->status),
>> __entry->status, __entry->vendor_err
>> )
>> );
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index ac47314..99871fbf 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -168,7 +168,6 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr
>> *mr)
>> goto out_list_err;
>>
>> mr->frwr.fr_mr = frmr;
>> - mr->frwr.fr_state = FRWR_IS_INVALID;
>> mr->mr_dir = DMA_NONE;
>> INIT_LIST_HEAD(&mr->mr_list);
>> INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
>> @@ -298,65 +297,6 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
>> }
>>
>> /**
>> - * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
>> - * @cq: completion queue (ignored)
>> - * @wc: completed WR
>> - *
>> - */
>> -static void
>> -frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
>> -{
>> - struct ib_cqe *cqe = wc->wr_cqe;
>> - struct rpcrdma_frwr *frwr =
>> - container_of(cqe, struct rpcrdma_frwr, fr_cqe);
>> -
>> - /* WARNING: Only wr_cqe and status are reliable at this point */
>> - if (wc->status != IB_WC_SUCCESS)
>> - frwr->fr_state = FRWR_FLUSHED_FR;
>> - trace_xprtrdma_wc_fastreg(wc, frwr);
>> -}
>> -
>> -/**
>> - * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
>> - * @cq: completion queue (ignored)
>> - * @wc: completed WR
>> - *
>> - */
>> -static void
>> -frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
>> -{
>> - struct ib_cqe *cqe = wc->wr_cqe;
>> - struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
>> - fr_cqe);
>> -
>> - /* WARNING: Only wr_cqe and status are reliable at this point */
>> - if (wc->status != IB_WC_SUCCESS)
>> - frwr->fr_state = FRWR_FLUSHED_LI;
>> - trace_xprtrdma_wc_li(wc, frwr);
>> -}
>> -
>> -/**
>> - * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv
>> WC
>> - * @cq: completion queue (ignored)
>> - * @wc: completed WR
>> - *
>> - * Awaken anyone waiting for an MR to finish being fenced.
>> - */
>> -static void
>> -frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
>> -{
>> - struct ib_cqe *cqe = wc->wr_cqe;
>> - struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
>> - fr_cqe);
>> -
>> - /* WARNING: Only wr_cqe and status are reliable at this point */
>> - if (wc->status != IB_WC_SUCCESS)
>> - frwr->fr_state = FRWR_FLUSHED_LI;
>> - trace_xprtrdma_wc_li_wake(wc, frwr);
>> - complete(&frwr->fr_linv_done);
>> -}
>> -
>> -/**
>> * frwr_map - Register a memory region
>> * @r_xprt: controlling transport
>> * @seg: memory region co-ordinates
>> @@ -378,23 +318,15 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt
>> *r_xprt,
>> {
>> struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
>> - struct rpcrdma_frwr *frwr;
>> struct rpcrdma_mr *mr;
>> struct ib_mr *ibmr;
>> struct ib_reg_wr *reg_wr;
>> int i, n;
>> u8 key;
>>
>> - mr = NULL;
>> - do {
>> - if (mr)
>> - rpcrdma_mr_recycle(mr);
>> - mr = rpcrdma_mr_get(r_xprt);
>> - if (!mr)
>> - goto out_getmr_err;
>> - } while (mr->frwr.fr_state != FRWR_IS_INVALID);
>> - frwr = &mr->frwr;
>> - frwr->fr_state = FRWR_IS_VALID;
>> + mr = rpcrdma_mr_get(r_xprt);
>> + if (!mr)
>> + goto out_getmr_err;
>>
>> if (nsegs > ia->ri_max_frwr_depth)
>> nsegs = ia->ri_max_frwr_depth;
>> @@ -423,7 +355,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt
>> *r_xprt,
>> if (!mr->mr_nents)
>> goto out_dmamap_err;
>>
>> - ibmr = frwr->fr_mr;
>> + ibmr = mr->frwr.fr_mr;
>> n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
>> if (unlikely(n != mr->mr_nents))
>> goto out_mapmr_err;
>> @@ -433,7 +365,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt
>> *r_xprt,
>> key = (u8)(ibmr->rkey & 0x000000FF);
>> ib_update_fast_reg_key(ibmr, ++key);
>>
>> - reg_wr = &frwr->fr_regwr;
>> + reg_wr = &mr->frwr.fr_regwr;
>> reg_wr->mr = ibmr;
>> reg_wr->key = ibmr->rkey;
>> reg_wr->access = writing ?
>> @@ -465,6 +397,23 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt
>> *r_xprt,
>> }
>>
>> /**
>> + * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
>> + * @cq: completion queue (ignored)
>> + * @wc: completed WR
>> + *
>> + */
>> +static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
>> +{
>> + struct ib_cqe *cqe = wc->wr_cqe;
>> + struct rpcrdma_frwr *frwr =
>> + container_of(cqe, struct rpcrdma_frwr, fr_cqe);
>> +
>> + /* WARNING: Only wr_cqe and status are reliable at this point */
>> + trace_xprtrdma_wc_fastreg(wc, frwr);
>> + /* The MR will get recycled when the associated req is retransmitted */
>> +}
>> +
>> +/**
>> * frwr_send - post Send WR containing the RPC Call message
>> * @ia: interface adapter
>> * @req: Prepared RPC Call
>> @@ -516,65 +465,104 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct
>> list_head *mrs)
>> if (mr->mr_handle == rep->rr_inv_rkey) {
>> list_del_init(&mr->mr_list);
>> trace_xprtrdma_mr_remoteinv(mr);
>> - mr->frwr.fr_state = FRWR_IS_INVALID;
>> rpcrdma_mr_unmap_and_put(mr);
>> break; /* only one invalidated MR per RPC */
>> }
>> }
>>
>> +static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr)
>> +{
>> + if (wc->status != IB_WC_SUCCESS)
>> + rpcrdma_mr_recycle(mr);
>> + else
>> + rpcrdma_mr_unmap_and_put(mr);
>> +}
>> +
>> /**
>> - * frwr_unmap_sync - invalidate memory regions that were registered for @req
>> - * @r_xprt: controlling transport
>> - * @mrs: list of MRs to process
>> + * frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC
>> + * @cq: completion queue (ignored)
>> + * @wc: completed WR
>> *
>> - * Sleeps until it is safe for the host CPU to access the
>> - * previously mapped memory regions.
>> + */
>> +static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
>> +{
>> + struct ib_cqe *cqe = wc->wr_cqe;
>> + struct rpcrdma_frwr *frwr =
>> + container_of(cqe, struct rpcrdma_frwr, fr_cqe);
>> + struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
>> +
>> + /* WARNING: Only wr_cqe and status are reliable at this point */
>> + __frwr_release_mr(wc, mr);
>> + trace_xprtrdma_wc_li(wc, frwr);
>> +}
>> +
>> +/**
>> + * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC
>> + * @cq: completion queue (ignored)
>> + * @wc: completed WR
>> *
>> - * Caller ensures that @mrs is not empty before the call. This
>> - * function empties the list.
>> + * Awaken anyone waiting for an MR to finish being fenced.
>> */
>> -void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
>> +static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
>> +{
>> + struct ib_cqe *cqe = wc->wr_cqe;
>> + struct rpcrdma_frwr *frwr =
>> + container_of(cqe, struct rpcrdma_frwr, fr_cqe);
>> + struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
>> +
>> + /* WARNING: Only wr_cqe and status are reliable at this point */
>> + __frwr_release_mr(wc, mr);
>> + trace_xprtrdma_wc_li_wake(wc, frwr);
>> + complete(&frwr->fr_linv_done);
>> +}
>> +
>> +/**
>> + * frwr_unmap_sync - invalidate memory regions that were registered for @req
>> + * @r_xprt: controlling transport instance
>> + * @req: rpcrdma_req with a non-empty list of MRs to process
>> + *
>> + * Sleeps until it is safe for the host CPU to access the previously mapped
>> + * memory regions.
>> + */
>> +void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
>> {
>> struct ib_send_wr *first, **prev, *last;
>> const struct ib_send_wr *bad_wr;
>> - struct rpcrdma_ia *ia = &r_xprt->rx_ia;
>> struct rpcrdma_frwr *frwr;
>> struct rpcrdma_mr *mr;
>> - int count, rc;
>> + int rc;
>>
>> /* ORDER: Invalidate all of the MRs first
>> *
>> * Chain the LOCAL_INV Work Requests and post them with
>> * a single ib_post_send() call.
>> */
>> - frwr = NULL;
>> - count = 0;
>> prev = &first;
>> - list_for_each_entry(mr, mrs, mr_list) {
>> - mr->frwr.fr_state = FRWR_IS_INVALID;
>> + while (!list_empty(&req->rl_registered)) {
>
> Is this list guaranteed to always start full? Because we could potentially use
> frwr uninitialized a few lines down if it's not.
The only frwr_unmap_async looks like this:
1353 if (!list_empty(&req->rl_registered))
1354 frwr_unmap_async(r_xprt, req);
The warning is a false positive. I'll see about rearranging the logic.
> net/sunrpc/xprtrdma/frwr_ops.c: In function ‘frwr_unmap_sync':
> net/sunrpc/xprtrdma/frwr_ops.c:582:3: error: ‘frwr' may be used uninitialized in
> this function [-Werror=maybe-uninitialized]
> wait_for_completion(&frwr->fr_linv_done);
> ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>
> Thanks,
> Anna
>> + mr = rpcrdma_mr_pop(&req->rl_registered);
>>
>> - frwr = &mr->frwr;
>> trace_xprtrdma_mr_localinv(mr);
>> + r_xprt->rx_stats.local_inv_needed++;
>>
>> + frwr = &mr->frwr;
>> frwr->fr_cqe.done = frwr_wc_localinv;
>> last = &frwr->fr_invwr;
>> - memset(last, 0, sizeof(*last));
>> + last->next = NULL;
>> last->wr_cqe = &frwr->fr_cqe;
>> + last->sg_list = NULL;
>> + last->num_sge = 0;
>> last->opcode = IB_WR_LOCAL_INV;
>> + last->send_flags = IB_SEND_SIGNALED;
>> last->ex.invalidate_rkey = mr->mr_handle;
>> - count++;
>>
>> *prev = last;
>> prev = &last->next;
>> }
>> - if (!frwr)
>> - goto unmap;
>>
>> /* Strong send queue ordering guarantees that when the
>> * last WR in the chain completes, all WRs in the chain
>> * are complete.
>> */
>> - last->send_flags = IB_SEND_SIGNALED;
>> frwr->fr_cqe.done = frwr_wc_localinv_wake;
>> reinit_completion(&frwr->fr_linv_done);
>>
>> @@ -582,26 +570,18 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct
>> list_head *mrs)
>> * replaces the QP. The RPC reply handler won't call us
>> * unless ri_id->qp is a valid pointer.
>> */
>> - r_xprt->rx_stats.local_inv_needed++;
>> bad_wr = NULL;
>> - rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
>> - if (bad_wr != first)
>> - wait_for_completion(&frwr->fr_linv_done);
>> - if (rc)
>> - goto out_release;
>> + rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr);
>> + trace_xprtrdma_post_send(req, rc);
>>
>> - /* ORDER: Now DMA unmap all of the MRs, and return
>> - * them to the free MR list.
>> + /* The final LOCAL_INV WR in the chain is supposed to
>> + * do the wake. If it never gets posted, the wake will
>> + * not happen, so don't wait in that case.
>> */
>> -unmap:
>> - while (!list_empty(mrs)) {
>> - mr = rpcrdma_mr_pop(mrs);
>> - rpcrdma_mr_unmap_and_put(mr);
>> - }
>> - return;
>> -
>> -out_release:
>> - pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
>> + if (bad_wr != first)
>> + wait_for_completion(&frwr->fr_linv_done);
>> + if (!rc)
>> + return;
>>
>> /* Unmap and release the MRs in the LOCAL_INV WRs that did not
>> * get posted.
>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
>> index 77fc1e4..6c049fd 100644
>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>> @@ -1277,7 +1277,7 @@ void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt,
>> struct rpcrdma_req *req)
>> * RPC has relinquished all its Send Queue entries.
>> */
>> if (!list_empty(&req->rl_registered))
>> - frwr_unmap_sync(r_xprt, &req->rl_registered);
>> + frwr_unmap_sync(r_xprt, req);
>>
>> /* Ensure that any DMA mapped pages associated with
>> * the Send of the RPC Call have been unmapped before
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index 3c39aa3..a9de116 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -240,17 +240,9 @@ struct rpcrdma_sendctx {
>> * An external memory region is any buffer or page that is registered
>> * on the fly (ie, not pre-registered).
>> */
>> -enum rpcrdma_frwr_state {
>> - FRWR_IS_INVALID, /* ready to be used */
>> - FRWR_IS_VALID, /* in use */
>> - FRWR_FLUSHED_FR, /* flushed FASTREG WR */
>> - FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
>> -};
>> -
>> struct rpcrdma_frwr {
>> struct ib_mr *fr_mr;
>> struct ib_cqe fr_cqe;
>> - enum rpcrdma_frwr_state fr_state;
>> struct completion fr_linv_done;
>> union {
>> struct ib_reg_wr fr_regwr;
>> @@ -567,8 +559,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt
>> *r_xprt,
>> struct rpcrdma_mr **mr);
>> int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
>> void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
>> -void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt,
>> - struct list_head *mrs);
>> +void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
>>
>> /*
>> * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
--
Chuck Lever
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic