List: fuse-devel
Subject: [fuse-devel] [PATCH 12/13] fuse: Add uring sqe commit and fetch support
From: Bernd Schubert via fuse-devel <fuse-devel@lists.sourceforge.net>
Date: 2023-03-21 1:10:46
Message-ID: 20230321011047.3425786-13-bschubert@ddn.com
This adds support for fuse request completion through ring SQEs
(FUSE_URING_REQ_COMMIT_AND_FETCH handling). After the commit the
ring entry becomes available for new fuse requests.
Handling of requests through the ring (SQE/CQE handling)
is complete now.
Fuse request data are copied through the mmapped ring buffer;
there is no support for zero copy yet.
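For illustration only (not part of this patch): a userspace ring thread
could drive the commit-and-fetch cycle roughly as sketched below, assuming
liburing and the queue setup from the earlier patches of this series.
commit_and_fetch() and fuse_dev_fd are invented names, and the layout of
the SQE command area (qid/tag addressing) is defined elsewhere in the
series and elided here:

  #include <string.h>
  #include <liburing.h>

  /* sketch only: commit the reply for one ring entry, then wait until
   * the kernel puts the next fuse request into the mmapped buffer */
  static int commit_and_fetch(struct io_uring *ring, int fuse_dev_fd)
  {
          struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
          struct io_uring_cqe *cqe;
          int ret;

          if (!sqe)
                  return -EAGAIN;

          memset(sqe, 0, sizeof(*sqe));
          sqe->opcode = IORING_OP_URING_CMD;
          sqe->fd = fuse_dev_fd;
          sqe->cmd_op = FUSE_URING_REQ_COMMIT_AND_FETCH;
          /* qid/tag addressing goes into the SQE command area (elided) */

          ret = io_uring_submit(ring);
          if (ret < 0)
                  return ret;

          /* the CQE signals that the next request has been copied into
           * the mmapped ring buffer of this entry */
          ret = io_uring_wait_cqe(ring, &cqe);
          if (!ret)
                  io_uring_cqe_seen(ring, cqe);
          return ret;
  }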
Signed-off-by: Bernd Schubert <bschubert@ddn.com>
cc: Miklos Szeredi <miklos@szeredi.hu>
cc: linux-fsdevel@vger.kernel.org
cc: Amir Goldstein <amir73il@gmail.com>
cc: fuse-devel@lists.sourceforge.net
---
fs/fuse/dev.c | 1 +
fs/fuse/dev_uring.c | 408 +++++++++++++++++++++++++++++++++++++++++-
fs/fuse/dev_uring_i.h | 1 +
3 files changed, 401 insertions(+), 9 deletions(-)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index de9193f66c8b..cce55eaed8a3 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2330,6 +2330,7 @@ const struct file_operations fuse_dev_operations = {
.unlocked_ioctl = fuse_dev_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.mmap = fuse_uring_mmap,
+ .uring_cmd = fuse_uring_cmd,
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 744a38064131..5c41f9f71410 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -34,6 +34,9 @@ module_param(enable_uring, bool, 0644);
MODULE_PARM_DESC(enable_uring,
"Enable uring userspace communication through uring.");
+static bool fuse_uring_ent_release_and_fetch(struct fuse_ring_ent *ring_ent);
+static void fuse_uring_send_to_ring(struct fuse_ring_ent *ring_ent);
+
static struct fuse_ring_queue *
fuse_uring_get_queue(struct fuse_conn *fc, int qid)
{
@@ -47,15 +50,6 @@ fuse_uring_get_queue(struct fuse_conn *fc, int qid)
return (struct fuse_ring_queue *)(ptr + qid * fc->ring.queue_size);
}
-/* dummy function will be replaced in later commits */
-static void fuse_uring_bit_set(struct fuse_ring_ent *ent, bool is_bg,
- const char *str)
-{
- (void)ent;
- (void)is_bg;
- (void)str;
-}
-
/* Abort all list queued request on the given ring queue */
static void fuse_uring_end_queue_requests(struct fuse_ring_queue *queue)
{
@@ -81,6 +75,363 @@ void fuse_uring_end_requests(struct fuse_conn *fc)
}
}
+/*
+ * Finalize a fuse request, then fetch and send the next entry, if available.
+ *
+ * The queue lock is taken and released several times so that it is not
+ * held while calling fuse_request_end().
+ */
+static void
+fuse_uring_req_end_and_get_next(struct fuse_ring_ent *ring_ent, bool set_err,
+ int error)
+{
+ bool already = false;
+ struct fuse_req *req = ring_ent->fuse_req;
+ bool send;
+
+ spin_lock(&ring_ent->queue->lock);
+ if ((ring_ent->state & FRRS_FUSE_REQ_END) || !ring_ent->need_req_end) {
+ already = true;
+ } else {
+ ring_ent->state |= FRRS_FUSE_REQ_END;
+ ring_ent->need_req_end = 0;
+ }
+ spin_unlock(&ring_ent->queue->lock);
+
+ if (already) {
+ struct fuse_ring_queue *queue = ring_ent->queue;
+
+ if (!queue->aborted) {
+ pr_info("request end not needed state=%llu end-bit=%d\n",
+ ring_ent->state, ring_ent->need_req_end);
+ WARN_ON(1);
+ }
+ return;
+ }
+
+ if (set_err)
+ req->out.h.error = error;
+
+ fuse_request_end(ring_ent->fuse_req);
+ ring_ent->fuse_req = NULL;
+
+ send = fuse_uring_ent_release_and_fetch(ring_ent);
+ if (send)
+ fuse_uring_send_to_ring(ring_ent);
+}
+
+/*
+ * Copy data from the req to the ring buffer
+ */
+static int fuse_uring_copy_to_ring(struct fuse_conn *fc,
+ struct fuse_req *req,
+ struct fuse_ring_req *rreq)
+{
+ struct fuse_copy_state cs;
+ struct fuse_args *args = req->args;
+ int err;
+
+ fuse_copy_init(&cs, 1, NULL);
+ cs.is_uring = 1;
+ cs.ring.buf = rreq->in_out_arg;
+ cs.ring.buf_sz = fc->ring.req_arg_len;
+ cs.req = req;
+
+ pr_devel("%s:%d buf=%p len=%d args=%d\n", __func__, __LINE__,
+ cs.ring.buf, cs.ring.buf_sz, args->out_numargs);
+
+ err = fuse_copy_args(&cs, args->in_numargs, args->in_pages,
+ (struct fuse_arg *) args->in_args, 0);
+ rreq->in_out_arg_len = cs.ring.offset;
+
+ pr_devel("%s:%d buf=%p len=%d args=%d err=%d\n", __func__, __LINE__,
+ cs.ring.buf, cs.ring.buf_sz, args->out_numargs, err);
+
+ return err;
+}
+
+/*
+ * Copy data from the ring buffer to the fuse request
+ */
+static int fuse_uring_copy_from_ring(struct fuse_conn *fc,
+ struct fuse_req *req,
+ struct fuse_ring_req *rreq)
+{
+ struct fuse_copy_state cs;
+ struct fuse_args *args = req->args;
+
+ fuse_copy_init(&cs, 0, NULL);
+ cs.is_uring = 1;
+ cs.ring.buf = rreq->in_out_arg;
+
+ if (rreq->in_out_arg_len > fc->ring.req_arg_len) {
+ pr_devel("Max ring buffer len exceeded (%u vs %zu\n",
+ rreq->in_out_arg_len, fc->ring.req_arg_len);
+ return -EINVAL;
+ }
+ cs.ring.buf_sz = rreq->in_out_arg_len;
+ cs.req = req;
+
+ pr_devel("%s:%d buf=%p len=%d args=%d\n", __func__, __LINE__,
+ cs.ring.buf, cs.ring.buf_sz, args->out_numargs);
+
+ return fuse_copy_out_args(&cs, args, rreq->in_out_arg_len);
+}
+
+/*
+ * Write data to the ring buffer and send the request to userspace,
+ * which will read it.
+ * This is comparable to a classical read(/dev/fuse).
+ */
+static void fuse_uring_send_to_ring(struct fuse_ring_ent *ring_ent)
+{
+ struct fuse_conn *fc = ring_ent->queue->fc;
+ struct fuse_ring_req *rreq = ring_ent->rreq;
+ struct fuse_req *req = ring_ent->fuse_req;
+ int err = 0;
+
+ pr_devel("%s:%d ring-req=%p fuse_req=%p state=%llu args=%p\n", __func__,
+ __LINE__, ring_ent, ring_ent->fuse_req, ring_ent->state, req->args);
+
+ spin_lock(&ring_ent->queue->lock);
+ if (unlikely((ring_ent->state & FRRS_USERSPACE) ||
+ (ring_ent->state & FRRS_FREED))) {
+ pr_err("ring-req=%p buf_req=%p invalid state %llu on send\n",
+ ring_ent, rreq, ring_ent->state);
+ WARN_ON(1);
+ err = -EIO;
+ } else {
+ ring_ent->state |= FRRS_USERSPACE;
+ }
+
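+ /* the cmd is going to be completed below via io_uring_cmd_done()
+ * (restored on copy error) */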
+ ring_ent->need_cmd_done = 0;
+ spin_unlock(&ring_ent->queue->lock);
+ if (err)
+ goto err;
+
+ err = fuse_uring_copy_to_ring(fc, req, rreq);
+ if (unlikely(err)) {
+ ring_ent->state &= ~FRRS_USERSPACE;
+ ring_ent->need_cmd_done = 1;
+ goto err;
+ }
+
+ /* the ring req header goes directly into the shared memory buffer */
+ rreq->in = req->in.h;
+
+ pr_devel("%s qid=%d tag=%d state=%llu cmd-done op=%d unique=%llu\n",
+ __func__, ring_ent->queue->qid, ring_ent->tag, ring_ent->state,
+ rreq->in.opcode, rreq->in.unique);
+
+ io_uring_cmd_done(ring_ent->cmd, 0, 0);
+ return;
+
+err:
+ fuse_uring_req_end_and_get_next(ring_ent, true, err);
+}
+
+/*
+ * Set the given ring entry as available in the queue bitmap
+ */
+static void fuse_uring_bit_set(struct fuse_ring_ent *ring_ent, bool bg,
+ const char *str)
+__must_hold(ring_ent->queue->lock)
+{
+ int old;
+ struct fuse_ring_queue *queue = ring_ent->queue;
+ const struct fuse_conn *fc = queue->fc;
+ int tag = ring_ent->tag;
+
+ old = test_and_set_bit(tag, queue->req_avail_map);
+ if (unlikely(old != 0)) {
+ pr_warn("%8s invalid bit value on clear for qid=%d tag=%d",
+ str, queue->qid, tag);
+ WARN_ON(1);
+ }
+ if (bg)
+ queue->req_bg++;
+ else
+ queue->req_fg++;
+
+ pr_devel("%35s bit set fc=%p is_bg=%d qid=%d tag=%d fg=%d bg=%d bgq: %d\n",
+ str, fc, bg, queue->qid, ring_ent->tag, queue->req_fg,
+ queue->req_bg, !list_empty(&queue->bg_queue));
+}
+
+/*
+ * Mark the ring entry as not available for other requests
+ */
+static int fuse_uring_bit_clear(struct fuse_ring_ent *ring_ent, int is_bg,
+ const char *str)
+__must_hold(ring_ent->queue->lock)
+{
+ int old;
+ struct fuse_ring_queue *queue = ring_ent->queue;
+ const struct fuse_conn *fc = queue->fc;
+ int tag = ring_ent->tag;
+ int *value = is_bg ? &queue->req_bg : &queue->req_fg;
+
+ if (unlikely(*value <= 0)) {
+ pr_warn("%s qid=%d tag=%d is_bg=%d zero req avail fg=%d bg=%d\n",
+ str, queue->qid, ring_ent->tag, is_bg,
+ queue->req_fg, queue->req_bg);
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ old = test_and_clear_bit(tag, queue->req_avail_map);
+ if (unlikely(old != 1)) {
+ pr_warn("%8s invalid bit value on clear for qid=%d tag=%d",
+ str, queue->qid, tag);
+ WARN_ON(1);
+ return -EIO;
+ }
+
+ ring_ent->rreq->flags = 0;
+
+ if (is_bg) {
+ ring_ent->rreq->flags |= FUSE_RING_REQ_FLAG_BACKGROUND;
+ queue->req_bg--;
+ } else {
+ queue->req_fg--;
+ }
+
+ pr_devel("%35s ring bit clear fc=%p is_bg=%d qid=%d tag=%d fg=%d bg=%d\n",
+ str, fc, is_bg, queue->qid, ring_ent->tag,
+ queue->req_fg, queue->req_bg);
+
+ ring_ent->state |= FRRS_FUSE_REQ;
+
+ return 0;
+}
+
+/*
+ * Assign the next queued fuse request to the given ring entry
+ */
+static bool fuse_uring_assign_ring_entry(struct fuse_ring_ent *ring_ent,
+ struct list_head *head,
+ int is_bg)
+__must_hold(ring_ent->queue->lock)
+{
+ struct fuse_req *req;
+ int res;
+
+ if (list_empty(head))
+ return false;
+
+ res = fuse_uring_bit_clear(ring_ent, is_bg, __func__);
+ if (unlikely(res))
+ return false;
+
+ req = list_first_entry(head, struct fuse_req, list);
+ list_del_init(&req->list);
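+ /* the request is taken over by this ring entry, no longer pending */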
+ clear_bit(FR_PENDING, &req->flags);
+ ring_ent->fuse_req = req;
+ ring_ent->need_req_end = 1;
+
+ return true;
+}
+
+/*
+ * Check for errors and store them in the request
+ */
+static int fuse_uring_ring_ent_has_err(struct fuse_conn *fc,
+ struct fuse_ring_ent *ring_ent)
+{
+ struct fuse_req *req = ring_ent->fuse_req;
+ struct fuse_out_header *oh = &req->out.h;
+ int err;
+
+ if (oh->unique == 0) {
+ /*
+ * Not supported through request-based uring; this needs
+ * another ring from user space to kernel
+ */
+ pr_warn("Unsupported fuse-notify\n");
+ err = -EINVAL;
+ goto seterr;
+ }
+
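+ /*
+ * oh->error must be a negative errno; values of -512 and below
+ * (-ERESTARTSYS and other kernel-internal codes) are rejected
+ */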
+ if (oh->error <= -512 || oh->error > 0) {
+ err = -EINVAL;
+ goto seterr;
+ }
+
+ if (oh->error) {
+ err = oh->error;
+ pr_devel("%s:%d err=%d op=%d req-ret=%d",
+ __func__, __LINE__, err, req->args->opcode,
+ req->out.h.error);
+ goto err; /* error already set */
+ }
+
+ if ((oh->unique & ~FUSE_INT_REQ_BIT) != req->in.h.unique) {
+ pr_warn("Unexpected seqno mismatch, expected %llu got %llu\n",
+ req->in.h.unique, oh->unique & ~FUSE_INT_REQ_BIT);
+ err = -ENOENT;
+ goto seterr;
+ }
+
+ /* Is it an interrupt reply ID? */
+ if (oh->unique & FUSE_INT_REQ_BIT) {
+ err = 0;
+ if (oh->error == -ENOSYS) {
+ fc->no_interrupt = 1;
+ } else if (oh->error == -EAGAIN) {
+ /* XXX Needs to copy to the next cq and submit it */
+ /* err = queue_interrupt(req); */
+ pr_warn("Interrupt EAGAIN not supported yet\n");
+ err = -EINVAL;
+ }
+
+ goto seterr;
+ }
+
+ return 0;
+
+seterr:
+ oh->error = err;
+err:
+ pr_devel("%s:%d err=%d op=%d req-ret=%d\n",
+ __func__, __LINE__, err, req->args->opcode,
+ req->out.h.error);
+ return err;
+}
+
+/*
+ * Read data from the ring buffer, which userspace has written to.
+ * This is comparable to the handling of a classical write(/dev/fuse).
+ * Also make the ring request available again for new fuse requests.
+ */
+static void fuse_uring_commit_and_release(struct fuse_dev *fud,
+ struct fuse_ring_ent *ring_ent)
+{
+ struct fuse_ring_req *rreq = ring_ent->rreq;
+ struct fuse_req *req = ring_ent->fuse_req;
+ ssize_t err = 0;
+ bool set_err = false;
+
+ req->out.h = rreq->out;
+
+ err = fuse_uring_ring_ent_has_err(fud->fc, ring_ent);
+ if (err) {
+ /* req->out.h.error already set */
+ pr_devel("%s:%d err=%zd oh->err=%d\n",
+ __func__, __LINE__, err, req->out.h.error);
+ goto out;
+ }
+
+ err = fuse_uring_copy_from_ring(fud->fc, req, rreq);
+ if (err)
+ set_err = true;
+
+out:
+ pr_devel("%s:%d ret=%zd op=%d req-ret=%d\n",
+ __func__, __LINE__, err, req->args->opcode, req->out.h.error);
+ fuse_uring_req_end_and_get_next(ring_ent, set_err, err);
+}
+
/*
* Release a ring request, it is no longer needed and can handle new data
*
@@ -119,6 +470,25 @@ __must_hold(&queue->lock)
ring_ent->rreq->flags = 0;
ring_ent->state = FRRS_FUSE_WAIT;
}
+
+/*
+ * Release a uring entry and fetch the next fuse request if available
+ */
+static bool fuse_uring_ent_release_and_fetch(struct fuse_ring_ent *ring_ent)
+{
+ struct fuse_ring_queue *queue = ring_ent->queue;
+ bool is_bg = !!(ring_ent->rreq->flags & FUSE_RING_REQ_FLAG_BACKGROUND);
+ bool send = false;
+ struct list_head *head = is_bg ? &queue->bg_queue : &queue->fg_queue;
+
+ spin_lock(&ring_ent->queue->lock);
+ fuse_uring_ent_release(ring_ent, queue, is_bg);
+ send = fuse_uring_assign_ring_entry(ring_ent, head, is_bg);
+ spin_unlock(&ring_ent->queue->lock);
+
+ return send;
+}
+
/**
* Simplified ring-entry release function, for shutdown only
*/
@@ -810,6 +1180,26 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
ret = fuse_uring_fetch(ring_ent, cmd);
break;
+ case FUSE_URING_REQ_COMMIT_AND_FETCH:
+ if (unlikely(!fc->ring.ready)) {
+ pr_info("commit and fetch, but the ring is not ready yet");
+ goto out;
+ }
+
+ if (!(prev_state & FRRS_USERSPACE)) {
+ pr_info("qid=%d tag=%d state %llu misses %d\n",
+ queue->qid, ring_ent->tag, ring_ent->state,
+ FRRS_USERSPACE);
+ goto out;
+ }
+
+ /* XXX Test inject error */
+
+ WRITE_ONCE(ring_ent->cmd, cmd);
+ fuse_uring_commit_and_release(fud, ring_ent);
+
+ ret = 0;
+ break;
default:
ret = -EINVAL;
pr_devel("Unknown uring command %d", cmd_op);
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
index 4032dccca8b6..b0ef36215b80 100644
--- a/fs/fuse/dev_uring_i.h
+++ b/fs/fuse/dev_uring_i.h
@@ -13,6 +13,7 @@ void fuse_uring_end_requests(struct fuse_conn *fc);
int fuse_uring_ioctl(struct file *file, struct fuse_uring_cfg *cfg);
void fuse_uring_ring_destruct(struct fuse_conn *fc);
int fuse_uring_mmap(struct file *filp, struct vm_area_struct *vma);
+int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
#endif
--
2.37.2