[prev in list] [next in list] [prev in thread] [next in thread]
List: linux-xfs
Subject: Re: [PATCH 6/8] xfs: have getfsmap fall back to the freesp btrees when rmap is not present
From: "Darrick J. Wong" <darrick.wong () oracle ! com>
Date: 2017-02-24 22:33:52
Message-ID: 20170224223352.GM5846 () birch ! djwong ! org
[Download RAW message or body]
On Fri, Feb 24, 2017 at 09:48:52AM -0800, Darrick J. Wong wrote:
> On Fri, Feb 24, 2017 at 08:04:49AM -0500, Brian Foster wrote:
> > On Fri, Feb 17, 2017 at 05:17:55PM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <darrick.wong@oracle.com>
> > >
> > > If the reverse-mapping btree isn't available, fall back to the
> > > free space btrees to provide partial reverse mapping information.
> > > The online scrub tool can make use of even partial information to
> > > speed up the data block scan.
> > >
> > > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> > > ---
> > > fs/xfs/xfs_fsmap.c | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> > > 1 file changed, 152 insertions(+), 3 deletions(-)
> > >
> > >
> > > diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
> > > index 09d6b92..82e44a9 100644
> > > --- a/fs/xfs/xfs_fsmap.c
> > > +++ b/fs/xfs/xfs_fsmap.c
> > > @@ -40,6 +40,7 @@
> > > #include "xfs_fsmap.h"
> > > #include "xfs_refcount.h"
> > > #include "xfs_refcount_btree.h"
> > > +#include "xfs_alloc_btree.h"
> > >
> > > /* Convert an xfs_fsmap to an fsmap. */
> > > void
> > > @@ -158,6 +159,9 @@ xfs_fsmap_owner_from_rmap(
> > > case XFS_RMAP_OWN_COW:
> > > fmr->fmr_owner = XFS_FMR_OWN_COW;
> > > break;
> > > + case XFS_RMAP_OWN_NULL: /* "free" */
> > > + fmr->fmr_owner = XFS_FMR_OWN_FREE;
> > > + break;
> > > default:
> > > return -EFSCORRUPTED;
> > > }
> > > @@ -433,6 +437,31 @@ xfs_getfsmap_rtdev_helper(
> > > return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
> > > }
> > >
> > > +/* Transform a bnobt irec into a fsmap */
> > > +STATIC int
> > > +xfs_getfsmap_datadev_bnobt_helper(
> > > + struct xfs_btree_cur *cur,
> > > + struct xfs_alloc_rec_incore *rec,
> > > + void *priv)
> > > +{
> > > + struct xfs_mount *mp = cur->bc_mp;
> > > + struct xfs_getfsmap_info *info = priv;
> > > + struct xfs_rmap_irec irec;
> > > + xfs_fsblock_t fsb;
> > > + xfs_daddr_t rec_daddr;
> > > +
> > > + fsb = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, rec->ar_startblock);
> > > + rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
> > > +
> > > + irec.rm_startblock = rec->ar_startblock;
> > > + irec.rm_blockcount = rec->ar_blockcount;
> > > + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
> > > + irec.rm_offset = 0;
> > > + irec.rm_flags = 0;
> > > +
> >
> > Slightly confused... if we directly pass free space records to
> > xfs_getfsmap_helper(), won't it also assume the gaps between the records
> > are free space?
> >
> > > + return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr);
> > > +}
> > > +
> > > /* Set rmap flags based on the getfsmap flags */
> > > static void
> > > xfs_getfsmap_set_irec_flags(
> > > @@ -621,6 +650,125 @@ xfs_getfsmap_datadev(
> > > return error;
> > > }
> > >
> > > +/* Execute a getfsmap query against the regular data device's bnobt. */
> > > +STATIC int
> > > +xfs_getfsmap_datadev_bnobt(
> > > + struct xfs_trans *tp,
> > > + struct xfs_fsmap *keys,
> > > + struct xfs_getfsmap_info *info)
> > > +{
> > > + struct xfs_mount *mp = tp->t_mountp;
> > > + struct xfs_btree_cur *bt_cur = NULL;
> > > + struct xfs_fsmap *dkey_low;
> > > + struct xfs_fsmap *dkey_high;
> > > + struct xfs_alloc_rec_incore alow;
> > > + struct xfs_alloc_rec_incore ahigh;
> > > + xfs_fsblock_t start_fsb;
> > > + xfs_fsblock_t end_fsb;
> > > + xfs_agnumber_t start_ag;
> > > + xfs_agnumber_t end_ag;
> > > + xfs_daddr_t eofs;
> > > + int error = 0;
> > > +
> > > + dkey_low = keys;
> > > + dkey_high = keys + 1;
> > > + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
> > > + if (dkey_low->fmr_physical >= eofs)
> > > + return 0;
> > > + if (dkey_high->fmr_physical >= eofs)
> > > + dkey_high->fmr_physical = eofs - 1;
> > > + start_fsb = XFS_DADDR_TO_FSB(mp, dkey_low->fmr_physical);
> > > + end_fsb = XFS_DADDR_TO_FSB(mp, dkey_high->fmr_physical);
> > > +
> > > + /* Set up search keys */
> > > + info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
> > > + info->low.rm_offset = XFS_BB_TO_FSBT(mp, dkey_low->fmr_offset);
> > > + error = xfs_fsmap_owner_to_rmap(dkey_low, &info->low);
> > > + if (error)
> > > + return error;
> > > + info->low.rm_blockcount = 0;
> > > + xfs_getfsmap_set_irec_flags(&info->low, dkey_low);
> > > +
> > > + info->high.rm_startblock = -1U;
> > > + info->high.rm_owner = ULLONG_MAX;
> > > + info->high.rm_offset = ULLONG_MAX;
> > > + info->high.rm_blockcount = 0;
> > > + info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
> > > +
> > > + info->missing_owner = XFS_FMR_OWN_UNKNOWN;
> > > +
> >
> > Ok, I see. We've swapped the meaning of the gaps in the record space to
> > allocated blocks with an unknown owner. Comment please :). Also, I
> > suppose the comments in xfs_getfsmap_helper() could be more discrete
> > about this as they currently explicitly say that gaps refer to free
> > space in one or two places.
>
> Er... yeah. My first reaction to this was "Oh, I should introduce
> info.missing_owner in the first patch" but apparently I already did but
> evidently forgot to update the comments. :(
>
> So, just to be clear, xfs_getfsmap_helper notices gaps between the rmaps
> it's being fed, and emits synthesized fsmaps for the gaps that are owned
> by info.missing_owner.
>
> > > + start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
> > > + end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
> > > +
> > > + /* Query each AG */
> > > + for (info->agno = start_ag; info->agno <= end_ag; info->agno++) {
> > > + if (info->agno == end_ag) {
> > > + info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
> > > + end_fsb);
> > > + info->high.rm_offset = XFS_BB_TO_FSBT(mp,
> > > + dkey_high->fmr_offset);
> > > + error = xfs_fsmap_owner_to_rmap(dkey_high, &info->high);
> > > + if (error)
> > > + goto err;
> > > + xfs_getfsmap_set_irec_flags(&info->high, dkey_high);
> > > + }
> > > +
> > > + if (bt_cur) {
> > > + xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
> > > + bt_cur = NULL;
> > > + info->agf_bp = NULL;
> > > + }
> > > +
> > > + error = xfs_alloc_read_agf(mp, tp, info->agno, 0,
> > > + &info->agf_bp);
> > > + if (error)
> > > + goto err;
> > > +
> > > + trace_xfs_fsmap_low_key(mp, info->dev, info->agno,
> > > + info->low.rm_startblock,
> > > + info->low.rm_blockcount,
> > > + info->low.rm_owner,
> > > + info->low.rm_offset);
> > > +
> > > + trace_xfs_fsmap_high_key(mp, info->dev, info->agno,
> > > + info->high.rm_startblock,
> > > + info->high.rm_blockcount,
> > > + info->high.rm_owner,
> > > + info->high.rm_offset);
> > > +
> > > + bt_cur = xfs_allocbt_init_cursor(mp, tp, info->agf_bp,
> > > + info->agno, XFS_BTNUM_BNO);
> > > + alow.ar_startblock = info->low.rm_startblock;
> > > + ahigh.ar_startblock = info->high.rm_startblock;
> > > + error = xfs_alloc_query_range(bt_cur, &alow, &ahigh,
> > > + xfs_getfsmap_datadev_bnobt_helper, info);
> > > + if (error)
> > > + goto err;
> > > +
> > > + if (info->agno == start_ag) {
> > > + info->low.rm_startblock = 0;
> > > + info->low.rm_owner = 0;
> > > + info->low.rm_offset = 0;
> > > + info->low.rm_flags = 0;
> > > + }
> > > + }
> > > +
> > > + /* Report any free space at the end of the AG */
> > > + info->last = true;
> > > + error = xfs_getfsmap_datadev_bnobt_helper(bt_cur, &ahigh, info);
> > > + if (error)
> > > + goto err;
> > > +
> >
> > I guess the comment here is a little misleading too because we aren't
> > looking for more free space. Rather, any more allocated ranges beyond
> > the last free space record, right?
>
> Yes. "Report any used space at the end of the AG"
>
> > FWIW, this handler overall looks quite similar to xfs_datadev_helper().
> > It might be interesting to see if the guts of the two could be combined
> > into an internal function (called by both _datadev() and
> > _datadev_bnobt() as wrappers) that has the unique bits passed to it
> > (i.e., cursor, missing owner).
>
> Hmm. You're right, I bet they could be parameterized fairly easily.
Yes, they can be parameterized pretty easily. The two rtdev getfsmap
implementations can also be collapsed into a single driver function,
which is particularly slick if I introduce the rtalloc query_range
functions as part of the getfsmap patchset, because then I no longer
have to open-code iteration of the rtbitmap free extents.
--D
>
> --D
>
> >
> > Brian
> >
> > > +err:
> > > + if (bt_cur)
> > > + xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
> > > + XFS_BTREE_NOERROR);
> > > + if (info->agf_bp)
> > > + info->agf_bp = NULL;
> > > +
> > > + return error;
> > > +}
> > > +
> > > /* Do we recognize the device? */
> > > STATIC bool
> > > xfs_getfsmap_is_valid_device(
> > > @@ -689,8 +837,6 @@ xfs_getfsmap(
> > > int i;
> > > int error = 0;
> > >
> > > - if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
> > > - return -EOPNOTSUPP;
> > > if (head->fmh_iflags & ~FMH_IF_VALID)
> > > return -EINVAL;
> > > rkey_low = head->fmh_keys;
> > > @@ -704,7 +850,10 @@ xfs_getfsmap(
> > > /* Set up our device handlers. */
> > > memset(handlers, 0, sizeof(handlers));
> > > handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
> > > - handlers[0].fn = xfs_getfsmap_datadev;
> > > + if (xfs_sb_version_hasrmapbt(&mp->m_sb))
> > > + handlers[0].fn = xfs_getfsmap_datadev;
> > > + else
> > > + handlers[0].fn = xfs_getfsmap_datadev_bnobt;
> > > if (mp->m_logdev_targp != mp->m_ddev_targp) {
> > > handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
> > > handlers[1].fn = xfs_getfsmap_logdev;
> > >
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at http://vger.kernel.org/majordomo-info.html
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic