List: linux-bcache
Subject: Re: [PATCH v2] Separate bch_moving_gc() from bch_btree_gc()
From: Eric Wheeler <bcache@lists.ewheeler.net>
Date: 2023-06-28 23:37:57
Message-ID: <4dac5ba5-fac3-3383-45ed-ca8c24a033b0@ewheeler.net>
On Wed, 28 Jun 2023, Mingzhe Zou wrote:
> From: Mingzhe Zou <zoumingzhe@qq.com>
>
> Moving gc uses cache->heap to defragment the disk. Unlike btree gc,
> moving gc only takes up part of the disk bandwidth.
>
> The heap size is constant, so the number of buckets released by
> each moving gc pass is limited, and bch_moving_gc() needs to be
> called multiple times.
>
> If bch_gc_thread() always calls bch_btree_gc(), it will block
> IO requests. This patch allows bch_gc_thread() to call only
> bch_moving_gc() when fragmentation is high.
>
> Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
Hi Mingzhe,
Could this be used to free buckets down to a minimum bucket count?
See more below:
> ---
> drivers/md/bcache/bcache.h | 4 ++-
> drivers/md/bcache/btree.c | 66 ++++++++++++++++++++++++++++++++++--
> drivers/md/bcache/movinggc.c | 2 ++
> 3 files changed, 68 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
> index 5a79bb3c272f..155deff0ce05 100644
> --- a/drivers/md/bcache/bcache.h
> +++ b/drivers/md/bcache/bcache.h
> @@ -461,7 +461,8 @@ struct cache {
> * until a gc finishes - otherwise we could pointlessly burn a ton of
> * cpu
> */
> - unsigned int invalidate_needs_gc;
> + unsigned int invalidate_needs_gc:1;
> + unsigned int only_moving_gc:1;
>
> bool discard; /* Get rid of? */
>
> @@ -629,6 +630,7 @@ struct cache_set {
> struct gc_stat gc_stats;
> size_t nbuckets;
> size_t avail_nbuckets;
> + size_t fragment_nbuckets;
>
> struct task_struct *gc_thread;
> /* Where in the btree gc currently is */
> diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
> index 68b9d7ca864e..6698a4480e05 100644
> --- a/drivers/md/bcache/btree.c
> +++ b/drivers/md/bcache/btree.c
> @@ -88,6 +88,7 @@
> * Test module load/unload
> */
>
> +#define COPY_GC_PERCENT 5
> #define MAX_NEED_GC 64
> #define MAX_SAVE_PRIO 72
> #define MAX_GC_TIMES 100
> @@ -1705,6 +1706,7 @@ static void btree_gc_start(struct cache_set *c)
>
> mutex_lock(&c->bucket_lock);
>
> + set_gc_sectors(c);
> c->gc_mark_valid = 0;
> c->gc_done = ZERO_KEY;
>
> @@ -1825,8 +1827,51 @@ static void bch_btree_gc(struct cache_set *c)
> memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
>
> trace_bcache_gc_end(c);
> +}
> +
> +extern unsigned int bch_cutoff_writeback;
> +extern unsigned int bch_cutoff_writeback_sync;
> +
> +static bool moving_gc_should_run(struct cache_set *c)
> +{
> + struct bucket *b;
> + struct cache *ca = c->cache;
> + size_t moving_gc_threshold = ca->sb.bucket_size >> 2, frag_percent;
> + unsigned long used_buckets = 0, frag_buckets = 0, move_buckets = 0;
> + unsigned long dirty_sectors = 0, frag_sectors, used_sectors;
> +
> + if (c->gc_stats.in_use > bch_cutoff_writeback_sync)
> + return true;
> +
> + mutex_lock(&c->bucket_lock);
> + for_each_bucket(b, ca) {
> + if (GC_MARK(b) != GC_MARK_DIRTY)
> + continue;
> +
> + used_buckets++;
> +
> + used_sectors = GC_SECTORS_USED(b);
> + dirty_sectors += used_sectors;
> +
> + if (used_sectors < ca->sb.bucket_size)
> + frag_buckets++;
>
> - bch_moving_gc(c);
> + if (used_sectors <= moving_gc_threshold)
> + move_buckets++;
> + }
> + mutex_unlock(&c->bucket_lock);
> +
> + c->fragment_nbuckets = frag_buckets;
> + frag_sectors = used_buckets * ca->sb.bucket_size - dirty_sectors;
> + frag_percent = div_u64(frag_sectors * 100, ca->sb.bucket_size * c->nbuckets);
> +
> + if (move_buckets > ca->heap.size)
> + return true;
> +
> + if (frag_percent >= COPY_GC_PERCENT)
> + return true;
For example, could we add this to `moving_gc_should_run`:
+ if (used_buckets >= MAX_USED_BUCKETS)
+ return true;
to solve the issue in this thread:
https://www.spinics.net/lists/linux-bcache/msg11746.html
where MAX_USED_BUCKETS is a placeholder for a sysfs tunable like
`cache_max_used_percent`?
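As a rough sketch (the names here are hypothetical and this is
untested), the sysfs-backed version might look something like:

	/* cache_max_used_percent: hypothetical sysfs knob, 0 = disabled */
	if (ca->cache_max_used_percent &&
	    div_u64(used_buckets * 100, c->nbuckets) >=
	    ca->cache_max_used_percent)
		return true;

so moving gc would kick in before the cache fills up completely,
instead of waiting for fragmentation to cross COPY_GC_PERCENT.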
-Eric
> +
> + return false;
> }
>
> static bool gc_should_run(struct cache_set *c)
> @@ -1839,6 +1884,19 @@ static bool gc_should_run(struct cache_set *c)
> if (atomic_read(&c->sectors_to_gc) < 0)
> return true;
>
> +	/*
> +	 * Moving gc uses cache->heap to defragment the disk. Unlike btree
> +	 * gc, moving gc only takes up part of the disk bandwidth.
> +	 * The heap size is constant, so the buckets released by each
> +	 * moving gc pass are limited and bch_moving_gc() needs to be
> +	 * called multiple times. If bch_gc_thread() always calls
> +	 * bch_btree_gc(), it will block IO requests.
> +	 */
> + if (c->copy_gc_enabled && moving_gc_should_run(c)) {
> + ca->only_moving_gc = 1;
> + return true;
> + }
> +
> return false;
> }
>
> @@ -1856,8 +1914,10 @@ static int bch_gc_thread(void *arg)
> test_bit(CACHE_SET_IO_DISABLE, &c->flags))
> break;
>
> - set_gc_sectors(c);
> - bch_btree_gc(c);
> + if (!c->cache->only_moving_gc)
> + bch_btree_gc(c);
> +
> + bch_moving_gc(c);
> }
>
> wait_for_kthread_stop();
> diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
> index 9f32901fdad1..04da088cefe8 100644
> --- a/drivers/md/bcache/movinggc.c
> +++ b/drivers/md/bcache/movinggc.c
> @@ -200,6 +200,8 @@ void bch_moving_gc(struct cache_set *c)
> struct bucket *b;
> unsigned long sectors_to_move, reserve_sectors;
>
> + c->cache->only_moving_gc = 0;
> +
> if (!c->copy_gc_enabled)
> return;
>
> --
> 2.17.1.windows.2
>
>