[prev in list] [next in list] [prev in thread] [next in thread] 

List:       busybox
Subject:    Re: [PATCH v2] shuf: speed-up when limited output is requested
From:       Denys Vlasenko <vda.linux () googlemail ! com>
Date:       2021-08-22 13:43:16
Message-ID: CAK1hOcORJ1+-0iHmBsp=z0nSL1eMHRMRFNUOk08S+uso0ygtTQ () mail ! gmail ! com
[Download RAW message or body]

Applied, thank you

On Sat, Aug 7, 2021 at 10:42 AM Ron Yorston <rmy@pobox.com> wrote:
>
> A user noted that the following command was slower than they
> expected:
>
>    busybox shuf -i "1500000000-$(date +%s)" -n 5
>
> At time of writing the range contains 128 million values.  On my
> system this takes 7.7s whereas 'shuf' from coreutils takes a
> handful of milliseconds.
>
> Optimise BusyBox 'shuf' for cases where -n is specified by stopping
> shuffling once the required number of lines has been processed.
> On my system the time for the example is reduced to 0.4s.
>
> function                                             old     new   delta
> shuf_main                                            520     540     +20
> ------------------------------------------------------------------------------
> (add/remove: 0/0 grow/shrink: 1/0 up/down: 20/0)               Total: 20 bytes
>
> v2: Code shrink.  Since outlines <= numlines:
>     - the loop in shuffle_lines() only needs to test the value of
>       outlines;
>     - shuffle_lines() can be called unconditionally.
>     Update timing to allow for the 13 million seconds elapsed since v1.
>
> Signed-off-by: Ron Yorston <rmy@pobox.com>
> ---
>  coreutils/shuf.c | 27 ++++++++++++++-------------
>  1 file changed, 14 insertions(+), 13 deletions(-)
>
> diff --git a/coreutils/shuf.c b/coreutils/shuf.c
> index fdbd3e9b2..50dfa249d 100644
> --- a/coreutils/shuf.c
> +++ b/coreutils/shuf.c
> @@ -39,8 +39,10 @@
>
>  /*
>   * Use the Fisher-Yates shuffle algorithm on an array of lines.
> + * If the required number of output lines is less than the total
> + * we can stop shuffling early.
>   */
> -static void shuffle_lines(char **lines, unsigned numlines)
> +static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines)
>  {
>         unsigned i;
>         unsigned r;
> @@ -48,7 +50,7 @@ static void shuffle_lines(char **lines, unsigned numlines)
>
>         srand(monotonic_us());
>
> -       for (i = numlines-1; i > 0; i--) {
> +       for (i = numlines-1; outlines > 0; i--, outlines--) {
>                 r = rand();
>                 /* RAND_MAX can be as small as 32767 */
>                 if (i > RAND_MAX)
> @@ -67,7 +69,7 @@ int shuf_main(int argc, char **argv)
>         char *opt_i_str, *opt_n_str, *opt_o_str;
>         unsigned i;
>         char **lines;
> -       unsigned numlines;
> +       unsigned numlines, outlines;
>         char eol;
>
>         opts = getopt32(argv, "^"
> @@ -128,24 +130,23 @@ int shuf_main(int argc, char **argv)
>                 fclose_if_not_stdin(fp);
>         }
>
> -       if (numlines != 0)
> -               shuffle_lines(lines, numlines);
> +       outlines = numlines;
> +       if (opts & OPT_n) {
> +               outlines = xatou(opt_n_str);
> +               if (outlines > numlines)
> +                       outlines = numlines;
> +       }
> +
> +       shuffle_lines(lines, numlines, outlines);
>
>         if (opts & OPT_o)
>                 xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO);
>
> -       if (opts & OPT_n) {
> -               unsigned maxlines;
> -               maxlines = xatou(opt_n_str);
> -               if (numlines > maxlines)
> -                       numlines = maxlines;
> -       }
> -
>         eol = '\n';
>         if (opts & OPT_z)
>                 eol = '\0';
>
> -       for (i = 0; i < numlines; i++) {
> +       for (i = numlines-outlines; i < numlines; i++) {
>                 if (opts & OPT_i)
>                         printf("%u%c", (unsigned)(uintptr_t)lines[i], eol);
>                 else
> --
> 2.31.1
>
> _______________________________________________
> busybox mailing list
> busybox@busybox.net
> http://lists.busybox.net/mailman/listinfo/busybox
_______________________________________________
busybox mailing list
busybox@busybox.net
http://lists.busybox.net/mailman/listinfo/busybox
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic