[prev in list] [next in list] [prev in thread] [next in thread] 

List:       git
Subject:    Re: [PATCH] Enable index-pack threading in msysgit.
From:       Stefan Zager <szager () chromium ! org>
Date:       2014-03-21 5:35:52
Message-ID: CAHOQ7J8eEUd+NpL78RQqGFYzhD9Fs0hdGOHhmXiujJdGrfeS=A () mail ! gmail ! com
[Download RAW message or body]

On Thu, Mar 20, 2014 at 10:21 PM, Duy Nguyen <pclouds@gmail.com> wrote:
> On Fri, Mar 21, 2014 at 08:51:18AM +0700, Duy Nguyen wrote:
>> On Thu, Mar 20, 2014 at 11:08 PM, Stefan Zager <szager@chromium.org> wrote:
>> > Duy, would you like to re-post your patch without the new pread implementation?
>>
>> I will but let me try out the sliding window idea first. My quick
>> tests on git.git show me we may only need 21k mmap instead of 177k
>> pread. That hints some potential performance improvement.
>
> The patch at the bottom reuses (un)use_pack() instead of pread(). The
> results on linux-2.6 do not look any different. I guess we can drop
> the idea.
>
> It makes me wonder, though, what's wrong a simple patch like this to
> make pread in index-pack thread-safe? It does not look any different
> either from the performance point of view, perhaps because
> unpack_data() reads small deltas most of the time

When you serialize disk access in this way, the effect on performance
is really dependent on the behavior of the OS, as well as the locality
of the read offsets.  Assuming -- fairly, I think -- that the reads
will be pretty randomly distributed (i.e., no locality to speak of),
then your best bet is get as many read operations in flight as
possible, and let the disk scheduler optimize the seek time.

If I have a chance, I will try out this patch in msysgit on the
chromium repositories.

>
> -- 8< --
> diff --git a/builtin/index-pack.c b/builtin/index-pack.c
> index a6b1c17..b91f4f8 100644
> --- a/builtin/index-pack.c
> +++ b/builtin/index-pack.c
> @@ -40,11 +40,6 @@ struct base_data {
>         int ofs_first, ofs_last;
>  };
>
> -#if !defined(NO_PTHREADS) && defined(NO_THREAD_SAFE_PREAD)
> -/* pread() emulation is not thread-safe. Disable threading. */
> -#define NO_PTHREADS
> -#endif
> -
>  struct thread_local {
>  #ifndef NO_PTHREADS
>         pthread_t thread;
> @@ -175,6 +170,22 @@ static void cleanup_thread(void)
>  #endif
>
>
> +#if defined(NO_THREAD_SAFE_PREAD)
> +static inline ssize_t pread_safe(int fd, void *buf, size_t count, off_t off)
> +{
> +       int ret;
> +       read_lock();
> +       ret = pread(fd, buf, count, off);
> +       read_unlock();
> +       return ret;
> +}
> +#else
> +static inline ssize_t pread_safe(int fd, void *buf, size_t count, off_t off)
> +{
> +       return pread(fd, buf, count, off);
> +}
> +#endif
> +
>  static int mark_link(struct object *obj, int type, void *data)
>  {
>         if (!obj)
> @@ -542,7 +553,7 @@ static void *unpack_data(struct object_entry *obj,
>
>         do {
>                 ssize_t n = (len < 64*1024) ? len : 64*1024;
> -               n = pread(pack_fd, inbuf, n, from);
> +               n = pread_safe(pack_fd, inbuf, n, from);
>                 if (n < 0)
>                         die_errno(_("cannot pread pack file"));
>                 if (!n)
> -- 8< --
>
> And the sliding window patch for the list archive
>
> -- 8< --
> diff --git a/builtin/index-pack.c b/builtin/index-pack.c
> index a6b1c17..6f5c6d9 100644
> --- a/builtin/index-pack.c
> +++ b/builtin/index-pack.c
> @@ -91,7 +91,8 @@ static off_t consumed_bytes;
>  static unsigned deepest_delta;
>  static git_SHA_CTX input_ctx;
>  static uint32_t input_crc32;
> -static int input_fd, output_fd, pack_fd;
> +static int input_fd, output_fd;
> +static struct packed_git *pack;
>
>  #ifndef NO_PTHREADS
>
> @@ -224,8 +225,10 @@ static unsigned check_objects(void)
>  static void flush(void)
>  {
>         if (input_offset) {
> -               if (output_fd >= 0)
> +               if (output_fd >= 0) {
>                         write_or_die(output_fd, input_buffer, input_offset);
> +                       pack->pack_size += input_offset;
> +               }
>                 git_SHA1_Update(&input_ctx, input_buffer, input_offset);
>                 memmove(input_buffer, input_buffer + input_offset, input_len);
>                 input_offset = 0;
> @@ -277,6 +280,10 @@ static void use(int bytes)
>
>  static const char *open_pack_file(const char *pack_name)
>  {
> +       pack = xmalloc(sizeof(*pack) + 1);
> +       memset(pack, 0, sizeof(*pack));
> +       pack->pack_name[0] = '\0';
> +
>         if (from_stdin) {
>                 input_fd = 0;
>                 if (!pack_name) {
> @@ -288,13 +295,17 @@ static const char *open_pack_file(const char *pack_name)
>                         output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600);
>                 if (output_fd < 0)
>                         die_errno(_("unable to create '%s'"), pack_name);
> -               pack_fd = output_fd;
> +               pack->pack_fd = output_fd;
>         } else {
> +               struct stat st;
>                 input_fd = open(pack_name, O_RDONLY);
>                 if (input_fd < 0)
>                         die_errno(_("cannot open packfile '%s'"), pack_name);
> +               if (lstat(pack_name, &st))
> +                       die_errno(_("cannot stat packfile '%s'"), pack_name);
>                 output_fd = -1;
> -               pack_fd = input_fd;
> +               pack->pack_fd = input_fd;
> +               pack->pack_size = st.st_size;
>         }
>         git_SHA1_Init(&input_ctx);
>         return pack_name;
> @@ -531,9 +542,15 @@ static void *unpack_data(struct object_entry *obj,
>         unsigned char *data, *inbuf;
>         git_zstream stream;
>         int status;
> +       struct pack_window *w_cursor = NULL;
> +
> +       if (from + len > pack->pack_size)
> +               die(Q_("premature end of pack file, %lu byte missing",
> +                      "premature end of pack file, %lu bytes missing",
> +                      from + len - pack->pack_size),
> +                   (unsigned long)(from + len - pack->pack_size));
>
>         data = xmalloc(consume ? 64*1024 : obj->size);
> -       inbuf = xmalloc((len < 64*1024) ? len : 64*1024);
>
>         memset(&stream, 0, sizeof(stream));
>         git_inflate_init(&stream);
> @@ -541,15 +558,12 @@ static void *unpack_data(struct object_entry *obj,
>         stream.avail_out = consume ? 64*1024 : obj->size;
>
>         do {
> -               ssize_t n = (len < 64*1024) ? len : 64*1024;
> -               n = pread(pack_fd, inbuf, n, from);
> -               if (n < 0)
> -                       die_errno(_("cannot pread pack file"));
> -               if (!n)
> -                       die(Q_("premature end of pack file, %lu byte missing",
> -                              "premature end of pack file, %lu bytes missing",
> -                              len),
> -                           len);
> +               ssize_t n;
> +               unsigned long left;
> +               read_lock();
> +               inbuf = use_pack(pack, &w_cursor, from, &left);
> +               read_unlock();
> +               n = left > len ? len : left;
>                 from += n;
>                 len -= n;
>                 stream.next_in = inbuf;
> @@ -568,6 +582,9 @@ static void *unpack_data(struct object_entry *obj,
>                                 stream.avail_out = 64*1024;
>                         } while (status == Z_OK && stream.avail_in);
>                 }
> +               read_lock();
> +               unuse_pack(&w_cursor);
> +               read_unlock();
>         } while (len && status == Z_OK && !stream.avail_in);
>
>         /* This has been inflated OK when first encountered, so... */
> @@ -575,7 +592,6 @@ static void *unpack_data(struct object_entry *obj,
>                 die(_("serious inflate inconsistency"));
>
>         git_inflate_end(&stream);
> -       free(inbuf);
>         if (consume) {
>                 free(data);
>                 data = NULL;
> @@ -1657,6 +1673,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
>         free(objects);
>         free(index_name_buf);
>         free(keep_name_buf);
> +       close_pack_windows(pack);
> +       free(pack);
>         if (pack_name == NULL)
>                 free((void *) curr_pack);
>         if (index_name == NULL)
> diff --git a/sha1_file.c b/sha1_file.c
> index 187f5a6..aa0b16d 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -977,7 +977,13 @@ unsigned char *use_pack(struct packed_git *p,
>          */
>         if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
>                 die("packfile %s cannot be accessed", p->pack_name);
> -       if (offset > (p->pack_size - 20))
> +       /*
> +        * index-pack uses this function even if the pack is not
> +        * complete yet (i.e. trailing SHA-1 missing). Loosen the
> +        * check a bit in this case (pack_empty name uses as the
> +        * indicator).
> +        */
> +       if (offset > (p->pack_size - (*p->pack_name ? 20 : 0)))
>                 die("offset beyond end of packfile (truncated pack?)");
>
>         if (!win || !in_window(win, offset)) {
> -- 8< --
>
> --
> Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic