[prev in list] [next in list] [prev in thread] [next in thread] 

List:       openbsd-tech
Subject:    Re: malloc: micro optimizations
From:       Masato Asou <asou () soum ! co ! jp>
Date:       2023-10-26 0:58:29
Message-ID: 20231026.100010.1344226175332966621.asou () soum ! co ! jp
[Download RAW message or body]

Hi,

It works fine for me.
ok asou@
--
ASOU Masato

From: Otto Moerbeek <otto@drijf.net>
Date: Wed, 25 Oct 2023 11:04:01 +0200

> Hi,
> 
> a few micro-optimizations, including getting rid of some statistics
> that are not actually very interesting.
> 
> Speedup amounts to a few tenths of percents to a few percents,
> depending on how biased the benchmark is.
> 
> 	-Otto
> 
> Index: stdlib/malloc.c
> ===================================================================
> RCS file: /home/cvs/src/lib/libc/stdlib/malloc.c,v
> retrieving revision 1.291
> diff -u -p -r1.291 malloc.c
> --- stdlib/malloc.c	22 Oct 2023 12:19:26 -0000	1.291
> +++ stdlib/malloc.c	24 Oct 2023 14:05:37 -0000
> @@ -169,16 +169,12 @@ struct dir_info {
>  	void *caller;
>  	size_t inserts;
>  	size_t insert_collisions;
> -	size_t finds;
> -	size_t find_collisions;
>  	size_t deletes;
>  	size_t delete_moves;
>  	size_t cheap_realloc_tries;
>  	size_t cheap_reallocs;
>  	size_t malloc_used;		/* bytes allocated */
>  	size_t malloc_guarded;		/* bytes used for guards */
> -	size_t pool_searches;		/* searches for pool */
> -	size_t other_pool;		/* searches in other pool */
>  #define STATS_ADD(x,y)	((x) += (y))
>  #define STATS_SUB(x,y)	((x) -= (y))
>  #define STATS_INC(x)	((x)++)
> @@ -209,12 +205,14 @@ static void unmap(struct dir_info *d, vo
>  struct chunk_info {
>  	LIST_ENTRY(chunk_info) entries;
>  	void *page;			/* pointer to the page */
> +	/* number of shorts should add up to 8, check alloc_chunk_info() */
>  	u_short canary;
>  	u_short bucket;
>  	u_short free;			/* how many free chunks */
>  	u_short total;			/* how many chunks */
>  	u_short offset;			/* requested size table offset */
> -	u_short bits[1];		/* which chunks are free */
> +#define CHUNK_INFO_TAIL			3
> +	u_short bits[CHUNK_INFO_TAIL];	/* which chunks are free */
>  };
>  
>  #define CHUNK_FREE(i, n)	((i)->bits[(n) / MALLOC_BITS] & (1U << ((n) % MALLOC_BITS)))
> @@ -656,12 +654,10 @@ find(struct dir_info *d, void *p)
>  	index = hash(p) & mask;
>  	r = d->r[index].p;
>  	q = MASK_POINTER(r);
> -	STATS_INC(d->finds);
>  	while (q != p && r != NULL) {
>  		index = (index - 1) & mask;
>  		r = d->r[index].p;
>  		q = MASK_POINTER(r);
> -		STATS_INC(d->find_collisions);
>  	}
>  	return (q == p && r != NULL) ? &d->r[index] : NULL;
>  }
> @@ -949,7 +945,7 @@ init_chunk_info(struct dir_info *d, stru
>  
>  	p->bucket = bucket;
>  	p->total = p->free = MALLOC_PAGESIZE / B2ALLOC(bucket);
> -	p->offset = bucket == 0 ? 0xdead : howmany(p->total, MALLOC_BITS);
> +	p->offset = howmany(p->total, MALLOC_BITS);
>  	p->canary = (u_short)d->canary1;
>  
>  	/* set all valid bits in the bitmap */
> @@ -971,8 +967,13 @@ alloc_chunk_info(struct dir_info *d, u_i
>  		count = MALLOC_PAGESIZE / B2ALLOC(bucket);
>  
>  		size = howmany(count, MALLOC_BITS);
> -		size = sizeof(struct chunk_info) + (size - 1) * sizeof(u_short);
> -		if (mopts.chunk_canaries)
> +		/* see declaration of struct chunk_info */
> +		if (size <= CHUNK_INFO_TAIL)
> +			size = 0;
> +		else
> +			size -= CHUNK_INFO_TAIL;
> +		size = sizeof(struct chunk_info) + size * sizeof(u_short);
> +		if (mopts.chunk_canaries && bucket > 0)
>  			size += count * sizeof(u_short);
>  		size = _ALIGN(size);
>  		count = MALLOC_PAGESIZE / size;
> @@ -1129,8 +1130,7 @@ fill_canary(char *ptr, size_t sz, size_t
>  static void *
>  malloc_bytes(struct dir_info *d, size_t size)
>  {
> -	u_int i, r, bucket, listnum;
> -	size_t k;
> +	u_int i, k, r, bucket, listnum;
>  	u_short	*lp;
>  	struct chunk_info *bp;
>  	void *p;
> @@ -1170,7 +1170,7 @@ malloc_bytes(struct dir_info *d, size_t 
>  	/* no bit halfway, go to next full short */
>  	i /= MALLOC_BITS;
>  	for (;;) {
> -		if (++i >= howmany(bp->total, MALLOC_BITS))
> +		if (++i >= bp->offset)
>  			i = 0;
>  		lp = &bp->bits[i];
>  		if (*lp) {
> @@ -1228,7 +1228,7 @@ validate_canary(struct dir_info *d, u_ch
>  	}
>  }
>  
> -static uint32_t
> +static inline uint32_t
>  find_chunknum(struct dir_info *d, struct chunk_info *info, void *ptr, int check)
>  {
>  	uint32_t chunknum;
> @@ -1532,12 +1532,10 @@ findpool(void *p, struct dir_info *argpo
>  	struct dir_info *pool = argpool;
>  	struct region_info *r = find(pool, p);
>  
> -	STATS_INC(pool->pool_searches);
>  	if (r == NULL) {
>  		u_int i, nmutexes;
>  
>  		nmutexes = mopts.malloc_pool[1]->malloc_mt ? mopts.malloc_mutexes : 2;
> -		STATS_INC(pool->other_pool);
>  		for (i = 1; i < nmutexes; i++) {
>  			u_int j = (argpool->mutex + i) & (nmutexes - 1);
>  
> @@ -2581,13 +2579,10 @@ malloc_dump1(int poolno, struct dir_info
>  		    d->mmap_flag);
>  		ulog("Region slots free %zu/%zu\n",
>  			d->regions_free, d->regions_total);
> -		ulog("Finds %zu/%zu\n", d->finds, d->find_collisions);
>  		ulog("Inserts %zu/%zu\n", d->inserts, d->insert_collisions);
>  		ulog("Deletes %zu/%zu\n", d->deletes, d->delete_moves);
>  		ulog("Cheap reallocs %zu/%zu\n",
>  		    d->cheap_reallocs, d->cheap_realloc_tries);
> -		ulog("Other pool searches %zu/%zu\n",
> -		    d->other_pool, d->pool_searches);
>  		ulog("In use %zu\n", d->malloc_used);
>  		ulog("Guarded %zu\n", d->malloc_guarded);
>  		dump_free_chunk_info(d, leaks);
> 

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic