[prev in list] [next in list] [prev in thread] [next in thread] 

List:       musl
Subject:    Re: [musl] ppc soft-float regression
From:       Rich Felker <dalias () libc ! org>
Date:       2015-05-25 23:51:20
Message-ID: 20150525235120.GX17573 () brightrain ! aerifal ! cx
[Download RAW message or body]

On Mon, May 25, 2015 at 06:46:29PM -0400, Rich Felker wrote:
> On Mon, May 25, 2015 at 05:45:12PM -0400, Rich Felker wrote:
> > @@ -74,6 +77,16 @@ void _dlstart_c(size_t *sp, size_t *dynv)
> >  		*rel_addr = (size_t)base + rel[2];
> >  	}
> >  
> > +	/* Prepare storage for stage 2 to save clobbered REL
> > +	 * addends so they can be reused in stage 3. There should
> > +	 * be very few. If something goes wrong and there are a
> > +	 * huge number, pass a null pointer to trigger stage 2
> > +	 * to abort instead of risking stack overflow. */
> > +	int too_many_addends = symbolic_rel_cnt > 4096;
> > +	size_t naddends = too_many_addends ? 1 : symbolic_rel_cnt;
> > +	size_t addends[naddends];
> > +	size_t *paddends = too_many_addends ? 0 : addends;
> > +
> >  	const char *strings = (void *)(base + dyn[DT_STRTAB]);
> >  	const Sym *syms = (void *)(base + dyn[DT_SYMTAB]);
> 
> This logic could lead to a zero-sized VLA (thus UB); instead, trying:
> 
> 	int too_many_addends = symbolic_rel_cnt > 4096;
> 	size_t naddends = too_many_addends ? 0 : symbolic_rel_cnt;
> 	size_t addends[naddends+1];
> 	size_t *paddends = too_many_addends ? 0 : addends;
> 
> Avoiding the wasteful +1 would involve more conditionals so I think
> it's best just avoiding it. Alternatively this might be
> simpler/smaller:
> 
> 	size_t addends[symbolic_rel_cnt & LIMIT-1 | 1];
> 	size_t *paddends = symbolic_rel_cnt >= LIMIT ? 0 : addends;

Attached is an updated version of the patch with much simpler logic
and the addend buffer moved into stage 2 which is now possible thanks
to commit 768b82c6de24e480267c4c251c440edfc71800e3.

Rich

["save_and_reuse_addends_v3.diff" (text/plain)]

diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 3842aeb..42930ad 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -74,7 +74,6 @@ struct dso {
 	volatile int new_dtv_idx, new_tls_idx;
 	struct td_index *td_index;
 	struct dso *fini_next;
-	int rel_early_relative, rel_update_got;
 	char *shortname;
 	char buf[];
 };
@@ -96,6 +95,9 @@ static struct builtin_tls {
 } builtin_tls[1];
 #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
 
+#define ADDEND_LIMIT 4096
+static size_t *saved_addends, *apply_addends_to;
+
 static struct dso ldso;
 static struct dso *head, *tail, *fini_head;
 static char *env_path, *sys_path;
@@ -256,9 +258,17 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 	size_t sym_val;
 	size_t tls_val;
 	size_t addend;
+	int skip_relative = 0, reuse_addends = 0, save_slot = 0;
+
+	if (dso == &ldso) {
+		/* Only ldso's REL table needs addend saving/reuse. */
+		if (rel == apply_addends_to)
+			reuse_addends = 1;
+		skip_relative = 1;
+	}
 
 	for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
-		if (dso->rel_early_relative && IS_RELATIVE(rel[1])) continue;
+		if (skip_relative && IS_RELATIVE(rel[1])) continue;
 		type = R_TYPE(rel[1]);
 		sym_index = R_SYM(rel[1]);
 		reloc_addr = (void *)(base + rel[0]);
@@ -280,12 +290,20 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 			def.dso = dso;
 		}
 
-		int gotplt = (type == REL_GOT || type == REL_PLT);
-		if (dso->rel_update_got && !gotplt && stride==2) continue;
-
-		addend = stride>2 ? rel[2]
-			: gotplt || type==REL_COPY ? 0
-			: *reloc_addr;
+		if (stride > 2) {
+			addend = rel[2];
+		} else if (type==REL_GOT || type==REL_PLT|| type==REL_COPY) {
+			addend = 0;
+		} else if (reuse_addends) {
+			/* Save original addend in stage 2 where the dso
+			 * chain consists of just ldso; otherwise read back
+			 * saved addend since the inline one was clobbered. */
+			if (head==&ldso)
+				saved_addends[save_slot] = *reloc_addr;
+			addend = saved_addends[save_slot++];
+		} else {
+			addend = *reloc_addr;
+		}
 
 		sym_val = def.sym ? (size_t)def.dso->base+def.sym->st_value : 0;
 		tls_val = def.sym ? def.sym->st_value : 0;
@@ -879,7 +897,7 @@ static void do_mips_relocs(struct dso *p, size_t *got)
 	size_t i, j, rel[2];
 	unsigned char *base = p->base;
 	i=0; search_vec(p->dynv, &i, DT_MIPS_LOCAL_GOTNO);
-	if (p->rel_early_relative) {
+	if (p==&ldso) {
 		got += i;
 	} else {
 		while (i--) *got++ += (size_t)base;
@@ -1125,15 +1143,29 @@ void __dls2(unsigned char *base, size_t *sp)
 	ldso.phnum = ehdr->e_phnum;
 	ldso.phdr = (void *)(base + ehdr->e_phoff);
 	ldso.phentsize = ehdr->e_phentsize;
-	ldso.rel_early_relative = 1;
 	kernel_mapped_dso(&ldso);
 	decode_dyn(&ldso);
 
+	/* Prepare storage to save clobbered REL addends so they
+	 * can be reused in stage 3. There should be very few. If
+	 * something goes wrong and there are a huge number, abort
+	 * instead of risking stack overflow. */
+	size_t dyn[DYN_CNT];
+	decode_vec(ldso.dynv, dyn, DYN_CNT);
+	size_t *rel = (void *)(base+dyn[DT_REL]);
+	size_t rel_size = dyn[DT_RELSZ];
+	size_t symbolic_rel_cnt = 0;
+	apply_addends_to = rel;
+	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t))
+		if (!IS_RELATIVE(rel[1])) symbolic_rel_cnt++;
+	if (symbolic_rel_cnt >= ADDEND_LIMIT) a_crash();
+	size_t addends[symbolic_rel_cnt+1];
+	saved_addends = addends;
+
 	head = &ldso;
 	reloc_all(&ldso);
 
 	ldso.relocated = 0;
-	ldso.rel_update_got = 1;
 
 	/* Call dynamic linker stage-3, __dls3, looking it up
 	 * symbolically as a barrier against moving the address


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic