[prev in list] [next in list] [prev in thread] [next in thread] 

List:       gnulib-bug
Subject:    fixes for regex undefined behavior in shifts
From:       Paul Eggert <eggert () CS ! UCLA ! EDU>
Date:       2005-08-31 19:41:41
Message-ID: 87vf1lnbbe.fsf () penguin ! cs ! ucla ! edu
[Download RAW message or body]

I installed this and filed glibc bug 1278.

2005-08-31  Paul Eggert  <eggert@cs.ucla.edu>

	* lib/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
	(optimize_subexps, lower_subexp):
	Don't assume 1<<31 has defined behavior on hosts with 32-bit int,
	since the signed shift might overflow.  Use 1u<<31 instead.
	* lib/regex_internal.h (bitset_set, bitset_clear, bitset_contain):
	Likewise.
	* lib/regexec.c (check_dst_limits_calc_pos_1):
	(check_subexp_matching_top): Likewise.
	* lib/regcomp.c (optimize_subexps, lower_subexp):
	Use CHAR_BIT rather than 8, for clarity.
	* lib/regexec.c (check_dst_limits_calc_pos_1):
	(check_subexp_matching_top): Likewise.
	* lib/regcomp.c (init_dfa): Make table_size unsigned, so that we don't
	have to worry about portability issues when shifting it left.
	Remove no-longer-needed test for table_size > 0.
	* lib/regcomp.c (parse_sub_exp): Do not shift more bits than there are
	in a word, as the resulting behavior is undefined.
	* lib/regexec.c (check_dst_limits_calc_pos_1): Likewise;
	in one case, a <= should have been an <, and in another case the
	whole test was missing.
	* lib/regex_internal.h (BYTE_BITS): Remove.  All uses changed to
	the standard name CHAR_BIT.
	* lib/regexec.c (match_ctx_add_entry): Don't assume that ~0 == -1;
	this is not true on one's complement and signed-magnitude hosts.
	* config/srclist.txt: Add glibc bug 1278.

--- lib/regcomp.c	31 Aug 2005 18:08:34 -0000	1.10
+++ lib/regcomp.c	31 Aug 2005 19:09:45 -0000
@@ -336,7 +336,7 @@ re_compile_fastmap_iter (regex_t *bufp, 
 	  int i, j, ch;
 	  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
 	    for (j = 0; j < UINT_BITS; ++j, ++ch)
-	      if (dfa->nodes[node].opr.sbcset[i] & (1 << j))
+	      if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
 		re_set_fastmap (fastmap, icase, ch);
 	}
 #ifdef RE_ENABLE_I18N
@@ -798,7 +798,7 @@ re_compile_internal (regex_t *preg, cons
 static reg_errcode_t
 init_dfa (re_dfa_t *dfa, int pat_len)
 {
-  int table_size;
+  unsigned int table_size;
 #ifndef _LIBC
   char *codeset_name;
 #endif
@@ -812,7 +812,7 @@ init_dfa (re_dfa_t *dfa, int pat_len)
   dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
 
   /*  table_size = 2 ^ ceil(log pat_len) */
-  for (table_size = 1; table_size > 0; table_size <<= 1)
+  for (table_size = 1; ; table_size <<= 1)
     if (table_size > pat_len)
       break;
 
@@ -872,7 +872,7 @@ init_dfa (re_dfa_t *dfa, int pat_len)
 	      {
 		wint_t wch = __btowc (ch);
 		if (wch != WEOF)
-		  dfa->sb_char[i] |= 1 << j;
+		  dfa->sb_char[i] |= 1u << j;
 # ifndef _LIBC
 		if (isascii (ch) && wch != ch)
 		  dfa->map_notascii = 1;
@@ -899,7 +899,7 @@ init_word_char (re_dfa_t *dfa)
   for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
     for (j = 0; j < UINT_BITS; ++j, ++ch)
       if (isalnum (ch) || ch == '_')
-	dfa->word_char[i] |= 1 << j;
+	dfa->word_char[i] |= 1u << j;
 }
 
 /* Free the work area which are only used while compiling.  */
@@ -1222,8 +1222,8 @@ optimize_subexps (void *extra, bin_tree_
         node->left->parent = node;
 
       dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
-      if (other_idx < 8 * sizeof (dfa->used_bkref_map))
-	dfa->used_bkref_map &= ~(1 << other_idx);
+      if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map)
+	dfa->used_bkref_map &= ~(1u << other_idx);
     }
 
   return REG_NOERROR;
@@ -1266,8 +1266,8 @@ lower_subexp (reg_errcode_t *err, regex_
 	 very common, so we do not lose much.  An example that triggers
 	 this case is the sed "script" /\(\)/x.  */
       && node->left != NULL
-      && (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map)
-	  || !(dfa->used_bkref_map & (1 << node->token.opr.idx))))
+      && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map
+	  || !(dfa->used_bkref_map & (1u << node->token.opr.idx))))
     return node->left;
 
   /* Convert the SUBEXP node to the concatenation of an
@@ -2380,7 +2380,9 @@ parse_sub_exp (re_string_t *regexp, rege
       if (BE (*err != REG_NOERROR, 0))
 	return NULL;
     }
-  dfa->completed_bkref_map |= 1 << cur_nsub;
+
+  if (cur_nsub <= '9' - '1')
+    dfa->completed_bkref_map |= 1 << cur_nsub;
 
   tree = create_tree (dfa, tree, NULL, SUBEXP);
   if (BE (tree == NULL, 0))
--- lib/regex_internal.h	31 Aug 2005 18:08:34 -0000	1.8
+++ lib/regex_internal.h	31 Aug 2005 19:09:45 -0000
@@ -90,8 +90,6 @@
 # define inline
 #endif
 
-/* Number of bits in a byte.  */
-#define BYTE_BITS 8
 /* Number of single byte character.  */
 #define SBC_MAX 256
 
@@ -124,16 +122,16 @@ extern const char __re_error_msgid[] att
 extern const size_t __re_error_msgid_idx[] attribute_hidden;
 
 /* Number of bits in an unsinged int.  */
-#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS)
+#define UINT_BITS (sizeof (unsigned int) * CHAR_BIT)
 /* Number of unsigned int in an bit_set.  */
 #define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
 typedef unsigned int bitset[BITSET_UINTS];
 typedef unsigned int *re_bitset_ptr_t;
 typedef const unsigned int *re_const_bitset_ptr_t;
 
-#define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS)
-#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS))
-#define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS))
+#define bitset_set(set,i) (set[i / UINT_BITS] |= 1u << i % UINT_BITS)
+#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1u << i % UINT_BITS))
+#define bitset_contain(set,i) (set[i / UINT_BITS] & (1u << i % UINT_BITS))
 #define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
 #define bitset_set_all(set) \
   memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
--- lib/regexec.c	26 Aug 2005 05:58:55 -0000	1.9
+++ lib/regexec.c	31 Aug 2005 19:09:45 -0000
@@ -1895,8 +1895,9 @@ check_dst_limits_calc_pos_1 (re_match_co
 		  if (ent->node != node)
 		    continue;
 
-		  if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map)
-		      && !(ent->eps_reachable_subexps_map & (1 << subexp_idx)))
+		  if (subexp_idx
+		      < CHAR_BIT * sizeof ent->eps_reachable_subexps_map
+		      && !(ent->eps_reachable_subexps_map & (1u << subexp_idx)))
 		    continue;
 
 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
@@ -1922,7 +1923,9 @@ check_dst_limits_calc_pos_1 (re_match_co
 		  if (cpos == 0 && (boundaries & 2))
 		    return 0;
 
-		  ent->eps_reachable_subexps_map &= ~(1 << subexp_idx);
+		  if (subexp_idx
+		      < CHAR_BIT * sizeof ent->eps_reachable_subexps_map)
+		    ent->eps_reachable_subexps_map &= ~(1u << subexp_idx);
 	        }
 	      while (ent++->more);
 	    }
@@ -2376,8 +2379,8 @@ check_subexp_matching_top (re_match_cont
     {
       int node = cur_nodes->elems[node_idx];
       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
-	  && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
-	  && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
+	  && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map
+	  && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx))
 	{
 	  err = match_ctx_add_subtop (mctx, node, str_idx);
 	  if (BE (err != REG_NOERROR, 0))
@@ -4166,7 +4169,7 @@ match_ctx_add_entry (re_match_context_t 
      A backreference does not epsilon-transition unless it is empty, so set
      to all zeros if FROM != TO.  */
   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
-    = (from == to ? ~0 : 0);
+    = (from == to ? -1 : 0);
 
   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
   if (mctx->max_mb_elem_len < to - from)
--- config/srclist.txt.~1.89.~	2005-08-31 11:08:34.000000000 -0700
+++ config/srclist.txt	2005-08-31 12:38:57.000000000 -0700
@@ -103,6 +103,7 @@ $LIBCSRC/stdlib/getsubopt.c		lib gpl
 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1240
 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1241
 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1273
+# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1278
 #$LIBCSRC/posix/regcomp.c		lib gpl
 #
 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1238
@@ -134,6 +135,7 @@ $LIBCSRC/stdlib/getsubopt.c		lib gpl
 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1245
 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1248
 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1273
+# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1278
 #$LIBCSRC/posix/regex_internal.h		lib gpl
 #
 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1216
@@ -144,6 +146,7 @@ $LIBCSRC/stdlib/getsubopt.c		lib gpl
 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1237
 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1241
 # http://sources.redhat.com/bugzilla/show_bug.cgi?id=1245
+# http://sources.redhat.com/bugzilla/show_bug.cgi?id=1278
 #$LIBCSRC/posix/regexec.c		lib gpl
 #
 # c89 changes $LIBCSRC/string/strdup.c		lib gpl



[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic