[prev in list] [next in list] [prev in thread] [next in thread] 

List:       perl5-porters
Subject:    [perl #76810] [PATCH]:  REVISED.  reorder bits to prepare for regex
From:       karl williamson <public () khwilliamson ! com>
Date:       2010-07-31 22:24:48
Message-ID: 4C54A2B0.5030800 () khwilliamson ! com
[Download RAW message or body]

This is a slight revision of the patch submitted a few days ago.  I 
restructured the commits to make them slightly easier to review, and to 
better isolate the binary breakages into just 2 of the commits.

Also pushed to git://github.com/khwilliamson/perl.git
branch regex_mods

> 
> make regen needed; breaks binary compatibility.
> 
> This series of commits reorders the bit fields in re extflags and 
> op_pmflags, so that all the unused bits are at the boundary of the 
> shared and non-shared portions of those structures.  This will allow 
> easier allocation in the future to either portion without having to 
> reorder things again, and thus being able to allocate new bits without 
> breaking binary compatibility.
> 
> I also changed things so that there is a single point of coupling in 
> each of op.h and regexp.h with op_reg_common.h.  This allows for fewer 
> source changes in the future when bits are allocated.
> 
> I wasn't sure if perldelta now needs to be explicit about changing 
> binary compatibility, so created a patch to do that.

["0001-regexp.h-Fix-error-check-to-use-correct-offset.patch" (text/x-patch)]

From 432ce3aa0add647b66a49208ea4880e20ae27031 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 31 Jul 2010 13:49:27 -0600
Subject: [PATCH] regexp.h: Fix error check to use correct offset

---
 regexp.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/regexp.h b/regexp.h
index d984031..ee46135 100644
--- a/regexp.h
+++ b/regexp.h
@@ -339,7 +339,7 @@ and check for NULL.
 #define RXf_SKIPWHITE		(1<<(_RXf_PMf_SHIFT+24)) /* Pattern is for a split / / */
 #define RXf_WHITE		(1<<(_RXf_PMf_SHIFT+25)) /* Pattern is /\s+/ */
 #define RXf_NULL		(1<<(_RXf_PMf_SHIFT+26)) /* Pattern is // */
-#if _RXf_PMf_SHIFT+23 > 31
+#if _RXf_PMf_SHIFT+26 > 31
 #   error Too many RXf_PMf bits used.  See regnodes.h for any spare in middle
 #endif
 
-- 
1.5.6.3


["0002-op.h-Fix-error-msg.patch" (text/x-patch)]

From b4d71f6f4e5dc7e55ba3afff7096641067bb82a2 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 31 Jul 2010 13:57:51 -0600
Subject: [PATCH] op.h: Fix error msg

---
 op.h |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/op.h b/op.h
index 7873a74..04fc374 100644
--- a/op.h
+++ b/op.h
@@ -391,7 +391,7 @@ struct pmop {
 #define PMf_NONDESTRUCT	(1<<(_RXf_PMf_SHIFT+10))
 
 #if _RXf_PMf_SHIFT+10 > 31
-#   error Too many RXf_PMf bits used.  See above and regnodes.h for any spare in middle
+#   error Too many PMf_ bits used.  See above and regnodes.h for any spare in middle
 #endif
 
 /* mask of bits that need to be transfered to re->extflags */
-- 
1.5.6.3


["0003-regexp.h-op.h-decouple-mostly-from-op_reg_common.h.patch" (text/x-patch)]

From a14a1bf8dbe253f7a0e90f757dd94840d4fa64c2 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 31 Jul 2010 14:01:46 -0600
Subject: [PATCH] regexp.h, op.h: decouple mostly from op_reg_common.h

This patch changes the shift bases to new ones, local to each file, that
are set to the common one.  Thus, there is now a single point of
coupling in each file to the common one.
---
 op.h     |   24 +++++++++++++-----------
 regexp.h |   50 ++++++++++++++++++++++++++------------------------
 2 files changed, 39 insertions(+), 35 deletions(-)

diff --git a/op.h b/op.h
index 04fc374..f402d76 100644
--- a/op.h
+++ b/op.h
@@ -360,37 +360,39 @@ struct pmop {
 #define PM_SETRE(o,r)   ((o)->op_pmregexp = (r))
 #endif
 
+#define PMf_BASE_SHIFT _RXf_PMf_SHIFT
+
 /* taint $1 etc. if target tainted */
-#define PMf_RETAINT	(1<<(_RXf_PMf_SHIFT+1))
+#define PMf_RETAINT	(1<<(PMf_BASE_SHIFT+1))
 
 /* match successfully only once per reset, with related flag RXf_USED in
  * re->extflags holding state.  This is used only for ?? matches, and only on
  * OP_MATCH and OP_QR */
-#define PMf_ONCE	(1<<(_RXf_PMf_SHIFT+2))
+#define PMf_ONCE	(1<<(PMf_BASE_SHIFT+2))
 
 /* replacement contains variables */
-#define PMf_MAYBE_CONST (1<<(_RXf_PMf_SHIFT+3))
+#define PMf_MAYBE_CONST (1<<(PMf_BASE_SHIFT+3))
 
 /* PMf_ONCE has matched successfully.  Not used under threading. */
-#define PMf_USED        (1<<(_RXf_PMf_SHIFT+4))
+#define PMf_USED        (1<<(PMf_BASE_SHIFT+4))
 
 /* subst replacement is constant */
-#define PMf_CONST	(1<<(_RXf_PMf_SHIFT+5))
+#define PMf_CONST	(1<<(PMf_BASE_SHIFT+5))
 
 /* keep 1st runtime pattern forever */
-#define PMf_KEEP	(1<<(_RXf_PMf_SHIFT+6))
-#define PMf_GLOBAL	(1<<(_RXf_PMf_SHIFT+7))	/* pattern had a g modifier */
+#define PMf_KEEP	(1<<(PMf_BASE_SHIFT+6))
+#define PMf_GLOBAL	(1<<(PMf_BASE_SHIFT+7))	/* pattern had a g modifier */
 
 /* don't reset pos() if //g fails */
-#define PMf_CONTINUE	(1<<(_RXf_PMf_SHIFT+8))
+#define PMf_CONTINUE	(1<<(PMf_BASE_SHIFT+8))
 
 /* evaluating replacement as expr */
-#define PMf_EVAL	(1<<(_RXf_PMf_SHIFT+9))
+#define PMf_EVAL	(1<<(PMf_BASE_SHIFT+9))
 
 /* Return substituted string instead of modifying it. */
-#define PMf_NONDESTRUCT	(1<<(_RXf_PMf_SHIFT+10))
+#define PMf_NONDESTRUCT	(1<<(PMf_BASE_SHIFT+10))
 
-#if _RXf_PMf_SHIFT+10 > 31
+#if PMf_BASE_SHIFT+10 > 31
 #   error Too many PMf_ bits used.  See above and regnodes.h for any spare in middle
 #endif
 
diff --git a/regexp.h b/regexp.h
index ee46135..ec89788 100644
--- a/regexp.h
+++ b/regexp.h
@@ -291,55 +291,57 @@ and check for NULL.
  *
  */
 
+#define RXf_BASE_SHIFT _RXf_PMf_SHIFT
+
 /* Anchor and GPOS related stuff */
-#define RXf_ANCH_BOL    	(1<<(_RXf_PMf_SHIFT+3))
-#define RXf_ANCH_MBOL   	(1<<(_RXf_PMf_SHIFT+4))
-#define RXf_ANCH_SBOL   	(1<<(_RXf_PMf_SHIFT+5))
-#define RXf_ANCH_GPOS   	(1<<(_RXf_PMf_SHIFT+6))
-#define RXf_GPOS_SEEN   	(1<<(_RXf_PMf_SHIFT+7))
-#define RXf_GPOS_FLOAT  	(1<<(_RXf_PMf_SHIFT+8))
+#define RXf_ANCH_BOL    	(1<<(RXf_BASE_SHIFT+3))
+#define RXf_ANCH_MBOL   	(1<<(RXf_BASE_SHIFT+4))
+#define RXf_ANCH_SBOL   	(1<<(RXf_BASE_SHIFT+5))
+#define RXf_ANCH_GPOS   	(1<<(RXf_BASE_SHIFT+6))
+#define RXf_GPOS_SEEN   	(1<<(RXf_BASE_SHIFT+7))
+#define RXf_GPOS_FLOAT  	(1<<(RXf_BASE_SHIFT+8))
 /* two bits here */
 #define RXf_ANCH        	(RXf_ANCH_BOL|RXf_ANCH_MBOL|RXf_ANCH_GPOS|RXf_ANCH_SBOL)
 #define RXf_GPOS_CHECK          (RXf_GPOS_SEEN|RXf_ANCH_GPOS)
 #define RXf_ANCH_SINGLE         (RXf_ANCH_SBOL|RXf_ANCH_GPOS)
 
 /* What we have seen */
-#define RXf_LOOKBEHIND_SEEN	(1<<(_RXf_PMf_SHIFT+9))
-#define RXf_EVAL_SEEN   	(1<<(_RXf_PMf_SHIFT+10))
-#define RXf_CANY_SEEN   	(1<<(_RXf_PMf_SHIFT+11))
+#define RXf_LOOKBEHIND_SEEN	(1<<(RXf_BASE_SHIFT+9))
+#define RXf_EVAL_SEEN   	(1<<(RXf_BASE_SHIFT+10))
+#define RXf_CANY_SEEN   	(1<<(RXf_BASE_SHIFT+11))
 
 /* Special */
-#define RXf_NOSCAN      	(1<<(_RXf_PMf_SHIFT+12))
-#define RXf_CHECK_ALL   	(1<<(_RXf_PMf_SHIFT+13))
+#define RXf_NOSCAN      	(1<<(RXf_BASE_SHIFT+12))
+#define RXf_CHECK_ALL   	(1<<(RXf_BASE_SHIFT+13))
 
 /* UTF8 related */
-#define RXf_MATCH_UTF8  	(1<<(_RXf_PMf_SHIFT+15))
+#define RXf_MATCH_UTF8  	(1<<(RXf_BASE_SHIFT+15))
 
 /* Intuit related */
-#define RXf_USE_INTUIT_NOML	(1<<(_RXf_PMf_SHIFT+16))
-#define RXf_USE_INTUIT_ML	(1<<(_RXf_PMf_SHIFT+17))
-#define RXf_INTUIT_TAIL 	(1<<(_RXf_PMf_SHIFT+18))
+#define RXf_USE_INTUIT_NOML	(1<<(RXf_BASE_SHIFT+16))
+#define RXf_USE_INTUIT_ML	(1<<(RXf_BASE_SHIFT+17))
+#define RXf_INTUIT_TAIL 	(1<<(RXf_BASE_SHIFT+18))
 
 /*
   Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will
   be used by regex engines to check whether they should set
   RXf_SKIPWHITE
 */
-#define RXf_SPLIT		(1<<(_RXf_PMf_SHIFT+19))
+#define RXf_SPLIT		(1<<(RXf_BASE_SHIFT+19))
 
 #define RXf_USE_INTUIT		(RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML)
 
 /* Copy and tainted info */
-#define RXf_COPY_DONE   	(1<<(_RXf_PMf_SHIFT+20))
-#define RXf_TAINTED_SEEN	(1<<(_RXf_PMf_SHIFT+21))
-#define RXf_TAINTED		(1<<(_RXf_PMf_SHIFT+22)) /* this pattern is tainted */
+#define RXf_COPY_DONE   	(1<<(RXf_BASE_SHIFT+20))
+#define RXf_TAINTED_SEEN	(1<<(RXf_BASE_SHIFT+21))
+#define RXf_TAINTED		(1<<(RXf_BASE_SHIFT+22)) /* this pattern is tainted */
 
 /* Flags indicating special patterns */
-#define RXf_START_ONLY		(1<<(_RXf_PMf_SHIFT+23)) /* Pattern is /^/ */
-#define RXf_SKIPWHITE		(1<<(_RXf_PMf_SHIFT+24)) /* Pattern is for a split / / */
-#define RXf_WHITE		(1<<(_RXf_PMf_SHIFT+25)) /* Pattern is /\s+/ */
-#define RXf_NULL		(1<<(_RXf_PMf_SHIFT+26)) /* Pattern is // */
-#if _RXf_PMf_SHIFT+26 > 31
+#define RXf_START_ONLY		(1<<(RXf_BASE_SHIFT+23)) /* Pattern is /^/ */
+#define RXf_SKIPWHITE		(1<<(RXf_BASE_SHIFT+24)) /* Pattern is for a split / / */
+#define RXf_WHITE		(1<<(RXf_BASE_SHIFT+25)) /* Pattern is /\s+/ */
+#define RXf_NULL		(1<<(RXf_BASE_SHIFT+26)) /* Pattern is // */
+#if RXf_BASE_SHIFT+26 > 31
 #   error Too many RXf_PMf bits used.  See regnodes.h for any spare in middle
 #endif
 
-- 
1.5.6.3


["0004-op_reg_common.h-Refactor-variable-for-safety.patch" (text/x-patch)]

From ce0be529759a16b1ebf23a40a368afb72d8580af Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 31 Jul 2010 14:13:40 -0600
Subject: [PATCH] op_reg_common.h: Refactor variable for safety

This patch changes the variable that tells how many common bits there
are to instead be one more than that value, so bits won't get reused.  A
later commit will renumber the bits in op.h and regexp.h, but for now things
are left as-is there, which means the base variables in those two files
must subtract one to compensate for the +1.
---
 op.h            |    2 +-
 op_reg_common.h |    4 +++-
 regexp.h        |    2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/op.h b/op.h
index f402d76..d00f74f 100644
--- a/op.h
+++ b/op.h
@@ -360,7 +360,7 @@ struct pmop {
 #define PM_SETRE(o,r)   ((o)->op_pmregexp = (r))
 #endif
 
-#define PMf_BASE_SHIFT _RXf_PMf_SHIFT
+#define PMf_BASE_SHIFT ((_RXf_PMf_SHIFT_NEXT)-1)
 
 /* taint $1 etc. if target tainted */
 #define PMf_RETAINT	(1<<(PMf_BASE_SHIFT+1))
diff --git a/op_reg_common.h b/op_reg_common.h
index b0fd273..de100dc 100644
--- a/op_reg_common.h
+++ b/op_reg_common.h
@@ -24,4 +24,6 @@
 #define RXf_PMf_LOCALE		(1 << 5)
 #define PMf_LOCALE		(1 << 5)
 
-#define _RXf_PMf_SHIFT 5    /* Begins with '_' so won't be exported by B */
+/* Next available bit after the above.  Name begins with '_' so won't be
+ * exported by B */
+#define _RXf_PMf_SHIFT_NEXT 6
diff --git a/regexp.h b/regexp.h
index ec89788..5246c12 100644
--- a/regexp.h
+++ b/regexp.h
@@ -291,7 +291,7 @@ and check for NULL.
  *
  */
 
-#define RXf_BASE_SHIFT _RXf_PMf_SHIFT
+#define RXf_BASE_SHIFT ((_RXf_PMf_SHIFT_NEXT)-1)
 
 /* Anchor and GPOS related stuff */
 #define RXf_ANCH_BOL    	(1<<(RXf_BASE_SHIFT+3))
-- 
1.5.6.3


["0005-op_reg_common.h-Move-things-around.patch" (text/x-patch)]

From 8f76eb800065fce10f331ee6a583b0896eab825d Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 31 Jul 2010 14:18:03 -0600
Subject: [PATCH] op_reg_common.h: Move things around

Moving the definitions of the duplicate variables makes it easier to
read.
---
 op_reg_common.h |   13 +++++++------
 1 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/op_reg_common.h b/op_reg_common.h
index de100dc..ece2599 100644
--- a/op_reg_common.h
+++ b/op_reg_common.h
@@ -12,18 +12,19 @@
 /* These defines are used in both op.h and regexp.h  The definitions use the
  * shift form so that ext/B/defsubs_h.PL will pick them up */
 #define RXf_PMf_MULTILINE	(1 << 0)	/* /m */
-#define PMf_MULTILINE	        (1 << 0)	/* /m */
 #define RXf_PMf_SINGLELINE	(1 << 1)	/* /s */
-#define PMf_SINGLELINE	        (1 << 1)	/* /s */
 #define RXf_PMf_FOLD	        (1 << 2)	/* /i */
-#define PMf_FOLD	        (1 << 2)	/* /i */
 #define RXf_PMf_EXTENDED	(1 << 3)	/* /x */
-#define PMf_EXTENDED	        (1 << 3)	/* /x */
 #define RXf_PMf_KEEPCOPY	(1 << 4)	/* /p */
-#define PMf_KEEPCOPY	        (1 << 4)	/* /p */
 #define RXf_PMf_LOCALE		(1 << 5)
-#define PMf_LOCALE		(1 << 5)
 
 /* Next available bit after the above.  Name begins with '_' so won't be
  * exported by B */
 #define _RXf_PMf_SHIFT_NEXT 6
+
+#define PMf_MULTILINE	RXf_PMf_MULTILINE
+#define PMf_SINGLELINE	RXf_PMf_SINGLELINE
+#define PMf_FOLD	RXf_PMf_FOLD
+#define PMf_EXTENDED	RXf_PMf_EXTENDED
+#define PMf_KEEPCOPY	RXf_PMf_KEEPCOPY
+#define PMf_LOCALE	RXf_PMf_LOCALE
-- 
1.5.6.3


["0006-regexp.h-Nit-in-comments.patch" (text/x-patch)]

From e5573404603acb4640915e1e79018a4736346a35 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 31 Jul 2010 15:10:52 -0600
Subject: [PATCH] regexp.h: Nit in comments

---
 regexp.h |    8 ++++----
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/regexp.h b/regexp.h
index 5246c12..7b48a48 100644
--- a/regexp.h
+++ b/regexp.h
@@ -271,13 +271,13 @@ and check for NULL.
 #define NONDESTRUCT_PAT_MODS "r"
 
 /* This string is expected by regcomp.c to be ordered so that the first
- * character is the flag in bit 0 of extflags; the next character is bit 1,
- * etc. */
+ * character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of extflags; the next
+ * character is bit +1, etc. */
 #define STD_PAT_MODS        "msix"
 
 /* This string is expected by XS_re_regexp_pattern() in universal.c to be ordered
- * so that the first character is the flag in bit 0 of extflags; the next
- * character is bit 1, etc. */
+ * so that the first character is the flag in bit RXf_PMf_STD_PMMOD_SHIFT of
+ * extflags; the next character is in bit +1, etc. */
 #define INT_PAT_MODS    STD_PAT_MODS    KEEPCOPY_PAT_MODS
 
 #define EXT_PAT_MODS    ONCE_PAT_MODS   KEEPCOPY_PAT_MODS
-- 
1.5.6.3


["0007-op_reg_common.h-Continue-refactoring.patch" (text/x-patch)]

From 30936099ff65477788df1cc5bc47ca9cd429e946 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 31 Jul 2010 15:25:25 -0600
Subject: [PATCH] op_reg_common.h: Continue refactoring

The new op_reg_common.h did not have in it all the things that made
sense for it to have, including some comment changes that I should have
made when I created it.

I also realized that the new mechanism of using shifts allowed
RXf_PMf_STD_PMMOD_SHIFT to actually control things, rather than be a
 #define that one had to remember to change if those things changed
independently.

Finally, I created a check so that adding bits without adding them to
RXf_PMf_COMPILETIME will force a compilation error.  (This came from the
school of hard knocks)
---
 op.h            |    3 ---
 op_reg_common.h |   43 +++++++++++++++++++++++++++++++++++--------
 regexp.h        |   18 ++++--------------
 3 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/op.h b/op.h
index d00f74f..6bf996d 100644
--- a/op.h
+++ b/op.h
@@ -396,9 +396,6 @@ struct pmop {
 #   error Too many PMf_ bits used.  See above and regnodes.h for any spare in middle
 #endif
 
-/* mask of bits that need to be transfered to re->extflags */
-#define PMf_COMPILETIME	(PMf_MULTILINE|PMf_SINGLELINE|PMf_LOCALE|PMf_FOLD|PMf_EXTENDED|PMf_KEEPCOPY)
                
-
 #ifdef USE_ITHREADS
 
 #  define PmopSTASHPV(o)						\
diff --git a/op_reg_common.h b/op_reg_common.h
index ece2599..4a1eb51 100644
--- a/op_reg_common.h
+++ b/op_reg_common.h
@@ -10,17 +10,33 @@
  */
 
 /* These defines are used in both op.h and regexp.h  The definitions use the
- * shift form so that ext/B/defsubs_h.PL will pick them up */
-#define RXf_PMf_MULTILINE	(1 << 0)	/* /m */
-#define RXf_PMf_SINGLELINE	(1 << 1)	/* /s */
-#define RXf_PMf_FOLD	        (1 << 2)	/* /i */
-#define RXf_PMf_EXTENDED	(1 << 3)	/* /x */
-#define RXf_PMf_KEEPCOPY	(1 << 4)	/* /p */
-#define RXf_PMf_LOCALE		(1 << 5)
+ * shift form so that ext/B/defsubs_h.PL will pick them up.
+ *
+ * Data structures used in the two headers have common fields, and in fact one
+ * is copied onto the other.  This makes it easy to keep them in sync */
+
+/* This tells where the first of these bits is.  Setting it to 0 saved cycles
+ * and memory.  I (khw) think the code will work if changed back, but haven't
+ * tested it */
+#define RXf_PMf_STD_PMMOD_SHIFT	0
+
+/* The bits need to be ordered so that the msix are contiguous starting at bit
+ * RXf_PMf_STD_PMMOD_SHIFT, followed by the p.  See STD_PAT_MODS and
+ * INT_PAT_MODS in regexp.h for the reason contiguity is needed */
+#define RXf_PMf_MULTILINE	(1 << (RXf_PMf_STD_PMMOD_SHIFT+0))	/* /m */
+#define RXf_PMf_SINGLELINE	(1 << (RXf_PMf_STD_PMMOD_SHIFT+1))	/* /s */
+#define RXf_PMf_FOLD	        (1 << (RXf_PMf_STD_PMMOD_SHIFT+2))	/* /i */
+#define RXf_PMf_EXTENDED	(1 << (RXf_PMf_STD_PMMOD_SHIFT+3))	/* /x */
+#define RXf_PMf_KEEPCOPY	(1 << (RXf_PMf_STD_PMMOD_SHIFT+4))	/* /p */
+#define RXf_PMf_LOCALE		(1 << (RXf_PMf_STD_PMMOD_SHIFT+5))
 
 /* Next available bit after the above.  Name begins with '_' so won't be
  * exported by B */
-#define _RXf_PMf_SHIFT_NEXT 6
+#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+6)
+
+/* Mask of the above bits.  These need to be transferred from op_pmflags to
+ * re->extflags during compilation */
+#define RXf_PMf_COMPILETIME	(RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY)
  
 #define PMf_MULTILINE	RXf_PMf_MULTILINE
 #define PMf_SINGLELINE	RXf_PMf_SINGLELINE
@@ -28,3 +44,14 @@
 #define PMf_EXTENDED	RXf_PMf_EXTENDED
 #define PMf_KEEPCOPY	RXf_PMf_KEEPCOPY
 #define PMf_LOCALE	RXf_PMf_LOCALE
+#define PMf_COMPILETIME RXf_PMf_COMPILETIME
+
+/*  Error check that we haven't left something out of this.  This isn't done
+ *  directly in the #define because doing so confuses regcomp.pl.
+ *  (2**n - 1) is n 1 bits, so the below gets the contiguous bits between the
+ *  beginning and ending shifts */
+#if RXf_PMf_COMPILETIME	!= (((1 << (_RXf_PMf_SHIFT_NEXT))-1) \
+	                         & (~((1 << RXf_PMf_STD_PMMOD_SHIFT)-1)))
+#   error RXf_PMf_COMPILETIME is invalid
+#endif
+
diff --git a/regexp.h b/regexp.h
index 7b48a48..83a7e8d 100644
--- a/regexp.h
+++ b/regexp.h
@@ -8,8 +8,6 @@
  *
  */
 
-#include "op_reg_common.h"
-
 /*
  * Definitions etc. for regexp(3) routines.
  *
@@ -220,25 +218,17 @@ and check for NULL.
 /* Flags stored in regexp->extflags
  * These are used by code external to the regexp engine
  *
- * Note that flags starting with RXf_PMf_ have exact equivalents
- * stored in op_pmflags and which are defined in op.h, they are defined
- * numerically here only for clarity.
+ * Note that the flags whose names start with RXf_PMf_ are defined in
+ * op_reg_common.h, being copied from the parallel flags of op_pmflags
  *
  * NOTE: if you modify any RXf flags you should run regen.pl or regcomp.pl
  * so that regnodes.h is updated with the changes.
  *
  */
 
-/* 0x3F of extflags is used by (RXf_)PMf_COMPILETIME
- * If you change these you need to change the equivalent flags in op.h, and
- * vice versa.  These need to be ordered so that the msix are contiguous
- * starting at bit 0, followed by the p; bit 0 is because of the shift below
- * being 0; see STD_PAT_MODS and INT_PAT_MODS below for the contiguity cause */
-/* the flags above are transfered from the PMOP->op_pmflags member during
- * compilation */
-#define RXf_PMf_STD_PMMOD_SHIFT	0
+#include "op_reg_common.h"
+
 #define RXf_PMf_STD_PMMOD	(RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED)
                
-#define RXf_PMf_COMPILETIME	(RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY)
  
 #define CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl)                        \
     case IGNORE_PAT_MOD:    *(pmfl) |= RXf_PMf_FOLD;       break;   \
-- 
1.5.6.3


["0008-op.h-regexp.h-renumber-shifts.patch" (text/x-patch)]

From b856fd14b2af21b0aa2cc07662113c1f626f36e8 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 31 Jul 2010 15:33:22 -0600
Subject: [PATCH] op.h, regexp.h: renumber shifts.

This patch doesn't change any generated code.  It just changes the base
numbering of the shifts from 1 to 0.  In regexp.h the RXf_BASE_SHIFT was
changed to make sure the used bits didn't change
---
 op.h     |   24 ++++++++++++------------
 regexp.h |   50 +++++++++++++++++++++++++-------------------------
 2 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/op.h b/op.h
index 6bf996d..0102e36 100644
--- a/op.h
+++ b/op.h
@@ -360,39 +360,39 @@ struct pmop {
 #define PM_SETRE(o,r)   ((o)->op_pmregexp = (r))
 #endif
 
-#define PMf_BASE_SHIFT ((_RXf_PMf_SHIFT_NEXT)-1)
+#define PMf_BASE_SHIFT _RXf_PMf_SHIFT_NEXT
 
 /* taint $1 etc. if target tainted */
-#define PMf_RETAINT	(1<<(PMf_BASE_SHIFT+1))
+#define PMf_RETAINT	(1<<(PMf_BASE_SHIFT+0))
 
 /* match successfully only once per reset, with related flag RXf_USED in
  * re->extflags holding state.  This is used only for ?? matches, and only on
  * OP_MATCH and OP_QR */
-#define PMf_ONCE	(1<<(PMf_BASE_SHIFT+2))
+#define PMf_ONCE	(1<<(PMf_BASE_SHIFT+1))
 
 /* replacement contains variables */
-#define PMf_MAYBE_CONST (1<<(PMf_BASE_SHIFT+3))
+#define PMf_MAYBE_CONST (1<<(PMf_BASE_SHIFT+2))
 
 /* PMf_ONCE has matched successfully.  Not used under threading. */
-#define PMf_USED        (1<<(PMf_BASE_SHIFT+4))
+#define PMf_USED        (1<<(PMf_BASE_SHIFT+3))
 
 /* subst replacement is constant */
-#define PMf_CONST	(1<<(PMf_BASE_SHIFT+5))
+#define PMf_CONST	(1<<(PMf_BASE_SHIFT+4))
 
 /* keep 1st runtime pattern forever */
-#define PMf_KEEP	(1<<(PMf_BASE_SHIFT+6))
-#define PMf_GLOBAL	(1<<(PMf_BASE_SHIFT+7))	/* pattern had a g modifier */
+#define PMf_KEEP	(1<<(PMf_BASE_SHIFT+5))
+#define PMf_GLOBAL	(1<<(PMf_BASE_SHIFT+6))	/* pattern had a g modifier */
 
 /* don't reset pos() if //g fails */
-#define PMf_CONTINUE	(1<<(PMf_BASE_SHIFT+8))
+#define PMf_CONTINUE	(1<<(PMf_BASE_SHIFT+7))
 
 /* evaluating replacement as expr */
-#define PMf_EVAL	(1<<(PMf_BASE_SHIFT+9))
+#define PMf_EVAL	(1<<(PMf_BASE_SHIFT+8))
 
 /* Return substituted string instead of modifying it. */
-#define PMf_NONDESTRUCT	(1<<(PMf_BASE_SHIFT+10))
+#define PMf_NONDESTRUCT	(1<<(PMf_BASE_SHIFT+9))
 
-#if PMf_BASE_SHIFT+10 > 31
+#if PMf_BASE_SHIFT+9 > 31
 #   error Too many PMf_ bits used.  See above and regnodes.h for any spare in middle
 #endif
 
diff --git a/regexp.h b/regexp.h
index 83a7e8d..f041177 100644
--- a/regexp.h
+++ b/regexp.h
@@ -281,57 +281,57 @@ and check for NULL.
  *
  */
 
-#define RXf_BASE_SHIFT ((_RXf_PMf_SHIFT_NEXT)-1)
+#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+2)
 
 /* Anchor and GPOS related stuff */
-#define RXf_ANCH_BOL    	(1<<(RXf_BASE_SHIFT+3))
-#define RXf_ANCH_MBOL   	(1<<(RXf_BASE_SHIFT+4))
-#define RXf_ANCH_SBOL   	(1<<(RXf_BASE_SHIFT+5))
-#define RXf_ANCH_GPOS   	(1<<(RXf_BASE_SHIFT+6))
-#define RXf_GPOS_SEEN   	(1<<(RXf_BASE_SHIFT+7))
-#define RXf_GPOS_FLOAT  	(1<<(RXf_BASE_SHIFT+8))
+#define RXf_ANCH_BOL    	(1<<(RXf_BASE_SHIFT+0))
+#define RXf_ANCH_MBOL   	(1<<(RXf_BASE_SHIFT+1))
+#define RXf_ANCH_SBOL   	(1<<(RXf_BASE_SHIFT+2))
+#define RXf_ANCH_GPOS   	(1<<(RXf_BASE_SHIFT+3))
+#define RXf_GPOS_SEEN   	(1<<(RXf_BASE_SHIFT+4))
+#define RXf_GPOS_FLOAT  	(1<<(RXf_BASE_SHIFT+5))
 /* two bits here */
 #define RXf_ANCH        	(RXf_ANCH_BOL|RXf_ANCH_MBOL|RXf_ANCH_GPOS|RXf_ANCH_SBOL)
 #define RXf_GPOS_CHECK          (RXf_GPOS_SEEN|RXf_ANCH_GPOS)
 #define RXf_ANCH_SINGLE         (RXf_ANCH_SBOL|RXf_ANCH_GPOS)
 
 /* What we have seen */
-#define RXf_LOOKBEHIND_SEEN	(1<<(RXf_BASE_SHIFT+9))
-#define RXf_EVAL_SEEN   	(1<<(RXf_BASE_SHIFT+10))
-#define RXf_CANY_SEEN   	(1<<(RXf_BASE_SHIFT+11))
+#define RXf_LOOKBEHIND_SEEN	(1<<(RXf_BASE_SHIFT+6))
+#define RXf_EVAL_SEEN   	(1<<(RXf_BASE_SHIFT+7))
+#define RXf_CANY_SEEN   	(1<<(RXf_BASE_SHIFT+8))
 
 /* Special */
-#define RXf_NOSCAN      	(1<<(RXf_BASE_SHIFT+12))
-#define RXf_CHECK_ALL   	(1<<(RXf_BASE_SHIFT+13))
+#define RXf_NOSCAN      	(1<<(RXf_BASE_SHIFT+9))
+#define RXf_CHECK_ALL   	(1<<(RXf_BASE_SHIFT+10))
 
 /* UTF8 related */
-#define RXf_MATCH_UTF8  	(1<<(RXf_BASE_SHIFT+15))
+#define RXf_MATCH_UTF8  	(1<<(RXf_BASE_SHIFT+12))
 
 /* Intuit related */
-#define RXf_USE_INTUIT_NOML	(1<<(RXf_BASE_SHIFT+16))
-#define RXf_USE_INTUIT_ML	(1<<(RXf_BASE_SHIFT+17))
-#define RXf_INTUIT_TAIL 	(1<<(RXf_BASE_SHIFT+18))
+#define RXf_USE_INTUIT_NOML	(1<<(RXf_BASE_SHIFT+13))
+#define RXf_USE_INTUIT_ML	(1<<(RXf_BASE_SHIFT+14))
+#define RXf_INTUIT_TAIL 	(1<<(RXf_BASE_SHIFT+15))
 
 /*
   Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will
   be used by regex engines to check whether they should set
   RXf_SKIPWHITE
 */
-#define RXf_SPLIT		(1<<(RXf_BASE_SHIFT+19))
+#define RXf_SPLIT		(1<<(RXf_BASE_SHIFT+16))
 
 #define RXf_USE_INTUIT		(RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML)
 
 /* Copy and tainted info */
-#define RXf_COPY_DONE   	(1<<(RXf_BASE_SHIFT+20))
-#define RXf_TAINTED_SEEN	(1<<(RXf_BASE_SHIFT+21))
-#define RXf_TAINTED		(1<<(RXf_BASE_SHIFT+22)) /* this pattern is tainted */
+#define RXf_COPY_DONE   	(1<<(RXf_BASE_SHIFT+17))
+#define RXf_TAINTED_SEEN	(1<<(RXf_BASE_SHIFT+18))
+#define RXf_TAINTED		(1<<(RXf_BASE_SHIFT+19)) /* this pattern is tainted */
 
 /* Flags indicating special patterns */
-#define RXf_START_ONLY		(1<<(RXf_BASE_SHIFT+23)) /* Pattern is /^/ */
-#define RXf_SKIPWHITE		(1<<(RXf_BASE_SHIFT+24)) /* Pattern is for a split / / */
-#define RXf_WHITE		(1<<(RXf_BASE_SHIFT+25)) /* Pattern is /\s+/ */
-#define RXf_NULL		(1<<(RXf_BASE_SHIFT+26)) /* Pattern is // */
-#if RXf_BASE_SHIFT+26 > 31
+#define RXf_START_ONLY		(1<<(RXf_BASE_SHIFT+20)) /* Pattern is /^/ */
+#define RXf_SKIPWHITE		(1<<(RXf_BASE_SHIFT+21)) /* Pattern is for a split / / */
+#define RXf_WHITE		(1<<(RXf_BASE_SHIFT+22)) /* Pattern is /\s+/ */
+#define RXf_NULL		(1<<(RXf_BASE_SHIFT+23)) /* Pattern is // */
+#if RXf_BASE_SHIFT+23 > 31
 #   error Too many RXf_PMf bits used.  See regnodes.h for any spare in middle
 #endif
 
-- 
1.5.6.3


["0009-regexp.h-Move-bits-around.patch" (text/x-patch)]

From 1e3671c480198aee46e1d1cc908096fa6f3a5876 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 31 Jul 2010 16:06:09 -0600
Subject: [PATCH] regexp.h: Move bits around

make regen needed.

This commit moves some bits in extflags around so that all the unallocated
ones are at the boundary between the unshared portion and the portion
shared with op.h.  This allows them to be allocated in the future to go
either way, without affecting binary compatibility at that time.

The high-order bits are unaffected, but the low order ones move to fill
the gap.
---
 pod/perl5134delta.pod |    5 +++++
 regexp.h              |   30 ++++++++++++++++--------------
 regnodes.h            |   26 +++++++++++++-------------
 3 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/pod/perl5134delta.pod b/pod/perl5134delta.pod
index f7e76bd..418ac88 100644
--- a/pod/perl5134delta.pod
+++ b/pod/perl5134delta.pod
@@ -84,6 +84,11 @@ when loading every newly compiled extension, compares the API version of the
 running perl with the version a module has been compiled for and raises an
 exception if they don't match.
 
+=head2 Binary Incompatible with all previous Perls
+
+Some bit fields have been reordered, hence this release will not be binary
+compatible with any previous Perl release.
+
 =head1 Deprecations
 
 XXX Any deprecated features, syntax, modules etc. should be listed here.
diff --git a/regexp.h b/regexp.h
index f041177..298a417 100644
--- a/regexp.h
+++ b/regexp.h
@@ -281,7 +281,9 @@ and check for NULL.
  *
  */
 
-#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+2)
+/* Leave some space, so future bit allocations can go either in the shared or
+ * unshared area without affecting binary compatibility */
+#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+3)
 
 /* Anchor and GPOS related stuff */
 #define RXf_ANCH_BOL    	(1<<(RXf_BASE_SHIFT+0))
@@ -305,33 +307,33 @@ and check for NULL.
 #define RXf_CHECK_ALL   	(1<<(RXf_BASE_SHIFT+10))
 
 /* UTF8 related */
-#define RXf_MATCH_UTF8  	(1<<(RXf_BASE_SHIFT+12))
+#define RXf_MATCH_UTF8  	(1<<(RXf_BASE_SHIFT+11))
 
 /* Intuit related */
-#define RXf_USE_INTUIT_NOML	(1<<(RXf_BASE_SHIFT+13))
-#define RXf_USE_INTUIT_ML	(1<<(RXf_BASE_SHIFT+14))
-#define RXf_INTUIT_TAIL 	(1<<(RXf_BASE_SHIFT+15))
+#define RXf_USE_INTUIT_NOML	(1<<(RXf_BASE_SHIFT+12))
+#define RXf_USE_INTUIT_ML	(1<<(RXf_BASE_SHIFT+13))
+#define RXf_INTUIT_TAIL 	(1<<(RXf_BASE_SHIFT+14))
 
 /*
   Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will
   be used by regex engines to check whether they should set
   RXf_SKIPWHITE
 */
-#define RXf_SPLIT		(1<<(RXf_BASE_SHIFT+16))
+#define RXf_SPLIT		(1<<(RXf_BASE_SHIFT+15))
 
 #define RXf_USE_INTUIT		(RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML)
 
 /* Copy and tainted info */
-#define RXf_COPY_DONE   	(1<<(RXf_BASE_SHIFT+17))
-#define RXf_TAINTED_SEEN	(1<<(RXf_BASE_SHIFT+18))
-#define RXf_TAINTED		(1<<(RXf_BASE_SHIFT+19)) /* this pattern is tainted */
+#define RXf_COPY_DONE   	(1<<(RXf_BASE_SHIFT+16))
+#define RXf_TAINTED_SEEN	(1<<(RXf_BASE_SHIFT+17))
+#define RXf_TAINTED		(1<<(RXf_BASE_SHIFT+18)) /* this pattern is tainted */
 
 /* Flags indicating special patterns */
-#define RXf_START_ONLY		(1<<(RXf_BASE_SHIFT+20)) /* Pattern is /^/ */
-#define RXf_SKIPWHITE		(1<<(RXf_BASE_SHIFT+21)) /* Pattern is for a split / / */
-#define RXf_WHITE		(1<<(RXf_BASE_SHIFT+22)) /* Pattern is /\s+/ */
-#define RXf_NULL		(1<<(RXf_BASE_SHIFT+23)) /* Pattern is // */
-#if RXf_BASE_SHIFT+23 > 31
+#define RXf_START_ONLY		(1<<(RXf_BASE_SHIFT+19)) /* Pattern is /^/ */
+#define RXf_SKIPWHITE		(1<<(RXf_BASE_SHIFT+20)) /* Pattern is for a split / / */
+#define RXf_WHITE		(1<<(RXf_BASE_SHIFT+21)) /* Pattern is /\s+/ */
+#define RXf_NULL		(1<<(RXf_BASE_SHIFT+22)) /* Pattern is // */
+#if RXf_BASE_SHIFT+22 > 31
 #   error Too many RXf_PMf bits used.  See regnodes.h for any spare in middle
 #endif
 
diff --git a/regnodes.h b/regnodes.h
index 348410c..d132013 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -625,7 +625,7 @@ EXTCONST char * const PL_reg_name[] = {
 EXTCONST char * PL_reg_extflags_name[];
 #else
 EXTCONST char * const PL_reg_extflags_name[] = {
-	/* Bits in extflags defined: 11111111111101111111111100111111 */
+	/* Bits in extflags defined: 11111111111111111111111000111111 */
 	"MULTILINE",        /* 0x00000001 */
 	"SINGLELINE",       /* 0x00000002 */
 	"FOLD",             /* 0x00000004 */
@@ -634,18 +634,18 @@ EXTCONST char * const PL_reg_extflags_name[] = {
 	"LOCALE",           /* 0x00000020 */
 	"UNUSED_BIT_6",     /* 0x00000040 */
 	"UNUSED_BIT_7",     /* 0x00000080 */
-	"ANCH_BOL",         /* 0x00000100 */
-	"ANCH_MBOL",        /* 0x00000200 */
-	"ANCH_SBOL",        /* 0x00000400 */
-	"ANCH_GPOS",        /* 0x00000800 */
-	"GPOS_SEEN",        /* 0x00001000 */
-	"GPOS_FLOAT",       /* 0x00002000 */
-	"LOOKBEHIND_SEEN",  /* 0x00004000 */
-	"EVAL_SEEN",        /* 0x00008000 */
-	"CANY_SEEN",        /* 0x00010000 */
-	"NOSCAN",           /* 0x00020000 */
-	"CHECK_ALL",        /* 0x00040000 */
-	"UNUSED_BIT_19",    /* 0x00080000 */
+	"UNUSED_BIT_8",     /* 0x00000100 */
+	"ANCH_BOL",         /* 0x00000200 */
+	"ANCH_MBOL",        /* 0x00000400 */
+	"ANCH_SBOL",        /* 0x00000800 */
+	"ANCH_GPOS",        /* 0x00001000 */
+	"GPOS_SEEN",        /* 0x00002000 */
+	"GPOS_FLOAT",       /* 0x00004000 */
+	"LOOKBEHIND_SEEN",  /* 0x00008000 */
+	"EVAL_SEEN",        /* 0x00010000 */
+	"CANY_SEEN",        /* 0x00020000 */
+	"NOSCAN",           /* 0x00040000 */
+	"CHECK_ALL",        /* 0x00080000 */
 	"MATCH_UTF8",       /* 0x00100000 */
 	"USE_INTUIT_NOML",  /* 0x00200000 */
 	"USE_INTUIT_ML",    /* 0x00400000 */
-- 
1.5.6.3


["0010-op.h-Move-bits-around-to-allow-for-future-growth.patch" (text/x-patch)]

From 629c4b2ac0a4faf72252c1d40930e213134033d3 Mon Sep 17 00:00:00 2001
From: Karl Williamson <public@khwilliamson.com>
Date: Sat, 31 Jul 2010 16:14:12 -0600
Subject: [PATCH] op.h: Move bits around to allow for future growth

This creates an unallocated space at the shared/unshared boundary of the
data with regexp.h.  This allows any future bits that may be needed to
go into either, without affecting binary compatibility.  I chose a
number larger than I thought we would ever need.
---
 op.h |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/op.h b/op.h
index 0102e36..4241f02 100644
--- a/op.h
+++ b/op.h
@@ -360,7 +360,9 @@ struct pmop {
 #define PM_SETRE(o,r)   ((o)->op_pmregexp = (r))
 #endif
 
-#define PMf_BASE_SHIFT _RXf_PMf_SHIFT_NEXT
+/* Leave some space, so future bit allocations can go either in the shared or
+ * unshared area without affecting binary compatibility */
+#define PMf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+8)
 
 /* taint $1 etc. if target tainted */
 #define PMf_RETAINT	(1<<(PMf_BASE_SHIFT+0))
-- 
1.5.6.3



[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic