[prev in list] [next in list] [prev in thread] [next in thread] 

List:       cfe-commits
Subject:    [PATCH][libcxx] regex: Support capturing parens inside lookahead assertions
From:       William Fisher <william.w.fisher () gmail ! com>
Date:       2013-07-20 22:41:08
Message-ID: CAHVjMKEiKC1s_9UOxDrchj+66VkSZp-pq0q69=L5_rR7tmhjSQ () mail ! gmail ! com
[Download RAW message or body]

[Attachment #2 (multipart/alternative)]


This patch fixes a bug where std::regex in ECMAScript mode was ignoring
capture groups inside lookahead assertions.

For example, matching /(?=(a))(a)/ to "a" should yield two captures: \1 =
"a", \2 = "a"

The test cases are in lookahead_capture.cpp.

Thanks,
Bill

[Attachment #5 (text/html)]

<div dir="ltr">This patch fixes a bug where std::regex in ECMAScript mode was \
ignoring capture groups inside lookahead assertions. <div><br><div>For example, \
matching /(?=(a))(a)/ to &quot;a&quot; should yield two captures: \1 = &quot;a&quot;, \
\2 = &quot;a&quot;</div> <div><br></div><div>The test cases are in \
lookahead_capture.cpp.</div><div><br></div><div>Thanks,</div><div>Bill</div></div></div>


--047d7bea2e8ab0ea7404e1f92342--


["lookahead_capture.cpp" (text/x-c++src)]

#include <regex>
#include <cassert>

// Exercises std::regex (default ECMAScript grammar) with capture groups that
// sit inside lookahead assertions. Each scenario checks both the group count
// reported by mark_count() and the sub-matches produced by regex_match().
int main()
{
	// Scenario 1: a single group inside a positive lookahead (?=...).
	{
		const std::regex pattern("^(?=(.))a$");
		assert(pattern.mark_count() == 1);

		const std::string subject("a");
		std::smatch caps;
		assert(std::regex_match(subject, caps, pattern));
		assert(caps.size() == 2);
		assert(caps[0] == "a");
		assert(caps[1] == "a");
	}

	// Scenario 2: a lookahead group placed between two ordinary groups;
	// groups are numbered in order of their opening parenthesis.
	{
		const std::regex pattern("^(a)(?=(.))(b)$");
		assert(pattern.mark_count() == 3);

		const std::string subject("ab");
		std::smatch caps;
		assert(std::regex_match(subject, caps, pattern));
		assert(caps.size() == 4);
		assert(caps[0] == "ab");
		assert(caps[1] == "a");
		assert(caps[2] == "b");
		assert(caps[3] == "b");
	}

	// Scenario 3: a lookahead nested inside another lookahead, each with
	// its own group, followed by a group that consumes the remaining input.
	{
		const std::regex pattern("^(.)(?=(.)(?=.(.)))(...)$");
		assert(pattern.mark_count() == 4);

		const std::string subject("abcd");
		std::smatch caps;
		assert(std::regex_match(subject, caps, pattern));
		assert(caps.size() == 5);
		assert(caps[0] == "abcd");
		assert(caps[1] == "a");
		assert(caps[2] == "b");
		assert(caps[3] == "d");
		assert(caps[4] == "bcd");
	}

	// Scenario 4: a group inside a negative lookahead (?!...). The group is
	// still counted, and its sub-match is expected to compare equal to "".
	{
		const std::regex pattern("^(a)(?!([^b]))(.c)$");
		assert(pattern.mark_count() == 3);

		const std::string subject("abc");
		std::smatch caps;
		assert(std::regex_match(subject, caps, pattern));
		assert(caps.size() == 4);
		assert(caps[0] == "abc");
		assert(caps[1] == "a");
		assert(caps[2] == "");
		assert(caps[3] == "bc");
	}

	// Scenario 5: negative and positive lookaheads chained at the same
	// position; only the positive lookahead's group is expected to be
	// non-empty.
	{
		const std::regex pattern("^(?!((b)))(?=(.))(?!(abc)).b$");
		assert(pattern.mark_count() == 4);

		const std::string subject("ab");
		std::smatch caps;
		assert(std::regex_match(subject, caps, pattern));
		assert(caps.size() == 5);
		assert(caps[0] == "ab");
		assert(caps[1] == "");
		assert(caps[2] == "");
		assert(caps[3] == "a");
		assert(caps[4] == "");
	}
}

["lookahead_capture.patch" (application/octet-stream)]

Index: include/regex
===================================================================
--- include/regex	(revision 186772)
+++ include/regex	(working copy)
@@ -2769,7 +2769,7 @@
     void __push_end_marked_subexpression(unsigned);
     void __push_empty();
     void __push_word_boundary(bool);
-    void __push_lookahead(const basic_regex&, bool);
+    void __push_lookahead(const basic_regex&, bool, unsigned);
 
     template <class _Allocator>
         bool
@@ -2907,6 +2907,7 @@
     typedef __owns_one_state<_CharT> base;
 
     basic_regex<_CharT, _Traits> __exp_;
+    unsigned __mexp_;
     bool __invert_;
 
     __lookahead(const __lookahead&);
@@ -2915,8 +2916,8 @@
     typedef _VSTD::__state<_CharT> __state;
 
     _LIBCPP_INLINE_VISIBILITY
-    __lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s)
-        : base(__s), __exp_(__exp), __invert_(__invert) {}
+    __lookahead(const basic_regex<_CharT, _Traits>& __exp, bool __invert, __node<_CharT>* __s, unsigned __mexp)
+        : base(__s), __exp_(__exp), __invert_(__invert), __mexp_(__mexp) {}
 
     virtual void __exec(__state&) const;
 };
@@ -2935,6 +2936,9 @@
     {
         __s.__do_ = __state::__accept_but_not_consume;
         __s.__node_ = this->first();
+        for (unsigned __i = 1; __i < __m.size(); ++__i) {
+            __s.__sub_matches_[__mexp_ + __i - 1] = __m.__matches_[__i];
+        }
     }
     else
     {
@@ -4168,7 +4172,9 @@
                                 basic_regex __exp;
                                 __exp.__flags_ = __flags_;
                                 __temp = __exp.__parse(++__temp, __last);
-                                __push_lookahead(_VSTD::move(__exp), false);
+                                unsigned __mexp = __exp.__marked_count_;
+                                __push_lookahead(_VSTD::move(__exp), false, __marked_count_);
+                                __marked_count_ += __mexp;
 #ifndef _LIBCPP_NO_EXCEPTIONS
                                 if (__temp == __last || *__temp != ')')
                                     throw regex_error(regex_constants::error_paren);
@@ -4181,7 +4187,9 @@
                                 basic_regex __exp;
                                 __exp.__flags_ = __flags_;
                                 __temp = __exp.__parse(++__temp, __last);
-                                __push_lookahead(_VSTD::move(__exp), true);
+                                unsigned __mexp = __exp.__marked_count_;
+                                __push_lookahead(_VSTD::move(__exp), true, __marked_count_);
+                                __marked_count_ += __mexp;
 #ifndef _LIBCPP_NO_EXCEPTIONS
                                 if (__temp == __last || *__temp != ')')
                                     throw regex_error(regex_constants::error_paren);
@@ -4759,10 +4767,11 @@
 template <class _CharT, class _Traits>
 void
 basic_regex<_CharT, _Traits>::__push_lookahead(const basic_regex& __exp,
-                                               bool __invert)
+                                               bool __invert,
+                                               unsigned __mexp)
 {
     __end_->first() = new __lookahead<_CharT, _Traits>(__exp, __invert,
-                                                           __end_->first());
+                                                           __end_->first(), __mexp);
     __end_ = static_cast<__owns_one_state<_CharT>*>(__end_->first());
 }
 



_______________________________________________
cfe-commits mailing list
cfe-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic