[prev in list] [next in list] [prev in thread] [next in thread] 

List:       php-cvs
Subject:    [PHP-CVS] com php-src: ext tokenizer port + cleanup unused lexer states: Zend/zend_compi
From:       Bob Weinand <bwoebi () php ! net>
Date:       2015-04-30 6:03:29
Message-ID: php-mail-82b281e520fe74ea0cb506b18097e170123740241 () git ! php ! net
[Download RAW message or body]

Commit:    110759386e2f9b4d88bf68c669b6c54ad4b5c04f
Author:    Márcio Almada <marcio3w@gmail.com>         Sun, 5 Apr 2015 08:50:35 -0300
Parents:   02a9eb4f8c736089808b51d862def0e648383e09
Branches:  master

Link:       http://git.php.net/?p=php-src.git;a=commitdiff;h=110759386e2f9b4d88bf68c669b6c54ad4b5c04f

Log:
ext tokenizer port + cleanup unused lexer states

We basically added a mechanism to store the token stream during parsing
and exposed the entire parser stack on the tokenizer extension through
an opt-in flag: token_get_all($src, TOKEN_PARSE).

This change allows easy future language enhancements regarding context
aware parsing & scanning without further maintenance on the tokenizer
extension, while solving known inconsistencies the "parseless" tokenizer
extension has when it handles the presence of `__halt_compiler()`.

Changed paths:
  M  Zend/zend_compile.c
  M  Zend/zend_globals.h
  M  Zend/zend_language_parser.y
  M  Zend/zend_language_scanner.h
  M  Zend/zend_language_scanner.l
  A  ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt
  A  ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt
  A  ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt
  M  ext/tokenizer/tests/token_get_all_error.phpt
  M  ext/tokenizer/tokenizer.c


["diff_110759386e2f9b4d88bf68c669b6c54ad4b5c04f.txt" (text/plain)]

diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c
index c92a25a..2108103 100644
--- a/Zend/zend_compile.c
+++ b/Zend/zend_compile.c
@@ -30,7 +30,6 @@
 #include "zend_interfaces.h"
 #include "zend_virtual_cwd.h"
 #include "zend_multibyte.h"
-#include "zend_language_scanner.h"
 #include "zend_inheritance.h"
 
 #define SET_NODE(target, src) do { \
@@ -568,7 +567,10 @@ static int zend_add_const_name_literal(zend_op_array *op_array, \
zend_string *nam  op.constant = zend_add_literal(CG(active_op_array), &_c); \
 	} while (0)
 
-void zend_stop_lexing(void) {
+void zend_stop_lexing(void)
+{
+	if(LANG_SCNG(on_event)) LANG_SCNG(on_event)(ON_STOP, END, 0);
+
 	LANG_SCNG(yy_cursor) = LANG_SCNG(yy_limit);
 }
 
diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h
index 326955a..28487a2 100644
--- a/Zend/zend_globals.h
+++ b/Zend/zend_globals.h
@@ -249,6 +249,12 @@ struct _zend_ini_scanner_globals {
 	int scanner_mode;
 };
 
+typedef enum {
+	ON_TOKEN,
+	ON_FEEDBACK,
+	ON_STOP
+} zend_php_scanner_event;
+
 struct _zend_php_scanner_globals {
 	zend_file_handle *yy_in;
 	zend_file_handle *yy_out;
@@ -278,6 +284,9 @@ struct _zend_php_scanner_globals {
 
 	/* initial string length after scanning to first variable */
 	int scanned_string_len;
+
+	/* hooks */
+	void (* on_event)(zend_php_scanner_event event, int token, int line);
 };
 
 #endif /* ZEND_GLOBALS_H */
diff --git a/Zend/zend_language_parser.y b/Zend/zend_language_parser.y
index cefcd0c..f6318ec 100644
--- a/Zend/zend_language_parser.y
+++ b/Zend/zend_language_parser.y
@@ -35,7 +35,7 @@
 #include "zend_globals.h"
 #include "zend_API.h"
 #include "zend_constants.h"
-#include "zend_language_scanner_defs.h"
+#include "zend_language_scanner.h"
 
 #define YYSIZE_T size_t
 #define yytnamerr zend_yytnamerr
@@ -49,12 +49,6 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
 #define YYFREE free
 #endif
 
-#define REWIND { \
-	zend_stack_push(&LANG_SCNG(state_stack), (void *) &LANG_SCNG(yy_state)); \
-	LANG_SCNG(yy_state) = yycST_LOOKING_FOR_SEMI_RESERVED_NAME; \
-	LANG_SCNG(yy_cursor) = (unsigned char*)LANG_SCNG(yy_text); \
-	LANG_SCNG(yy_leng)   = 0; }
-
 %}
 
 %pure_parser
@@ -290,7 +284,11 @@ semi_reserved:
 
 identifier:
 		T_STRING { $$ = $1; }
-	| 	/* if */ semi_reserved { REWIND } /* and rematch as */ T_STRING { $$ = $3; }
+	| 	semi_reserved  {
+			zval zv;
+			zend_lex_tstring(&zv);
+			$$ = zend_ast_create_zval(&zv);
+		}
 ;
 
 top_statement_list:
diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h
index c82b306..3b75ff8 100644
--- a/Zend/zend_language_scanner.h
+++ b/Zend/zend_language_scanner.h
@@ -50,6 +50,9 @@ typedef struct _zend_lex_state {
 	zend_encoding_filter output_filter;
 	const zend_encoding *script_encoding;
 
+	/* hooks */
+	void (* on_event)(zend_php_scanner_event event, int token, int line);
+
 	zend_ast *ast;
 	zend_arena *ast_arena;
 } zend_lex_state;
@@ -66,6 +69,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state \
*lex_state);  ZEND_API int zend_prepare_string_for_scanning(zval *str, char \
*filename);  ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter \
old_input_filter, const zend_encoding *old_encoding);  ZEND_API int \
zend_multibyte_set_filter(const zend_encoding *onetime_encoding); +ZEND_API void \
zend_lex_tstring(zval *zv);  
 END_EXTERN_C()
 
diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l
index 2481af6..cde0621 100644
--- a/Zend/zend_language_scanner.l
+++ b/Zend/zend_language_scanner.l
@@ -193,6 +193,7 @@ void shutdown_scanner(void)
 	zend_stack_destroy(&SCNG(state_stack));
 	zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) \
&heredoc_label_dtor, 1);  zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
+	SCNG(on_event) = NULL;
 }
 
 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
@@ -223,6 +224,8 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
 	lex_state->output_filter = SCNG(output_filter);
 	lex_state->script_encoding = SCNG(script_encoding);
 
+	lex_state->on_event = SCNG(on_event);
+
 	lex_state->ast = CG(ast);
 	lex_state->ast_arena = CG(ast_arena);
 }
@@ -260,6 +263,8 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state \
*lex_state)  SCNG(output_filter) = lex_state->output_filter;
 	SCNG(script_encoding) = lex_state->script_encoding;
 
+	SCNG(on_event) = lex_state->on_event;
+
 	CG(ast) = lex_state->ast;
 	CG(ast_arena) = lex_state->ast_arena;
 
@@ -276,6 +281,13 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle \
*file_handle)  }
 }
 
+ZEND_API void zend_lex_tstring(zval *zv)
+{
+	if (SCNG(on_event)) SCNG(on_event)(ON_FEEDBACK, T_STRING, 0);
+
+	ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
+}
+
 #define BOM_UTF32_BE	"\x00\x00\xfe\xff"
 #define	BOM_UTF32_LE	"\xff\xfe\x00\x00"
 #define	BOM_UTF16_BE	"\xfe\xff"
@@ -1083,9 +1095,20 @@ static int zend_scan_escape_string(zval *zendlval, char *str, \
int len, char quot  return SUCCESS;
 }
 
+static zend_always_inline int emit_token(int token, int token_line)
+{
+	if(SCNG(on_event)) SCNG(on_event)(ON_TOKEN, token, token_line);
+
+	return token;
+}
+
+#define RETURN_TOKEN(token) return emit_token(token, start_line);
 
 int lex_scan(zval *zendlval)
 {
+
+int start_line = CG(zend_lineno);
+
 restart:
 	SCNG(yy_text) = YYCURSOR;
 
@@ -1107,183 +1130,183 @@ NEWLINE ("\r"|"\n"|"\r\n")
 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
 
 <ST_IN_SCRIPTING>"exit" {
-	return T_EXIT;
+	RETURN_TOKEN(T_EXIT);
 }
 
 <ST_IN_SCRIPTING>"die" {
-	return T_EXIT;
+	RETURN_TOKEN(T_EXIT);
 }
 
 <ST_IN_SCRIPTING>"function" {
-	return T_FUNCTION;
+	RETURN_TOKEN(T_FUNCTION);
 }
 
 <ST_IN_SCRIPTING>"const" {
-	return T_CONST;
+	RETURN_TOKEN(T_CONST);
 }
 
 <ST_IN_SCRIPTING>"return" {
-	return T_RETURN;
+	RETURN_TOKEN(T_RETURN);
 }
 
 <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from" {
-	return T_YIELD_FROM;
+	RETURN_TOKEN(T_YIELD_FROM);
 }
 
 <ST_IN_SCRIPTING>"yield" {
-	return T_YIELD;
+	RETURN_TOKEN(T_YIELD);
 }
 
 <ST_IN_SCRIPTING>"try" {
-	return T_TRY;
+	RETURN_TOKEN(T_TRY);
 }
 
 <ST_IN_SCRIPTING>"catch" {
-	return T_CATCH;
+	RETURN_TOKEN(T_CATCH);
 }
 
 <ST_IN_SCRIPTING>"finally" {
-	return T_FINALLY;
+	RETURN_TOKEN(T_FINALLY);
 }
 
 <ST_IN_SCRIPTING>"throw" {
-	return T_THROW;
+	RETURN_TOKEN(T_THROW);
 }
 
 <ST_IN_SCRIPTING>"if" {
-	return T_IF;
+	RETURN_TOKEN(T_IF);
 }
 
 <ST_IN_SCRIPTING>"elseif" {
-	return T_ELSEIF;
+	RETURN_TOKEN(T_ELSEIF);
 }
 
 <ST_IN_SCRIPTING>"endif" {
-	return T_ENDIF;
+	RETURN_TOKEN(T_ENDIF);
 }
 
 <ST_IN_SCRIPTING>"else" {
-	return T_ELSE;
+	RETURN_TOKEN(T_ELSE);
 }
 
 <ST_IN_SCRIPTING>"while" {
-	return T_WHILE;
+	RETURN_TOKEN(T_WHILE);
 }
 
 <ST_IN_SCRIPTING>"endwhile" {
-	return T_ENDWHILE;
+	RETURN_TOKEN(T_ENDWHILE);
 }
 
 <ST_IN_SCRIPTING>"do" {
-	return T_DO;
+	RETURN_TOKEN(T_DO);
 }
 
 <ST_IN_SCRIPTING>"for" {
-	return T_FOR;
+	RETURN_TOKEN(T_FOR);
 }
 
 <ST_IN_SCRIPTING>"endfor" {
-	return T_ENDFOR;
+	RETURN_TOKEN(T_ENDFOR);
 }
 
 <ST_IN_SCRIPTING>"foreach" {
-	return T_FOREACH;
+	RETURN_TOKEN(T_FOREACH);
 }
 
 <ST_IN_SCRIPTING>"endforeach" {
-	return T_ENDFOREACH;
+	RETURN_TOKEN(T_ENDFOREACH);
 }
 
 <ST_IN_SCRIPTING>"declare" {
-	return T_DECLARE;
+	RETURN_TOKEN(T_DECLARE);
 }
 
 <ST_IN_SCRIPTING>"enddeclare" {
-	return T_ENDDECLARE;
+	RETURN_TOKEN(T_ENDDECLARE);
 }
 
 <ST_IN_SCRIPTING>"instanceof" {
-	return T_INSTANCEOF;
+	RETURN_TOKEN(T_INSTANCEOF);
 }
 
 <ST_IN_SCRIPTING>"as" {
-	return T_AS;
+	RETURN_TOKEN(T_AS);
 }
 
 <ST_IN_SCRIPTING>"switch" {
-	return T_SWITCH;
+	RETURN_TOKEN(T_SWITCH);
 }
 
 <ST_IN_SCRIPTING>"endswitch" {
-	return T_ENDSWITCH;
+	RETURN_TOKEN(T_ENDSWITCH);
 }
 
 <ST_IN_SCRIPTING>"case" {
-	return T_CASE;
+	RETURN_TOKEN(T_CASE);
 }
 
 <ST_IN_SCRIPTING>"default" {
-	return T_DEFAULT;
+	RETURN_TOKEN(T_DEFAULT);
 }
 
 <ST_IN_SCRIPTING>"break" {
-	return T_BREAK;
+	RETURN_TOKEN(T_BREAK);
 }
 
 <ST_IN_SCRIPTING>"continue" {
-	return T_CONTINUE;
+	RETURN_TOKEN(T_CONTINUE);
 }
 
 <ST_IN_SCRIPTING>"goto" {
-	return T_GOTO;
+	RETURN_TOKEN(T_GOTO);
 }
 
 <ST_IN_SCRIPTING>"echo" {
-	return T_ECHO;
+	RETURN_TOKEN(T_ECHO);
 }
 
 <ST_IN_SCRIPTING>"print" {
-	return T_PRINT;
+	RETURN_TOKEN(T_PRINT);
 }
 
 <ST_IN_SCRIPTING>"class" {
-	return T_CLASS;
+	RETURN_TOKEN(T_CLASS);
 }
 
 <ST_IN_SCRIPTING>"interface" {
-	return T_INTERFACE;
+	RETURN_TOKEN(T_INTERFACE);
 }
 
 <ST_IN_SCRIPTING>"trait" {
-	return T_TRAIT;
+	RETURN_TOKEN(T_TRAIT);
 }
 
 <ST_IN_SCRIPTING>"extends" {
-	return T_EXTENDS;
+	RETURN_TOKEN(T_EXTENDS);
 }
 
 <ST_IN_SCRIPTING>"implements" {
-	return T_IMPLEMENTS;
+	RETURN_TOKEN(T_IMPLEMENTS);
 }
 
 <ST_IN_SCRIPTING>"->" {
 	yy_push_state(ST_LOOKING_FOR_PROPERTY);
-	return T_OBJECT_OPERATOR;
+	RETURN_TOKEN(T_OBJECT_OPERATOR);
 }
 
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY,ST_LOOKING_FOR_SEMI_RESERVED_NAME>{WHITESPACE}+ \
{ +<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
 	HANDLE_NEWLINES(yytext, yyleng);
-	return T_WHITESPACE;
+	RETURN_TOKEN(T_WHITESPACE);
 }
 
 <ST_LOOKING_FOR_PROPERTY>"->" {
-	return T_OBJECT_OPERATOR;
+	RETURN_TOKEN(T_OBJECT_OPERATOR);
 }
 
 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
 	yy_pop_state();
 	zend_copy_value(zendlval, yytext, yyleng);
-	return T_STRING;
+	RETURN_TOKEN(T_STRING);
 }
 
 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
@@ -1293,283 +1316,283 @@ NEWLINE ("\r"|"\n"|"\r\n")
 }
 
 <ST_IN_SCRIPTING>"::" {
-	return T_PAAMAYIM_NEKUDOTAYIM;
+	RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
 }
 
 <ST_IN_SCRIPTING>"\\" {
-	return T_NS_SEPARATOR;
+	RETURN_TOKEN(T_NS_SEPARATOR);
 }
 
 <ST_IN_SCRIPTING>"..." {
-	return T_ELLIPSIS;
+	RETURN_TOKEN(T_ELLIPSIS);
 }
 
 <ST_IN_SCRIPTING>"??" {
-	return T_COALESCE;
+	RETURN_TOKEN(T_COALESCE);
 }
 
 <ST_IN_SCRIPTING>"new" {
-	return T_NEW;
+	RETURN_TOKEN(T_NEW);
 }
 
 <ST_IN_SCRIPTING>"clone" {
-	return T_CLONE;
+	RETURN_TOKEN(T_CLONE);
 }
 
 <ST_IN_SCRIPTING>"var" {
-	return T_VAR;
+	RETURN_TOKEN(T_VAR);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
-	return T_INT_CAST;
+	RETURN_TOKEN(T_INT_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
-	return T_DOUBLE_CAST;
+	RETURN_TOKEN(T_DOUBLE_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
-	return T_STRING_CAST;
+	RETURN_TOKEN(T_STRING_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
-	return T_ARRAY_CAST;
+	RETURN_TOKEN(T_ARRAY_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
-	return T_OBJECT_CAST;
+	RETURN_TOKEN(T_OBJECT_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
-	return T_BOOL_CAST;
+	RETURN_TOKEN(T_BOOL_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
-	return T_UNSET_CAST;
+	RETURN_TOKEN(T_UNSET_CAST);
 }
 
 <ST_IN_SCRIPTING>"eval" {
-	return T_EVAL;
+	RETURN_TOKEN(T_EVAL);
 }
 
 <ST_IN_SCRIPTING>"include" {
-	return T_INCLUDE;
+	RETURN_TOKEN(T_INCLUDE);
 }
 
 <ST_IN_SCRIPTING>"include_once" {
-	return T_INCLUDE_ONCE;
+	RETURN_TOKEN(T_INCLUDE_ONCE);
 }
 
 <ST_IN_SCRIPTING>"require" {
-	return T_REQUIRE;
+	RETURN_TOKEN(T_REQUIRE);
 }
 
 <ST_IN_SCRIPTING>"require_once" {
-	return T_REQUIRE_ONCE;
+	RETURN_TOKEN(T_REQUIRE_ONCE);
 }
 
 <ST_IN_SCRIPTING>"namespace" {
-	return T_NAMESPACE;
+	RETURN_TOKEN(T_NAMESPACE);
 }
 
 <ST_IN_SCRIPTING>"use" {
-	return T_USE;
+	RETURN_TOKEN(T_USE);
 }
 
 <ST_IN_SCRIPTING>"insteadof" {
-        return T_INSTEADOF;
+    RETURN_TOKEN(T_INSTEADOF);
 }
 
 <ST_IN_SCRIPTING>"global" {
-	return T_GLOBAL;
+	RETURN_TOKEN(T_GLOBAL);
 }
 
 <ST_IN_SCRIPTING>"isset" {
-	return T_ISSET;
+	RETURN_TOKEN(T_ISSET);
 }
 
 <ST_IN_SCRIPTING>"empty" {
-	return T_EMPTY;
+	RETURN_TOKEN(T_EMPTY);
 }
 
 <ST_IN_SCRIPTING>"__halt_compiler" {
-	return T_HALT_COMPILER;
+	RETURN_TOKEN(T_HALT_COMPILER);
 }
 
 <ST_IN_SCRIPTING>"static" {
-	return T_STATIC;
+	RETURN_TOKEN(T_STATIC);
 }
 
 <ST_IN_SCRIPTING>"abstract" {
-	return T_ABSTRACT;
+	RETURN_TOKEN(T_ABSTRACT);
 }
 
 <ST_IN_SCRIPTING>"final" {
-	return T_FINAL;
+	RETURN_TOKEN(T_FINAL);
 }
 
 <ST_IN_SCRIPTING>"private" {
-	return T_PRIVATE;
+	RETURN_TOKEN(T_PRIVATE);
 }
 
 <ST_IN_SCRIPTING>"protected" {
-	return T_PROTECTED;
+	RETURN_TOKEN(T_PROTECTED);
 }
 
 <ST_IN_SCRIPTING>"public" {
-	return T_PUBLIC;
+	RETURN_TOKEN(T_PUBLIC);
 }
 
 <ST_IN_SCRIPTING>"unset" {
-	return T_UNSET;
+	RETURN_TOKEN(T_UNSET);
 }
 
 <ST_IN_SCRIPTING>"=>" {
-	return T_DOUBLE_ARROW;
+	RETURN_TOKEN(T_DOUBLE_ARROW);
 }
 
 <ST_IN_SCRIPTING>"list" {
-	return T_LIST;
+	RETURN_TOKEN(T_LIST);
 }
 
 <ST_IN_SCRIPTING>"array" {
-	return T_ARRAY;
+	RETURN_TOKEN(T_ARRAY);
 }
 
 <ST_IN_SCRIPTING>"callable" {
- return T_CALLABLE;
+	RETURN_TOKEN(T_CALLABLE);
 }
 
 <ST_IN_SCRIPTING>"++" {
-	return T_INC;
+	RETURN_TOKEN(T_INC);
 }
 
 <ST_IN_SCRIPTING>"--" {
-	return T_DEC;
+	RETURN_TOKEN(T_DEC);
 }
 
 <ST_IN_SCRIPTING>"===" {
-	return T_IS_IDENTICAL;
+	RETURN_TOKEN(T_IS_IDENTICAL);
 }
 
 <ST_IN_SCRIPTING>"!==" {
-	return T_IS_NOT_IDENTICAL;
+	RETURN_TOKEN(T_IS_NOT_IDENTICAL);
 }
 
 <ST_IN_SCRIPTING>"==" {
-	return T_IS_EQUAL;
+	RETURN_TOKEN(T_IS_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"!="|"<>" {
-	return T_IS_NOT_EQUAL;
+	RETURN_TOKEN(T_IS_NOT_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"<=>" {
-	return T_SPACESHIP;
+	RETURN_TOKEN(T_SPACESHIP);
 }
 
 <ST_IN_SCRIPTING>"<=" {
-	return T_IS_SMALLER_OR_EQUAL;
+	RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
 }
 
 <ST_IN_SCRIPTING>">=" {
-	return T_IS_GREATER_OR_EQUAL;
+	RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"+=" {
-	return T_PLUS_EQUAL;
+	RETURN_TOKEN(T_PLUS_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"-=" {
-	return T_MINUS_EQUAL;
+	RETURN_TOKEN(T_MINUS_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"*=" {
-	return T_MUL_EQUAL;
+	RETURN_TOKEN(T_MUL_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"*\*" {
-	return T_POW;
+	RETURN_TOKEN(T_POW);
 }
 
 <ST_IN_SCRIPTING>"*\*=" {
-	return T_POW_EQUAL;
+	RETURN_TOKEN(T_POW_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"/=" {
-	return T_DIV_EQUAL;
+	RETURN_TOKEN(T_DIV_EQUAL);
 }
 
 <ST_IN_SCRIPTING>".=" {
-	return T_CONCAT_EQUAL;
+	RETURN_TOKEN(T_CONCAT_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"%=" {
-	return T_MOD_EQUAL;
+	RETURN_TOKEN(T_MOD_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"<<=" {
-	return T_SL_EQUAL;
+	RETURN_TOKEN(T_SL_EQUAL);
 }
 
 <ST_IN_SCRIPTING>">>=" {
-	return T_SR_EQUAL;
+	RETURN_TOKEN(T_SR_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"&=" {
-	return T_AND_EQUAL;
+	RETURN_TOKEN(T_AND_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"|=" {
-	return T_OR_EQUAL;
+	RETURN_TOKEN(T_OR_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"^=" {
-	return T_XOR_EQUAL;
+	RETURN_TOKEN(T_XOR_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"||" {
-	return T_BOOLEAN_OR;
+	RETURN_TOKEN(T_BOOLEAN_OR);
 }
 
 <ST_IN_SCRIPTING>"&&" {
-	return T_BOOLEAN_AND;
+	RETURN_TOKEN(T_BOOLEAN_AND);
 }
 
 <ST_IN_SCRIPTING>"OR" {
-	return T_LOGICAL_OR;
+	RETURN_TOKEN(T_LOGICAL_OR);
 }
 
 <ST_IN_SCRIPTING>"AND" {
-	return T_LOGICAL_AND;
+	RETURN_TOKEN(T_LOGICAL_AND);
 }
 
 <ST_IN_SCRIPTING>"XOR" {
-	return T_LOGICAL_XOR;
+	RETURN_TOKEN(T_LOGICAL_XOR);
 }
 
 <ST_IN_SCRIPTING>"<<" {
-	return T_SL;
+	RETURN_TOKEN(T_SL);
 }
 
 <ST_IN_SCRIPTING>">>" {
-	return T_SR;
+	RETURN_TOKEN(T_SR);
 }
 
 <ST_IN_SCRIPTING>{TOKENS} {
-	return yytext[0];
+	RETURN_TOKEN(yytext[0]);
 }
 
 
 <ST_IN_SCRIPTING>"{" {
 	yy_push_state(ST_IN_SCRIPTING);
-	return '{';
+	RETURN_TOKEN('{');
 }
 
 
 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
 	yy_push_state(ST_LOOKING_FOR_VARNAME);
-	return T_DOLLAR_OPEN_CURLY_BRACES;
+	RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
 }
 
 
@@ -1578,7 +1601,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
 	if (!zend_stack_is_empty(&SCNG(state_stack))) {
 		yy_pop_state();
 	}
-	return '}';
+	RETURN_TOKEN('}');
 }
 
 
@@ -1587,7 +1610,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
 	zend_copy_value(zendlval, yytext, yyleng);
 	yy_pop_state();
 	yy_push_state(ST_IN_SCRIPTING);
-	return T_STRING_VARNAME;
+	RETURN_TOKEN(T_STRING_VARNAME);
 }
 
 
@@ -1617,12 +1640,12 @@ NEWLINE ("\r"|"\n"|"\r\n")
 			ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
 			ZEND_ASSERT(!errno && end == yytext + yyleng);
 		}
-		return T_LNUMBER;
+		RETURN_TOKEN(T_LNUMBER);
 	} else {
 		ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
 		ZEND_ASSERT(end == yytext + yyleng);
-		return T_DNUMBER;
+		RETURN_TOKEN(T_DNUMBER);
 	}
 }
 
@@ -1636,7 +1659,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
 		 */
 		if (end != yytext + yyleng) {
 			zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", \
                E_PARSE);
-			return T_ERROR;
+			RETURN_TOKEN(T_ERROR);
 		}
 	} else {
 		errno = 0;
@@ -1653,19 +1676,19 @@ NEWLINE ("\r"|"\n"|"\r\n")
 			if (end != yytext + yyleng) {
 				zend_throw_exception(zend_get_parse_exception(),
 					"Invalid numeric literal", E_PARSE);
-				return T_ERROR;
+				RETURN_TOKEN(T_ERROR);
 			}
 			ZEND_ASSERT(!errno);
-			return T_DNUMBER;
+			RETURN_TOKEN(T_DNUMBER);
 		}
 		/* Also not an assert for the same reason */
 		if (end != yytext + yyleng) {
 			zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", \
                E_PARSE);
-			return T_ERROR;
+			RETURN_TOKEN(T_ERROR);
 		}
 	}
 	ZEND_ASSERT(!errno);
-	return T_LNUMBER;
+	RETURN_TOKEN(T_LNUMBER);
 }
 
 <ST_IN_SCRIPTING>{HNUM} {
@@ -1687,12 +1710,12 @@ NEWLINE ("\r"|"\n"|"\r\n")
 			ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
 			ZEND_ASSERT(!errno && end == hex + len);
 		}
-		return T_LNUMBER;
+		RETURN_TOKEN(T_LNUMBER);
 	} else {
 		ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
 		/* errno isn't checked since we allow HUGE_VAL/INF overflow */
 		ZEND_ASSERT(end == hex + len);
-		return T_DNUMBER;
+		RETURN_TOKEN(T_DNUMBER);
 	}
 }
 
@@ -1709,12 +1732,12 @@ NEWLINE ("\r"|"\n"|"\r\n")
 string:
 		ZVAL_STRINGL(zendlval, yytext, yyleng);
 	}
-	return T_NUM_STRING;
+	RETURN_TOKEN(T_NUM_STRING);
 }
 
 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
 	ZVAL_STRINGL(zendlval, yytext, yyleng);
-	return T_NUM_STRING;
+	RETURN_TOKEN(T_NUM_STRING);
 }
 
 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
@@ -1723,59 +1746,59 @@ string:
 	ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
 	/* errno isn't checked since we allow HUGE_VAL/INF overflow */
 	ZEND_ASSERT(end == yytext + yyleng);
-	return T_DNUMBER;
+	RETURN_TOKEN(T_DNUMBER);
 }
 
 <ST_IN_SCRIPTING>"__CLASS__" {
-	return T_CLASS_C;
+	RETURN_TOKEN(T_CLASS_C);
 }
 
 <ST_IN_SCRIPTING>"__TRAIT__" {
-	return T_TRAIT_C;
+	RETURN_TOKEN(T_TRAIT_C);
 }
 
 <ST_IN_SCRIPTING>"__FUNCTION__" {
-	return T_FUNC_C;
+	RETURN_TOKEN(T_FUNC_C);
 }
 
 <ST_IN_SCRIPTING>"__METHOD__" {
-	return T_METHOD_C;
+	RETURN_TOKEN(T_METHOD_C);
 }
 
 <ST_IN_SCRIPTING>"__LINE__" {
-	return T_LINE;
+	RETURN_TOKEN(T_LINE);
 }
 
 <ST_IN_SCRIPTING>"__FILE__" {
-	return T_FILE;
+	RETURN_TOKEN(T_FILE);
 }
 
 <ST_IN_SCRIPTING>"__DIR__" {
-	return T_DIR;
+	RETURN_TOKEN(T_DIR);
 }
 
 <ST_IN_SCRIPTING>"__NAMESPACE__" {
-	return T_NS_C;
+	RETURN_TOKEN(T_NS_C);
 }
 
 
 <INITIAL>"<?=" {
 	BEGIN(ST_IN_SCRIPTING);
-	return T_OPEN_TAG_WITH_ECHO;
+	RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
 }
 
 
 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
 	HANDLE_NEWLINE(yytext[yyleng-1]);
 	BEGIN(ST_IN_SCRIPTING);
-	return T_OPEN_TAG;
+	RETURN_TOKEN(T_OPEN_TAG);
 }
 
 
 <INITIAL>"<?" {
 	if (CG(short_tags)) {
 		BEGIN(ST_IN_SCRIPTING);
-		return T_OPEN_TAG;
+		RETURN_TOKEN(T_OPEN_TAG);
 	} else {
 		goto inline_char_handler;
 	}
@@ -1783,7 +1806,7 @@ string:
 
 <INITIAL>{ANY_CHAR} {
 	if (YYCURSOR > YYLIMIT) {
-		return 0;
+		RETURN_TOKEN(END);
 	}
 
 inline_char_handler:
@@ -1823,7 +1846,7 @@ inline_char_handler:
 	  ZVAL_STRINGL(zendlval, yytext, yyleng);
 	}
 	HANDLE_NEWLINES(yytext, yyleng);
-	return T_INLINE_HTML;
+	RETURN_TOKEN(T_INLINE_HTML);
 }
 
 
@@ -1834,7 +1857,7 @@ inline_char_handler:
 	yyless(yyleng - 3);
 	yy_push_state(ST_LOOKING_FOR_PROPERTY);
 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
-	return T_VARIABLE;
+	RETURN_TOKEN(T_VARIABLE);
 }
 
 /* A [ always designates a variable offset, regardless of what follows
@@ -1843,22 +1866,22 @@ inline_char_handler:
 	yyless(yyleng - 1);
 	yy_push_state(ST_VAR_OFFSET);
 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
-	return T_VARIABLE;
+	RETURN_TOKEN(T_VARIABLE);
 }
 
 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
 	zend_copy_value(zendlval, (yytext+1), (yyleng-1));
-	return T_VARIABLE;
+	RETURN_TOKEN(T_VARIABLE);
 }
 
 <ST_VAR_OFFSET>"]" {
 	yy_pop_state();
-	return ']';
+	RETURN_TOKEN(']');
 }
 
 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
 	/* Only '[' can be valid, but returning other tokens will allow a more explicit \
                parse error */
-	return yytext[0];
+	RETURN_TOKEN(yytext[0]);
 }
 
 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
@@ -1866,16 +1889,16 @@ inline_char_handler:
 	yyless(0);
 	yy_pop_state();
 	ZVAL_NULL(zendlval);
-	return T_ENCAPSED_AND_WHITESPACE;
+	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
 }
 
 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
 	zend_copy_value(zendlval, yytext, yyleng);
-	return T_STRING;
+	RETURN_TOKEN(T_STRING);
 }
 
 
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"#"|"//" {
+<ST_IN_SCRIPTING>"#"|"//" {
 	while (YYCURSOR < YYLIMIT) {
 		switch (*YYCURSOR++) {
 			case '\r':
@@ -1901,10 +1924,10 @@ inline_char_handler:
 
 	yyleng = YYCURSOR - SCNG(yy_text);
 
-	return T_COMMENT;
+	RETURN_TOKEN(T_COMMENT);
 }
 
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"/*"|"/**"{WHITESPACE} {
+<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
 	int doc_com;
 
 	if (yyleng > 2) {
@@ -1931,27 +1954,15 @@ inline_char_handler:
 
 	if (doc_com) {
 		CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
-		return T_DOC_COMMENT;
+		RETURN_TOKEN(T_DOC_COMMENT);
 	}
 
-	return T_COMMENT;
-}
-
-<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{LABEL} {
-    zend_copy_value(zendlval, yytext, yyleng);
-    yy_pop_state();
-    return T_STRING;
-}
-
-<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{ANY_CHAR} {
-    yyless(0);
-    yy_pop_state();
-    goto restart;
+	RETURN_TOKEN(T_COMMENT);
 }
 
 <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
 	BEGIN(INITIAL);
-	return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
+	RETURN_TOKEN(T_CLOSE_TAG);  /* implicit ';' at php-end tag */
 }
 
 
@@ -1977,7 +1988,7 @@ inline_char_handler:
 			 * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected \
                character..."
 			 * rule, which continued in ST_IN_SCRIPTING state after the quote */
 			ZVAL_NULL(zendlval);
-			return T_ENCAPSED_AND_WHITESPACE;
+			RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
 		}
 	}
 
@@ -2020,7 +2031,7 @@ inline_char_handler:
 		SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, \
(size_t)Z_STRLEN_P(zendlval));  ZVAL_STRINGL(zendlval, str, sz);
 	}
-	return T_CONSTANT_ENCAPSED_STRING;
+	RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
 }
 
 
@@ -2032,9 +2043,9 @@ inline_char_handler:
 			case '"':
 				yyleng = YYCURSOR - SCNG(yy_text);
 				if (zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') \
                == FAILURE) {
-					return T_ERROR;
+					RETURN_TOKEN(T_ERROR);
 				}
-				return T_CONSTANT_ENCAPSED_STRING;
+				RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
 			case '$':
 				if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
 					break;
@@ -2064,7 +2075,7 @@ inline_char_handler:
 	YYCURSOR = SCNG(yy_text) + yyleng;
 
 	BEGIN(ST_DOUBLE_QUOTES);
-	return '"';
+	RETURN_TOKEN('"');
 }
 
 
@@ -2112,13 +2123,13 @@ inline_char_handler:
 
 	zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
 
-	return T_START_HEREDOC;
+	RETURN_TOKEN(T_START_HEREDOC);
 }
 
 
 <ST_IN_SCRIPTING>[`] {
 	BEGIN(ST_BACKQUOTE);
-	return '`';
+	RETURN_TOKEN('`');
 }
 
 
@@ -2132,7 +2143,7 @@ inline_char_handler:
 	efree(heredoc_label);
 
 	BEGIN(ST_IN_SCRIPTING);
-	return T_END_HEREDOC;
+	RETURN_TOKEN(T_END_HEREDOC);
 }
 
 
@@ -2140,18 +2151,18 @@ inline_char_handler:
 	Z_LVAL_P(zendlval) = (zend_long) '{';
 	yy_push_state(ST_IN_SCRIPTING);
 	yyless(1);
-	return T_CURLY_OPEN;
+	RETURN_TOKEN(T_CURLY_OPEN);
 }
 
 
 <ST_DOUBLE_QUOTES>["] {
 	BEGIN(ST_IN_SCRIPTING);
-	return '"';
+	RETURN_TOKEN('"');
 }
 
 <ST_BACKQUOTE>[`] {
 	BEGIN(ST_IN_SCRIPTING);
-	return '`';
+	RETURN_TOKEN('`');
 }
 
 
@@ -2164,7 +2175,7 @@ inline_char_handler:
 	}
 
 	if (YYCURSOR > YYLIMIT) {
-		return 0;
+		RETURN_TOKEN(END);
 	}
 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
 		YYCURSOR++;
@@ -2201,15 +2212,15 @@ double_quotes_scan_done:
 	yyleng = YYCURSOR - SCNG(yy_text);
 
 	if (zend_scan_escape_string(zendlval, yytext, yyleng, '"') == FAILURE) {
-		return T_ERROR;
+		RETURN_TOKEN(T_ERROR);
 	}
-	return T_ENCAPSED_AND_WHITESPACE;
+	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
 }
 
 
 <ST_BACKQUOTE>{ANY_CHAR} {
 	if (YYCURSOR > YYLIMIT) {
-		return 0;
+		RETURN_TOKEN(END);
 	}
 	if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
 		YYCURSOR++;
@@ -2245,9 +2256,9 @@ double_quotes_scan_done:
 	yyleng = YYCURSOR - SCNG(yy_text);
 
 	if (zend_scan_escape_string(zendlval, yytext, yyleng, '`') == FAILURE) {
-		return T_ERROR;
+		RETURN_TOKEN(T_ERROR);
 	}
-	return T_ENCAPSED_AND_WHITESPACE;
+	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
 }
 
 
@@ -2257,7 +2268,7 @@ double_quotes_scan_done:
 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
 
 	if (YYCURSOR > YYLIMIT) {
-		return 0;
+		RETURN_TOKEN(END);
 	}
 
 	YYCURSOR--;
@@ -2321,9 +2332,9 @@ heredoc_scan_done:
 	yyleng = YYCURSOR - SCNG(yy_text);
 
 	if (zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0) == FAILURE) {
-		return T_ERROR;
+		RETURN_TOKEN(T_ERROR);
 	}
-	return T_ENCAPSED_AND_WHITESPACE;
+	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
 }
 
 
@@ -2333,7 +2344,7 @@ heredoc_scan_done:
 	zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
 
 	if (YYCURSOR > YYLIMIT) {
-		return 0;
+		RETURN_TOKEN(END);
 	}
 
 	YYCURSOR--;
@@ -2380,13 +2391,13 @@ nowdoc_scan_done:
 
 	zend_copy_value(zendlval, yytext, yyleng - newline);
 	HANDLE_NEWLINES(yytext, yyleng - newline);
-	return T_ENCAPSED_AND_WHITESPACE;
+	RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
 }
 
 
 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
 	if (YYCURSOR > YYLIMIT) {
-		return 0;
+		RETURN_TOKEN(END);
 	}
 
 	zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) \
                state=%d", yytext[0], yytext[0], YYSTATE);
diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt \
b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt new file mode 100644
index 0000000..03b991b
--- /dev/null
+++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt
@@ -0,0 +1,19 @@
+--TEST--
+Parse errors during token_get_all() with TOKEN_PARSE flag
+--SKIPIF--
+<?php if (!extension_loaded("tokenizer")) print "skip"; ?>
+--FILE--
+<?php
+
+try {
+    token_get_all('<?php invalid code;', TOKEN_PARSE);
+} catch (ParseException $e) {
+    echo $e->getMessage(), PHP_EOL;
+}
+
+echo "Done";
+
+?>
+--EXPECT--
+syntax error, unexpected 'code' (T_STRING)
+Done
diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt \
b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt new file mode 100644
index 0000000..ab33435
--- /dev/null
+++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt
@@ -0,0 +1,81 @@
+--TEST--
+Semi reserved words support: member access
+--SKIPIF--
+<?php if (!extension_loaded("tokenizer")) print "skip"; ?>
+--FILE--
+<?php
+$tokens = token_get_all('<?php
+X::continue;
+X::$continue;
+$x->$continue;
+X::continue();
+$x->continue();
+X::class;
+
+class X {
+    const CONTINUE = 1;
+    public $x = self::CONTINUE + 1;
+}
+', TOKEN_PARSE);
+
+array_walk($tokens, function($tk) {
+  if(is_array($tk)) {
+    if(($t = token_name($tk[0])) == 'T_WHITESPACE') return;
+    echo "L{$tk[2]}: ".$t." {$tk[1]}", PHP_EOL;
+  }
+  else echo $tk, PHP_EOL;
+});
+
+echo "Done";
+
+?>
+--EXPECTF--
+L1: T_OPEN_TAG <?php
+
+L2: T_STRING X
+L2: T_DOUBLE_COLON ::
+L2: T_STRING continue
+;
+L3: T_STRING X
+L3: T_DOUBLE_COLON ::
+L3: T_VARIABLE $continue
+;
+L4: T_VARIABLE $x
+L4: T_OBJECT_OPERATOR ->
+L4: T_VARIABLE $continue
+;
+L5: T_STRING X
+L5: T_DOUBLE_COLON ::
+L5: T_STRING continue
+(
+)
+;
+L6: T_VARIABLE $x
+L6: T_OBJECT_OPERATOR ->
+L6: T_STRING continue
+(
+)
+;
+L7: T_STRING X
+L7: T_DOUBLE_COLON ::
+L7: T_CLASS class
+;
+L9: T_CLASS class
+L9: T_STRING X
+{
+L10: T_CONST const
+L10: T_STRING CONTINUE
+=
+L10: T_LNUMBER 1
+;
+L11: T_PUBLIC public
+L11: T_VARIABLE $x
+=
+L11: T_STRING self
+L11: T_DOUBLE_COLON ::
+L11: T_STRING CONTINUE
++
+L11: T_LNUMBER 1
+;
+}
+Done
diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt \
b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt new file mode 100644
index 0000000..3dd8e14
--- /dev/null
+++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt
@@ -0,0 +1,68 @@
+--TEST--
+Semi reserved words support: class const
+--SKIPIF--
+<?php if (!extension_loaded("tokenizer")) print "skip"; ?>
+--FILE--
+<?php
+$tokens = token_get_all('<?php
+  class SomeClass {
+      const CONST = 1;
+      const CONTINUE = (self::CONST + 1);
+      const ARRAY = [1, self::CONTINUE => [3, 4], 5];
+  }
+', TOKEN_PARSE);
+
+array_walk($tokens, function($tk) {
+  if(is_array($tk)) {
+    if(($t = token_name($tk[0])) == 'T_WHITESPACE') return;
+    echo "L{$tk[2]}: ".$t." {$tk[1]}", PHP_EOL;
+  }
+  else echo $tk, PHP_EOL;
+});
+
+echo "Done";
+
+?>
+--EXPECTF--
+L1: T_OPEN_TAG <?php
+
+L2: T_CLASS class
+L2: T_STRING SomeClass
+{
+L3: T_CONST const
+L3: T_STRING CONST
+=
+L3: T_LNUMBER 1
+;
+L4: T_CONST const
+L4: T_STRING CONTINUE
+=
+(
+L4: T_STRING self
+L4: T_DOUBLE_COLON ::
+L4: T_STRING CONST
++
+L4: T_LNUMBER 1
+)
+;
+L5: T_CONST const
+L5: T_STRING ARRAY
+=
+[
+L5: T_LNUMBER 1
+,
+L5: T_STRING self
+L5: T_DOUBLE_COLON ::
+L5: T_STRING CONTINUE
+L5: T_DOUBLE_ARROW =>
+[
+L5: T_LNUMBER 3
+,
+L5: T_LNUMBER 4
+]
+,
+L5: T_LNUMBER 5
+]
+;
+}
+Done
diff --git a/ext/tokenizer/tests/token_get_all_error.phpt \
b/ext/tokenizer/tests/token_get_all_error.phpt index 29e97c3..9ded0a1 100644
--- a/ext/tokenizer/tests/token_get_all_error.phpt
+++ b/ext/tokenizer/tests/token_get_all_error.phpt
@@ -19,7 +19,7 @@ var_dump( token_get_all());
 echo "-- Testing token_get_all() function with more than expected no. of arguments --\n";
 $source = '<?php ?>';
 $extra_arg = 10;
-var_dump( token_get_all($source, $extra_arg));
+var_dump( token_get_all($source, true, $extra_arg));
 
 echo "Done"
 ?>
@@ -28,10 +28,10 @@ echo "Done"
 
 -- Testing token_get_all() function with zero arguments --
 
-Warning: token_get_all() expects exactly 1 parameter, 0 given in %s on line %d
+Warning: token_get_all() expects at least 1 parameter, 0 given in %s on line 11
 NULL
 -- Testing token_get_all() function with more than expected no. of arguments --
 
-Warning: token_get_all() expects exactly 1 parameter, 2 given in %s on line %d
+Warning: token_get_all() expects at most 2 parameters, 3 given in %s on line 17
 NULL
-Done
+Done
\ No newline at end of file
diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c
index c4b9d14..2a4fa90 100644
--- a/ext/tokenizer/tokenizer.c
+++ b/ext/tokenizer/tokenizer.c
@@ -37,6 +37,12 @@
 #define zendcursor LANG_SCNG(yy_cursor)
 #define zendlimit  LANG_SCNG(yy_limit)
 
+#define TOKEN_PARSE 				1
+
+void tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS) {
+	REGISTER_LONG_CONSTANT("TOKEN_PARSE", TOKEN_PARSE, CONST_CS|CONST_PERSISTENT);
+}
+
 /* {{{ arginfo */
 ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1)
 	ZEND_ARG_INFO(0, source)
@@ -83,6 +89,7 @@ ZEND_GET_MODULE(tokenizer)
 PHP_MINIT_FUNCTION(tokenizer)
 {
 	tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
+	tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS_PASSTHRU);
 	return SUCCESS;
 }
 /* }}} */
@@ -97,8 +104,10 @@ PHP_MINFO_FUNCTION(tokenizer)
 }
 /* }}} */
 
-static void tokenize(zval *return_value)
+static zend_bool tokenize(zval *return_value, zend_string *source)
 {
+	zval source_zval;
+	zend_lex_state original_lex_state;
 	zval token;
 	zval keyword;
 	int token_type;
@@ -106,10 +115,22 @@ static void tokenize(zval *return_value)
 	int token_line = 1;
 	int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
 
+	ZVAL_STR_COPY(&source_zval, source);
+	zend_save_lexical_state(&original_lex_state);
+
+	if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) {
+		zend_restore_lexical_state(&original_lex_state);
+		return 0;
+	}
+
+	LANG_SCNG(yy_state) = yycINITIAL;
 	array_init(return_value);
 
 	ZVAL_NULL(&token);
 	while ((token_type = lex_scan(&token))) {
+
+		if(token_type == T_ERROR) break;
+
 		destroy = 1;
 		switch (token_type) {
 			case T_CLOSE_TAG:
@@ -123,8 +144,6 @@ static void tokenize(zval *return_value)
 			case T_DOC_COMMENT:
 				destroy = 0;
 				break;
-			case T_ERROR:
-				return;
 		}
 
 		if (token_type >= 256) {
@@ -169,34 +188,113 @@ static void tokenize(zval *return_value)
 
 		token_line = CG(zend_lineno);
 	}
+
+	zval_dtor(&source_zval);
+	zend_restore_lexical_state(&original_lex_state);
+
+	return 1;
 }
 
-/* {{{ proto array token_get_all(string source)
- */
-PHP_FUNCTION(token_get_all)
+zval token_stream;
+
+void on_event(zend_php_scanner_event event, int token, int line)
 {
-	zend_string *source;
-	zval source_zval;
-	zend_lex_state original_lex_state;
+	zval keyword;
+	HashTable *tokens_ht;
+	zval *token_zv;
 
-	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &source) == FAILURE) {
-		return;
+	switch(event) {
+		case ON_TOKEN:
+			if (token == T_ERROR || token == END) break;
+			if (token >= 256) {
+				array_init(&keyword);
+				add_next_index_long(&keyword, token);
+				add_next_index_stringl(&keyword, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
+				add_next_index_long(&keyword, line);
+				add_next_index_zval(&token_stream, &keyword);
+			} else {
+				add_next_index_stringl(&token_stream, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
+			}
+			break;
+		case ON_FEEDBACK:
+			tokens_ht = Z_ARRVAL(token_stream);
+			token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1);
+			if (token_zv && Z_TYPE_P(token_zv) == IS_ARRAY) {
+				ZVAL_LONG(zend_hash_index_find(Z_ARRVAL_P(token_zv), 0), token);
+			}
+			break;
+		case ON_STOP:
+			if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) {
+				array_init(&keyword);
+				add_next_index_long(&keyword, T_INLINE_HTML);
+				add_next_index_stringl(&keyword,
+					(char *)LANG_SCNG(yy_cursor), LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor));
+				add_next_index_long(&keyword, CG(zend_lineno));
+				add_next_index_zval(&token_stream, &keyword);
+			}
+			break;
 	}
+}
+
+static zend_bool tokenize_parse(zval *return_value, zend_string *source)
+{
+	zval source_zval;
+	zend_lex_state original_lex_state;
+	zend_bool original_in_compilation;
+	zend_bool success;
 
 	ZVAL_STR_COPY(&source_zval, source);
+
+	original_in_compilation = CG(in_compilation);
+	CG(in_compilation) = 1;
 	zend_save_lexical_state(&original_lex_state);
 
-	if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) {
-		zend_restore_lexical_state(&original_lex_state);
-		RETURN_FALSE;
-	}
+	if ((success = (zend_prepare_string_for_scanning(&source_zval, "") == SUCCESS))) {
+		CG(ast) = NULL;
+		CG(ast_arena) = zend_arena_create(1024 * 32);
+		LANG_SCNG(yy_state) = yycINITIAL;
+		LANG_SCNG(on_event) = on_event;
 
-	LANG_SCNG(yy_state) = yycINITIAL;
+		array_init(&token_stream);
+		if((success = (zendparse() == SUCCESS))) {
+			ZVAL_ZVAL(return_value, &token_stream, 1, 0);
+		}
+		zval_dtor(&token_stream);
 
-	tokenize(return_value);
+		zend_ast_destroy(CG(ast));
+		zend_arena_destroy(CG(ast_arena));
+	}
 
+	/* restore compiler and scanner global states */
 	zend_restore_lexical_state(&original_lex_state);
+	CG(in_compilation) = original_in_compilation;
+
 	zval_dtor(&source_zval);
+
+	return success;
+}
+
+/* }}} */
+
+/* {{{ proto array token_get_all(string source)
+ */
+PHP_FUNCTION(token_get_all)
+{
+	zend_string *source;
+	zend_long flags = 0;
+	zend_bool success;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|l", &source, &flags) == FAILURE) {
+		return;
+	}
+
+	if (flags & TOKEN_PARSE) {
+		success = tokenize_parse(return_value, source);
+	} else {
+		success = tokenize(return_value, source);
+	}
+
+	if (!success) RETURN_FALSE;
 }
 /* }}} */



-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic