[prev in list] [next in list] [prev in thread] [next in thread]
List: php-cvs
Subject: [PHP-CVS] com php-src: ext tokenizer port + cleanup unused lexer states: =?UTF-8?Q?Zend/zend=5Fcompi
From: Bob Weinand <bwoebi () php ! net>
Date: 2015-04-30 6:03:29
Message-ID: php-mail-82b281e520fe74ea0cb506b18097e170123740241 () git ! php ! net
[Download RAW message or body]
Commit: 110759386e2f9b4d88bf68c669b6c54ad4b5c04f
Author: Márcio Almada <marcio3w@gmail.com> Sun, 5 Apr 2015 08:50:35 -0300
Parents: 02a9eb4f8c736089808b51d862def0e648383e09
Branches: master
Link: http://git.php.net/?p=php-src.git;a=commitdiff;h=110759386e2f9b4d88bf68c669b6c54ad4b5c04f
Log:
ext tokenizer port + cleanup unused lexer states
we basically added a mechanism to store the token stream during parsing
and exposed the entire parser stack on the tokenizer extension through
an opt in flag: token_get_all($src, TOKEN_PARSE).
this change allows easy future language enhancements regarding context
aware parsing & scanning without further maintenance on the tokenizer
extension, while also solving known inconsistencies the "parseless" tokenizer
extension has when it handles the presence of `__halt_compiler()`.
Changed paths:
M Zend/zend_compile.c
M Zend/zend_globals.h
M Zend/zend_language_parser.y
M Zend/zend_language_scanner.h
M Zend/zend_language_scanner.l
A ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt
A ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt
A ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt
M ext/tokenizer/tests/token_get_all_error.phpt
M ext/tokenizer/tokenizer.c
["diff_110759386e2f9b4d88bf68c669b6c54ad4b5c04f.txt" (text/plain)]
diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c
index c92a25a..2108103 100644
--- a/Zend/zend_compile.c
+++ b/Zend/zend_compile.c
@@ -30,7 +30,6 @@
#include "zend_interfaces.h"
#include "zend_virtual_cwd.h"
#include "zend_multibyte.h"
-#include "zend_language_scanner.h"
#include "zend_inheritance.h"
#define SET_NODE(target, src) do { \
@@ -568,7 +567,10 @@ static int zend_add_const_name_literal(zend_op_array *op_array, \
zend_string *nam op.constant = zend_add_literal(CG(active_op_array), &_c); \
} while (0)
-void zend_stop_lexing(void) {
+void zend_stop_lexing(void)
+{
+ if(LANG_SCNG(on_event)) LANG_SCNG(on_event)(ON_STOP, END, 0);
+
LANG_SCNG(yy_cursor) = LANG_SCNG(yy_limit);
}
diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h
index 326955a..28487a2 100644
--- a/Zend/zend_globals.h
+++ b/Zend/zend_globals.h
@@ -249,6 +249,12 @@ struct _zend_ini_scanner_globals {
int scanner_mode;
};
+typedef enum {
+ ON_TOKEN,
+ ON_FEEDBACK,
+ ON_STOP
+} zend_php_scanner_event;
+
struct _zend_php_scanner_globals {
zend_file_handle *yy_in;
zend_file_handle *yy_out;
@@ -278,6 +284,9 @@ struct _zend_php_scanner_globals {
/* initial string length after scanning to first variable */
int scanned_string_len;
+
+ /* hooks */
+ void (* on_event)(zend_php_scanner_event event, int token, int line);
};
#endif /* ZEND_GLOBALS_H */
diff --git a/Zend/zend_language_parser.y b/Zend/zend_language_parser.y
index cefcd0c..f6318ec 100644
--- a/Zend/zend_language_parser.y
+++ b/Zend/zend_language_parser.y
@@ -35,7 +35,7 @@
#include "zend_globals.h"
#include "zend_API.h"
#include "zend_constants.h"
-#include "zend_language_scanner_defs.h"
+#include "zend_language_scanner.h"
#define YYSIZE_T size_t
#define yytnamerr zend_yytnamerr
@@ -49,12 +49,6 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
#define YYFREE free
#endif
-#define REWIND { \
- zend_stack_push(&LANG_SCNG(state_stack), (void *) &LANG_SCNG(yy_state)); \
- LANG_SCNG(yy_state) = yycST_LOOKING_FOR_SEMI_RESERVED_NAME; \
- LANG_SCNG(yy_cursor) = (unsigned char*)LANG_SCNG(yy_text); \
- LANG_SCNG(yy_leng) = 0; }
-
%}
%pure_parser
@@ -290,7 +284,11 @@ semi_reserved:
identifier:
T_STRING { $$ = $1; }
- | /* if */ semi_reserved { REWIND } /* and rematch as */ T_STRING { $$ = $3; }
+ | semi_reserved {
+ zval zv;
+ zend_lex_tstring(&zv);
+ $$ = zend_ast_create_zval(&zv);
+ }
;
top_statement_list:
diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h
index c82b306..3b75ff8 100644
--- a/Zend/zend_language_scanner.h
+++ b/Zend/zend_language_scanner.h
@@ -50,6 +50,9 @@ typedef struct _zend_lex_state {
zend_encoding_filter output_filter;
const zend_encoding *script_encoding;
+ /* hooks */
+ void (* on_event)(zend_php_scanner_event event, int token, int line);
+
zend_ast *ast;
zend_arena *ast_arena;
} zend_lex_state;
@@ -66,6 +69,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state \
*lex_state); ZEND_API int zend_prepare_string_for_scanning(zval *str, char \
*filename); ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter \
old_input_filter, const zend_encoding *old_encoding); ZEND_API int \
zend_multibyte_set_filter(const zend_encoding *onetime_encoding); +ZEND_API void \
zend_lex_tstring(zval *zv);
END_EXTERN_C()
diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l
index 2481af6..cde0621 100644
--- a/Zend/zend_language_scanner.l
+++ b/Zend/zend_language_scanner.l
@@ -193,6 +193,7 @@ void shutdown_scanner(void)
zend_stack_destroy(&SCNG(state_stack));
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) \
&heredoc_label_dtor, 1); zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
+ SCNG(on_event) = NULL;
}
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
@@ -223,6 +224,8 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
lex_state->output_filter = SCNG(output_filter);
lex_state->script_encoding = SCNG(script_encoding);
+ lex_state->on_event = SCNG(on_event);
+
lex_state->ast = CG(ast);
lex_state->ast_arena = CG(ast_arena);
}
@@ -260,6 +263,8 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state \
*lex_state) SCNG(output_filter) = lex_state->output_filter;
SCNG(script_encoding) = lex_state->script_encoding;
+ SCNG(on_event) = lex_state->on_event;
+
CG(ast) = lex_state->ast;
CG(ast_arena) = lex_state->ast_arena;
@@ -276,6 +281,13 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle \
*file_handle) }
}
+ZEND_API void zend_lex_tstring(zval *zv)
+{
+ if (SCNG(on_event)) SCNG(on_event)(ON_FEEDBACK, T_STRING, 0);
+
+ ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
+}
+
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
@@ -1083,9 +1095,20 @@ static int zend_scan_escape_string(zval *zendlval, char *str, \
int len, char quot return SUCCESS;
}
+static zend_always_inline int emit_token(int token, int token_line)
+{
+ if(SCNG(on_event)) SCNG(on_event)(ON_TOKEN, token, token_line);
+
+ return token;
+}
+
+#define RETURN_TOKEN(token) return emit_token(token, start_line);
int lex_scan(zval *zendlval)
{
+
+int start_line = CG(zend_lineno);
+
restart:
SCNG(yy_text) = YYCURSOR;
@@ -1107,183 +1130,183 @@ NEWLINE ("\r"|"\n"|"\r\n")
<!*> := yyleng = YYCURSOR - SCNG(yy_text);
<ST_IN_SCRIPTING>"exit" {
- return T_EXIT;
+ RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
- return T_EXIT;
+ RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
- return T_FUNCTION;
+ RETURN_TOKEN(T_FUNCTION);
}
<ST_IN_SCRIPTING>"const" {
- return T_CONST;
+ RETURN_TOKEN(T_CONST);
}
<ST_IN_SCRIPTING>"return" {
- return T_RETURN;
+ RETURN_TOKEN(T_RETURN);
}
<ST_IN_SCRIPTING>"yield"{WHITESPACE}"from" {
- return T_YIELD_FROM;
+ RETURN_TOKEN(T_YIELD_FROM);
}
<ST_IN_SCRIPTING>"yield" {
- return T_YIELD;
+ RETURN_TOKEN(T_YIELD);
}
<ST_IN_SCRIPTING>"try" {
- return T_TRY;
+ RETURN_TOKEN(T_TRY);
}
<ST_IN_SCRIPTING>"catch" {
- return T_CATCH;
+ RETURN_TOKEN(T_CATCH);
}
<ST_IN_SCRIPTING>"finally" {
- return T_FINALLY;
+ RETURN_TOKEN(T_FINALLY);
}
<ST_IN_SCRIPTING>"throw" {
- return T_THROW;
+ RETURN_TOKEN(T_THROW);
}
<ST_IN_SCRIPTING>"if" {
- return T_IF;
+ RETURN_TOKEN(T_IF);
}
<ST_IN_SCRIPTING>"elseif" {
- return T_ELSEIF;
+ RETURN_TOKEN(T_ELSEIF);
}
<ST_IN_SCRIPTING>"endif" {
- return T_ENDIF;
+ RETURN_TOKEN(T_ENDIF);
}
<ST_IN_SCRIPTING>"else" {
- return T_ELSE;
+ RETURN_TOKEN(T_ELSE);
}
<ST_IN_SCRIPTING>"while" {
- return T_WHILE;
+ RETURN_TOKEN(T_WHILE);
}
<ST_IN_SCRIPTING>"endwhile" {
- return T_ENDWHILE;
+ RETURN_TOKEN(T_ENDWHILE);
}
<ST_IN_SCRIPTING>"do" {
- return T_DO;
+ RETURN_TOKEN(T_DO);
}
<ST_IN_SCRIPTING>"for" {
- return T_FOR;
+ RETURN_TOKEN(T_FOR);
}
<ST_IN_SCRIPTING>"endfor" {
- return T_ENDFOR;
+ RETURN_TOKEN(T_ENDFOR);
}
<ST_IN_SCRIPTING>"foreach" {
- return T_FOREACH;
+ RETURN_TOKEN(T_FOREACH);
}
<ST_IN_SCRIPTING>"endforeach" {
- return T_ENDFOREACH;
+ RETURN_TOKEN(T_ENDFOREACH);
}
<ST_IN_SCRIPTING>"declare" {
- return T_DECLARE;
+ RETURN_TOKEN(T_DECLARE);
}
<ST_IN_SCRIPTING>"enddeclare" {
- return T_ENDDECLARE;
+ RETURN_TOKEN(T_ENDDECLARE);
}
<ST_IN_SCRIPTING>"instanceof" {
- return T_INSTANCEOF;
+ RETURN_TOKEN(T_INSTANCEOF);
}
<ST_IN_SCRIPTING>"as" {
- return T_AS;
+ RETURN_TOKEN(T_AS);
}
<ST_IN_SCRIPTING>"switch" {
- return T_SWITCH;
+ RETURN_TOKEN(T_SWITCH);
}
<ST_IN_SCRIPTING>"endswitch" {
- return T_ENDSWITCH;
+ RETURN_TOKEN(T_ENDSWITCH);
}
<ST_IN_SCRIPTING>"case" {
- return T_CASE;
+ RETURN_TOKEN(T_CASE);
}
<ST_IN_SCRIPTING>"default" {
- return T_DEFAULT;
+ RETURN_TOKEN(T_DEFAULT);
}
<ST_IN_SCRIPTING>"break" {
- return T_BREAK;
+ RETURN_TOKEN(T_BREAK);
}
<ST_IN_SCRIPTING>"continue" {
- return T_CONTINUE;
+ RETURN_TOKEN(T_CONTINUE);
}
<ST_IN_SCRIPTING>"goto" {
- return T_GOTO;
+ RETURN_TOKEN(T_GOTO);
}
<ST_IN_SCRIPTING>"echo" {
- return T_ECHO;
+ RETURN_TOKEN(T_ECHO);
}
<ST_IN_SCRIPTING>"print" {
- return T_PRINT;
+ RETURN_TOKEN(T_PRINT);
}
<ST_IN_SCRIPTING>"class" {
- return T_CLASS;
+ RETURN_TOKEN(T_CLASS);
}
<ST_IN_SCRIPTING>"interface" {
- return T_INTERFACE;
+ RETURN_TOKEN(T_INTERFACE);
}
<ST_IN_SCRIPTING>"trait" {
- return T_TRAIT;
+ RETURN_TOKEN(T_TRAIT);
}
<ST_IN_SCRIPTING>"extends" {
- return T_EXTENDS;
+ RETURN_TOKEN(T_EXTENDS);
}
<ST_IN_SCRIPTING>"implements" {
- return T_IMPLEMENTS;
+ RETURN_TOKEN(T_IMPLEMENTS);
}
<ST_IN_SCRIPTING>"->" {
yy_push_state(ST_LOOKING_FOR_PROPERTY);
- return T_OBJECT_OPERATOR;
+ RETURN_TOKEN(T_OBJECT_OPERATOR);
}
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY,ST_LOOKING_FOR_SEMI_RESERVED_NAME>{WHITESPACE}+ \
{ +<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
HANDLE_NEWLINES(yytext, yyleng);
- return T_WHITESPACE;
+ RETURN_TOKEN(T_WHITESPACE);
}
<ST_LOOKING_FOR_PROPERTY>"->" {
- return T_OBJECT_OPERATOR;
+ RETURN_TOKEN(T_OBJECT_OPERATOR);
}
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
yy_pop_state();
zend_copy_value(zendlval, yytext, yyleng);
- return T_STRING;
+ RETURN_TOKEN(T_STRING);
}
<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
@@ -1293,283 +1316,283 @@ NEWLINE ("\r"|"\n"|"\r\n")
}
<ST_IN_SCRIPTING>"::" {
- return T_PAAMAYIM_NEKUDOTAYIM;
+ RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
}
<ST_IN_SCRIPTING>"\\" {
- return T_NS_SEPARATOR;
+ RETURN_TOKEN(T_NS_SEPARATOR);
}
<ST_IN_SCRIPTING>"..." {
- return T_ELLIPSIS;
+ RETURN_TOKEN(T_ELLIPSIS);
}
<ST_IN_SCRIPTING>"??" {
- return T_COALESCE;
+ RETURN_TOKEN(T_COALESCE);
}
<ST_IN_SCRIPTING>"new" {
- return T_NEW;
+ RETURN_TOKEN(T_NEW);
}
<ST_IN_SCRIPTING>"clone" {
- return T_CLONE;
+ RETURN_TOKEN(T_CLONE);
}
<ST_IN_SCRIPTING>"var" {
- return T_VAR;
+ RETURN_TOKEN(T_VAR);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
- return T_INT_CAST;
+ RETURN_TOKEN(T_INT_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
- return T_DOUBLE_CAST;
+ RETURN_TOKEN(T_DOUBLE_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
- return T_STRING_CAST;
+ RETURN_TOKEN(T_STRING_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
- return T_ARRAY_CAST;
+ RETURN_TOKEN(T_ARRAY_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
- return T_OBJECT_CAST;
+ RETURN_TOKEN(T_OBJECT_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
- return T_BOOL_CAST;
+ RETURN_TOKEN(T_BOOL_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
- return T_UNSET_CAST;
+ RETURN_TOKEN(T_UNSET_CAST);
}
<ST_IN_SCRIPTING>"eval" {
- return T_EVAL;
+ RETURN_TOKEN(T_EVAL);
}
<ST_IN_SCRIPTING>"include" {
- return T_INCLUDE;
+ RETURN_TOKEN(T_INCLUDE);
}
<ST_IN_SCRIPTING>"include_once" {
- return T_INCLUDE_ONCE;
+ RETURN_TOKEN(T_INCLUDE_ONCE);
}
<ST_IN_SCRIPTING>"require" {
- return T_REQUIRE;
+ RETURN_TOKEN(T_REQUIRE);
}
<ST_IN_SCRIPTING>"require_once" {
- return T_REQUIRE_ONCE;
+ RETURN_TOKEN(T_REQUIRE_ONCE);
}
<ST_IN_SCRIPTING>"namespace" {
- return T_NAMESPACE;
+ RETURN_TOKEN(T_NAMESPACE);
}
<ST_IN_SCRIPTING>"use" {
- return T_USE;
+ RETURN_TOKEN(T_USE);
}
<ST_IN_SCRIPTING>"insteadof" {
- return T_INSTEADOF;
+ RETURN_TOKEN(T_INSTEADOF);
}
<ST_IN_SCRIPTING>"global" {
- return T_GLOBAL;
+ RETURN_TOKEN(T_GLOBAL);
}
<ST_IN_SCRIPTING>"isset" {
- return T_ISSET;
+ RETURN_TOKEN(T_ISSET);
}
<ST_IN_SCRIPTING>"empty" {
- return T_EMPTY;
+ RETURN_TOKEN(T_EMPTY);
}
<ST_IN_SCRIPTING>"__halt_compiler" {
- return T_HALT_COMPILER;
+ RETURN_TOKEN(T_HALT_COMPILER);
}
<ST_IN_SCRIPTING>"static" {
- return T_STATIC;
+ RETURN_TOKEN(T_STATIC);
}
<ST_IN_SCRIPTING>"abstract" {
- return T_ABSTRACT;
+ RETURN_TOKEN(T_ABSTRACT);
}
<ST_IN_SCRIPTING>"final" {
- return T_FINAL;
+ RETURN_TOKEN(T_FINAL);
}
<ST_IN_SCRIPTING>"private" {
- return T_PRIVATE;
+ RETURN_TOKEN(T_PRIVATE);
}
<ST_IN_SCRIPTING>"protected" {
- return T_PROTECTED;
+ RETURN_TOKEN(T_PROTECTED);
}
<ST_IN_SCRIPTING>"public" {
- return T_PUBLIC;
+ RETURN_TOKEN(T_PUBLIC);
}
<ST_IN_SCRIPTING>"unset" {
- return T_UNSET;
+ RETURN_TOKEN(T_UNSET);
}
<ST_IN_SCRIPTING>"=>" {
- return T_DOUBLE_ARROW;
+ RETURN_TOKEN(T_DOUBLE_ARROW);
}
<ST_IN_SCRIPTING>"list" {
- return T_LIST;
+ RETURN_TOKEN(T_LIST);
}
<ST_IN_SCRIPTING>"array" {
- return T_ARRAY;
+ RETURN_TOKEN(T_ARRAY);
}
<ST_IN_SCRIPTING>"callable" {
- return T_CALLABLE;
+ RETURN_TOKEN(T_CALLABLE);
}
<ST_IN_SCRIPTING>"++" {
- return T_INC;
+ RETURN_TOKEN(T_INC);
}
<ST_IN_SCRIPTING>"--" {
- return T_DEC;
+ RETURN_TOKEN(T_DEC);
}
<ST_IN_SCRIPTING>"===" {
- return T_IS_IDENTICAL;
+ RETURN_TOKEN(T_IS_IDENTICAL);
}
<ST_IN_SCRIPTING>"!==" {
- return T_IS_NOT_IDENTICAL;
+ RETURN_TOKEN(T_IS_NOT_IDENTICAL);
}
<ST_IN_SCRIPTING>"==" {
- return T_IS_EQUAL;
+ RETURN_TOKEN(T_IS_EQUAL);
}
<ST_IN_SCRIPTING>"!="|"<>" {
- return T_IS_NOT_EQUAL;
+ RETURN_TOKEN(T_IS_NOT_EQUAL);
}
<ST_IN_SCRIPTING>"<=>" {
- return T_SPACESHIP;
+ RETURN_TOKEN(T_SPACESHIP);
}
<ST_IN_SCRIPTING>"<=" {
- return T_IS_SMALLER_OR_EQUAL;
+ RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
}
<ST_IN_SCRIPTING>">=" {
- return T_IS_GREATER_OR_EQUAL;
+ RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
}
<ST_IN_SCRIPTING>"+=" {
- return T_PLUS_EQUAL;
+ RETURN_TOKEN(T_PLUS_EQUAL);
}
<ST_IN_SCRIPTING>"-=" {
- return T_MINUS_EQUAL;
+ RETURN_TOKEN(T_MINUS_EQUAL);
}
<ST_IN_SCRIPTING>"*=" {
- return T_MUL_EQUAL;
+ RETURN_TOKEN(T_MUL_EQUAL);
}
<ST_IN_SCRIPTING>"*\*" {
- return T_POW;
+ RETURN_TOKEN(T_POW);
}
<ST_IN_SCRIPTING>"*\*=" {
- return T_POW_EQUAL;
+ RETURN_TOKEN(T_POW_EQUAL);
}
<ST_IN_SCRIPTING>"/=" {
- return T_DIV_EQUAL;
+ RETURN_TOKEN(T_DIV_EQUAL);
}
<ST_IN_SCRIPTING>".=" {
- return T_CONCAT_EQUAL;
+ RETURN_TOKEN(T_CONCAT_EQUAL);
}
<ST_IN_SCRIPTING>"%=" {
- return T_MOD_EQUAL;
+ RETURN_TOKEN(T_MOD_EQUAL);
}
<ST_IN_SCRIPTING>"<<=" {
- return T_SL_EQUAL;
+ RETURN_TOKEN(T_SL_EQUAL);
}
<ST_IN_SCRIPTING>">>=" {
- return T_SR_EQUAL;
+ RETURN_TOKEN(T_SR_EQUAL);
}
<ST_IN_SCRIPTING>"&=" {
- return T_AND_EQUAL;
+ RETURN_TOKEN(T_AND_EQUAL);
}
<ST_IN_SCRIPTING>"|=" {
- return T_OR_EQUAL;
+ RETURN_TOKEN(T_OR_EQUAL);
}
<ST_IN_SCRIPTING>"^=" {
- return T_XOR_EQUAL;
+ RETURN_TOKEN(T_XOR_EQUAL);
}
<ST_IN_SCRIPTING>"||" {
- return T_BOOLEAN_OR;
+ RETURN_TOKEN(T_BOOLEAN_OR);
}
<ST_IN_SCRIPTING>"&&" {
- return T_BOOLEAN_AND;
+ RETURN_TOKEN(T_BOOLEAN_AND);
}
<ST_IN_SCRIPTING>"OR" {
- return T_LOGICAL_OR;
+ RETURN_TOKEN(T_LOGICAL_OR);
}
<ST_IN_SCRIPTING>"AND" {
- return T_LOGICAL_AND;
+ RETURN_TOKEN(T_LOGICAL_AND);
}
<ST_IN_SCRIPTING>"XOR" {
- return T_LOGICAL_XOR;
+ RETURN_TOKEN(T_LOGICAL_XOR);
}
<ST_IN_SCRIPTING>"<<" {
- return T_SL;
+ RETURN_TOKEN(T_SL);
}
<ST_IN_SCRIPTING>">>" {
- return T_SR;
+ RETURN_TOKEN(T_SR);
}
<ST_IN_SCRIPTING>{TOKENS} {
- return yytext[0];
+ RETURN_TOKEN(yytext[0]);
}
<ST_IN_SCRIPTING>"{" {
yy_push_state(ST_IN_SCRIPTING);
- return '{';
+ RETURN_TOKEN('{');
}
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
- return T_DOLLAR_OPEN_CURLY_BRACES;
+ RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
@@ -1578,7 +1601,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
if (!zend_stack_is_empty(&SCNG(state_stack))) {
yy_pop_state();
}
- return '}';
+ RETURN_TOKEN('}');
}
@@ -1587,7 +1610,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
- return T_STRING_VARNAME;
+ RETURN_TOKEN(T_STRING_VARNAME);
}
@@ -1617,12 +1640,12 @@ NEWLINE ("\r"|"\n"|"\r\n")
ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
ZEND_ASSERT(!errno && end == yytext + yyleng);
}
- return T_LNUMBER;
+ RETURN_TOKEN(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == yytext + yyleng);
- return T_DNUMBER;
+ RETURN_TOKEN(T_DNUMBER);
}
}
@@ -1636,7 +1659,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
*/
if (end != yytext + yyleng) {
zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", \
E_PARSE);
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
} else {
errno = 0;
@@ -1653,19 +1676,19 @@ NEWLINE ("\r"|"\n"|"\r\n")
if (end != yytext + yyleng) {
zend_throw_exception(zend_get_parse_exception(),
"Invalid numeric literal", E_PARSE);
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
ZEND_ASSERT(!errno);
- return T_DNUMBER;
+ RETURN_TOKEN(T_DNUMBER);
}
/* Also not an assert for the same reason */
if (end != yytext + yyleng) {
zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", \
E_PARSE);
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
}
ZEND_ASSERT(!errno);
- return T_LNUMBER;
+ RETURN_TOKEN(T_LNUMBER);
}
<ST_IN_SCRIPTING>{HNUM} {
@@ -1687,12 +1710,12 @@ NEWLINE ("\r"|"\n"|"\r\n")
ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
ZEND_ASSERT(!errno && end == hex + len);
}
- return T_LNUMBER;
+ RETURN_TOKEN(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == hex + len);
- return T_DNUMBER;
+ RETURN_TOKEN(T_DNUMBER);
}
}
@@ -1709,12 +1732,12 @@ NEWLINE ("\r"|"\n"|"\r\n")
string:
ZVAL_STRINGL(zendlval, yytext, yyleng);
}
- return T_NUM_STRING;
+ RETURN_TOKEN(T_NUM_STRING);
}
<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
ZVAL_STRINGL(zendlval, yytext, yyleng);
- return T_NUM_STRING;
+ RETURN_TOKEN(T_NUM_STRING);
}
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
@@ -1723,59 +1746,59 @@ string:
ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == yytext + yyleng);
- return T_DNUMBER;
+ RETURN_TOKEN(T_DNUMBER);
}
<ST_IN_SCRIPTING>"__CLASS__" {
- return T_CLASS_C;
+ RETURN_TOKEN(T_CLASS_C);
}
<ST_IN_SCRIPTING>"__TRAIT__" {
- return T_TRAIT_C;
+ RETURN_TOKEN(T_TRAIT_C);
}
<ST_IN_SCRIPTING>"__FUNCTION__" {
- return T_FUNC_C;
+ RETURN_TOKEN(T_FUNC_C);
}
<ST_IN_SCRIPTING>"__METHOD__" {
- return T_METHOD_C;
+ RETURN_TOKEN(T_METHOD_C);
}
<ST_IN_SCRIPTING>"__LINE__" {
- return T_LINE;
+ RETURN_TOKEN(T_LINE);
}
<ST_IN_SCRIPTING>"__FILE__" {
- return T_FILE;
+ RETURN_TOKEN(T_FILE);
}
<ST_IN_SCRIPTING>"__DIR__" {
- return T_DIR;
+ RETURN_TOKEN(T_DIR);
}
<ST_IN_SCRIPTING>"__NAMESPACE__" {
- return T_NS_C;
+ RETURN_TOKEN(T_NS_C);
}
<INITIAL>"<?=" {
BEGIN(ST_IN_SCRIPTING);
- return T_OPEN_TAG_WITH_ECHO;
+ RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
}
<INITIAL>"<?php"([ \t]|{NEWLINE}) {
HANDLE_NEWLINE(yytext[yyleng-1]);
BEGIN(ST_IN_SCRIPTING);
- return T_OPEN_TAG;
+ RETURN_TOKEN(T_OPEN_TAG);
}
<INITIAL>"<?" {
if (CG(short_tags)) {
BEGIN(ST_IN_SCRIPTING);
- return T_OPEN_TAG;
+ RETURN_TOKEN(T_OPEN_TAG);
} else {
goto inline_char_handler;
}
@@ -1783,7 +1806,7 @@ string:
<INITIAL>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
inline_char_handler:
@@ -1823,7 +1846,7 @@ inline_char_handler:
ZVAL_STRINGL(zendlval, yytext, yyleng);
}
HANDLE_NEWLINES(yytext, yyleng);
- return T_INLINE_HTML;
+ RETURN_TOKEN(T_INLINE_HTML);
}
@@ -1834,7 +1857,7 @@ inline_char_handler:
yyless(yyleng - 3);
yy_push_state(ST_LOOKING_FOR_PROPERTY);
zend_copy_value(zendlval, (yytext+1), (yyleng-1));
- return T_VARIABLE;
+ RETURN_TOKEN(T_VARIABLE);
}
/* A [ always designates a variable offset, regardless of what follows
@@ -1843,22 +1866,22 @@ inline_char_handler:
yyless(yyleng - 1);
yy_push_state(ST_VAR_OFFSET);
zend_copy_value(zendlval, (yytext+1), (yyleng-1));
- return T_VARIABLE;
+ RETURN_TOKEN(T_VARIABLE);
}
<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
zend_copy_value(zendlval, (yytext+1), (yyleng-1));
- return T_VARIABLE;
+ RETURN_TOKEN(T_VARIABLE);
}
<ST_VAR_OFFSET>"]" {
yy_pop_state();
- return ']';
+ RETURN_TOKEN(']');
}
<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
/* Only '[' can be valid, but returning other tokens will allow a more explicit \
parse error */
- return yytext[0];
+ RETURN_TOKEN(yytext[0]);
}
<ST_VAR_OFFSET>[ \n\r\t\\'#] {
@@ -1866,16 +1889,16 @@ inline_char_handler:
yyless(0);
yy_pop_state();
ZVAL_NULL(zendlval);
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
zend_copy_value(zendlval, yytext, yyleng);
- return T_STRING;
+ RETURN_TOKEN(T_STRING);
}
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"#"|"//" {
+<ST_IN_SCRIPTING>"#"|"//" {
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
@@ -1901,10 +1924,10 @@ inline_char_handler:
yyleng = YYCURSOR - SCNG(yy_text);
- return T_COMMENT;
+ RETURN_TOKEN(T_COMMENT);
}
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"/*"|"/**"{WHITESPACE} {
+<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
int doc_com;
if (yyleng > 2) {
@@ -1931,27 +1954,15 @@ inline_char_handler:
if (doc_com) {
CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
- return T_DOC_COMMENT;
+ RETURN_TOKEN(T_DOC_COMMENT);
}
- return T_COMMENT;
-}
-
-<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{LABEL} {
- zend_copy_value(zendlval, yytext, yyleng);
- yy_pop_state();
- return T_STRING;
-}
-
-<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{ANY_CHAR} {
- yyless(0);
- yy_pop_state();
- goto restart;
+ RETURN_TOKEN(T_COMMENT);
}
<ST_IN_SCRIPTING>"?>"{NEWLINE}? {
BEGIN(INITIAL);
- return T_CLOSE_TAG; /* implicit ';' at php-end tag */
+ RETURN_TOKEN(T_CLOSE_TAG); /* implicit ';' at php-end tag */
}
@@ -1977,7 +1988,7 @@ inline_char_handler:
* for ' (unrecognized by parser), instead of old flex fallback to "Unexpected \
character..."
* rule, which continued in ST_IN_SCRIPTING state after the quote */
ZVAL_NULL(zendlval);
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
}
@@ -2020,7 +2031,7 @@ inline_char_handler:
SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, \
(size_t)Z_STRLEN_P(zendlval)); ZVAL_STRINGL(zendlval, str, sz);
}
- return T_CONSTANT_ENCAPSED_STRING;
+ RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
}
@@ -2032,9 +2043,9 @@ inline_char_handler:
case '"':
yyleng = YYCURSOR - SCNG(yy_text);
if (zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') \
== FAILURE) {
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
- return T_CONSTANT_ENCAPSED_STRING;
+ RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
case '$':
if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
break;
@@ -2064,7 +2075,7 @@ inline_char_handler:
YYCURSOR = SCNG(yy_text) + yyleng;
BEGIN(ST_DOUBLE_QUOTES);
- return '"';
+ RETURN_TOKEN('"');
}
@@ -2112,13 +2123,13 @@ inline_char_handler:
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
- return T_START_HEREDOC;
+ RETURN_TOKEN(T_START_HEREDOC);
}
<ST_IN_SCRIPTING>[`] {
BEGIN(ST_BACKQUOTE);
- return '`';
+ RETURN_TOKEN('`');
}
@@ -2132,7 +2143,7 @@ inline_char_handler:
efree(heredoc_label);
BEGIN(ST_IN_SCRIPTING);
- return T_END_HEREDOC;
+ RETURN_TOKEN(T_END_HEREDOC);
}
@@ -2140,18 +2151,18 @@ inline_char_handler:
Z_LVAL_P(zendlval) = (zend_long) '{';
yy_push_state(ST_IN_SCRIPTING);
yyless(1);
- return T_CURLY_OPEN;
+ RETURN_TOKEN(T_CURLY_OPEN);
}
<ST_DOUBLE_QUOTES>["] {
BEGIN(ST_IN_SCRIPTING);
- return '"';
+ RETURN_TOKEN('"');
}
<ST_BACKQUOTE>[`] {
BEGIN(ST_IN_SCRIPTING);
- return '`';
+ RETURN_TOKEN('`');
}
@@ -2164,7 +2175,7 @@ inline_char_handler:
}
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
@@ -2201,15 +2212,15 @@ double_quotes_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
if (zend_scan_escape_string(zendlval, yytext, yyleng, '"') == FAILURE) {
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
<ST_BACKQUOTE>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
@@ -2245,9 +2256,9 @@ double_quotes_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
if (zend_scan_escape_string(zendlval, yytext, yyleng, '`') == FAILURE) {
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
@@ -2257,7 +2268,7 @@ double_quotes_scan_done:
zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
YYCURSOR--;
@@ -2321,9 +2332,9 @@ heredoc_scan_done:
yyleng = YYCURSOR - SCNG(yy_text);
if (zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0) == FAILURE) {
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
@@ -2333,7 +2344,7 @@ heredoc_scan_done:
zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
YYCURSOR--;
@@ -2380,13 +2391,13 @@ nowdoc_scan_done:
zend_copy_value(zendlval, yytext, yyleng - newline);
HANDLE_NEWLINES(yytext, yyleng - newline);
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) \
state=%d", yytext[0], yytext[0], YYSTATE);
diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt \
b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt new file mode 100644
index 0000000..03b991b
--- /dev/null
+++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt
@@ -0,0 +1,19 @@
+--TEST--
+Parse errors during token_get_all() with TOKEN_PARSE flag
+--SKIPIF--
+<?php if (!extension_loaded("tokenizer")) print "skip"; ?>
+--FILE--
+<?php
+
+try {
+ token_get_all('<?php invalid code;', TOKEN_PARSE);
+} catch (ParseException $e) {
+ echo $e->getMessage(), PHP_EOL;
+}
+
+echo "Done";
+
+?>
+--EXPECT--
+syntax error, unexpected 'code' (T_STRING)
+Done
diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt \
b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt new file mode 100644
index 0000000..ab33435
--- /dev/null
+++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt
@@ -0,0 +1,81 @@
+--TEST--
+Semi reserved words support: member access
+--SKIPIF--
+<?php if (!extension_loaded("tokenizer")) print "skip"; ?>
+--FILE--
+<?php
+$tokens = token_get_all('<?php
+X::continue;
+X::$continue;
+$x->$continue;
+X::continue();
+$x->continue();
+X::class;
+
+class X {
+ const CONTINUE = 1;
+ public $x = self::CONTINUE + 1;
+}
+', TOKEN_PARSE);
+
+array_walk($tokens, function($tk) {
+ if(is_array($tk)) {
+ if(($t = token_name($tk[0])) == 'T_WHITESPACE') return;
+ echo "L{$tk[2]}: ".$t." {$tk[1]}", PHP_EOL;
+ }
+ else echo $tk, PHP_EOL;
+});
+
+echo "Done";
+
+?>
+--EXPECTF--
+L1: T_OPEN_TAG <?php
+
+L2: T_STRING X
+L2: T_DOUBLE_COLON ::
+L2: T_STRING continue
+;
+L3: T_STRING X
+L3: T_DOUBLE_COLON ::
+L3: T_VARIABLE $continue
+;
+L4: T_VARIABLE $x
+L4: T_OBJECT_OPERATOR ->
+L4: T_VARIABLE $continue
+;
+L5: T_STRING X
+L5: T_DOUBLE_COLON ::
+L5: T_STRING continue
+(
+)
+;
+L6: T_VARIABLE $x
+L6: T_OBJECT_OPERATOR ->
+L6: T_STRING continue
+(
+)
+;
+L7: T_STRING X
+L7: T_DOUBLE_COLON ::
+L7: T_CLASS class
+;
+L9: T_CLASS class
+L9: T_STRING X
+{
+L10: T_CONST const
+L10: T_STRING CONTINUE
+=
+L10: T_LNUMBER 1
+;
+L11: T_PUBLIC public
+L11: T_VARIABLE $x
+=
+L11: T_STRING self
+L11: T_DOUBLE_COLON ::
+L11: T_STRING CONTINUE
++
+L11: T_LNUMBER 1
+;
+}
+Done
diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt \
b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt new file mode 100644
index 0000000..3dd8e14
--- /dev/null
+++ b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt
@@ -0,0 +1,68 @@
+--TEST--
+Semi reserved words support: class const
+--SKIPIF--
+<?php if (!extension_loaded("tokenizer")) print "skip"; ?>
+--FILE--
+<?php
+$tokens = token_get_all('<?php
+ class SomeClass {
+ const CONST = 1;
+ const CONTINUE = (self::CONST + 1);
+ const ARRAY = [1, self::CONTINUE => [3, 4], 5];
+ }
+', TOKEN_PARSE);
+
+array_walk($tokens, function($tk) {
+ if(is_array($tk)) {
+ if(($t = token_name($tk[0])) == 'T_WHITESPACE') return;
+ echo "L{$tk[2]}: ".$t." {$tk[1]}", PHP_EOL;
+ }
+ else echo $tk, PHP_EOL;
+});
+
+echo "Done";
+
+?>
+--EXPECTF--
+L1: T_OPEN_TAG <?php
+
+L2: T_CLASS class
+L2: T_STRING SomeClass
+{
+L3: T_CONST const
+L3: T_STRING CONST
+=
+L3: T_LNUMBER 1
+;
+L4: T_CONST const
+L4: T_STRING CONTINUE
+=
+(
+L4: T_STRING self
+L4: T_DOUBLE_COLON ::
+L4: T_STRING CONST
++
+L4: T_LNUMBER 1
+)
+;
+L5: T_CONST const
+L5: T_STRING ARRAY
+=
+[
+L5: T_LNUMBER 1
+,
+L5: T_STRING self
+L5: T_DOUBLE_COLON ::
+L5: T_STRING CONTINUE
+L5: T_DOUBLE_ARROW =>
+[
+L5: T_LNUMBER 3
+,
+L5: T_LNUMBER 4
+]
+,
+L5: T_LNUMBER 5
+]
+;
+}
+Done
diff --git a/ext/tokenizer/tests/token_get_all_error.phpt \
b/ext/tokenizer/tests/token_get_all_error.phpt index 29e97c3..9ded0a1 100644
--- a/ext/tokenizer/tests/token_get_all_error.phpt
+++ b/ext/tokenizer/tests/token_get_all_error.phpt
@@ -19,7 +19,7 @@ var_dump( token_get_all());
echo "-- Testing token_get_all() function with more than expected no. of arguments \
--\n"; $source = '<?php ?>';
$extra_arg = 10;
-var_dump( token_get_all($source, $extra_arg));
+var_dump( token_get_all($source, true, $extra_arg));
echo "Done"
?>
@@ -28,10 +28,10 @@ echo "Done"
-- Testing token_get_all() function with zero arguments --
-Warning: token_get_all() expects exactly 1 parameter, 0 given in %s on line %d
+Warning: token_get_all() expects at least 1 parameter, 0 given in %s on line 11
NULL
-- Testing token_get_all() function with more than expected no. of arguments --
-Warning: token_get_all() expects exactly 1 parameter, 2 given in %s on line %d
+Warning: token_get_all() expects at most 2 parameters, 3 given in %s on line 17
NULL
-Done
+Done
\ No newline at end of file
diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c
index c4b9d14..2a4fa90 100644
--- a/ext/tokenizer/tokenizer.c
+++ b/ext/tokenizer/tokenizer.c
@@ -37,6 +37,12 @@
#define zendcursor LANG_SCNG(yy_cursor)
#define zendlimit LANG_SCNG(yy_limit)
+#define TOKEN_PARSE 1
+
+void tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS) {
+ REGISTER_LONG_CONSTANT("TOKEN_PARSE", TOKEN_PARSE, CONST_CS|CONST_PERSISTENT);
+}
+
/* {{{ arginfo */
ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1)
ZEND_ARG_INFO(0, source)
@@ -83,6 +89,7 @@ ZEND_GET_MODULE(tokenizer)
PHP_MINIT_FUNCTION(tokenizer)
{
tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
+ tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS_PASSTHRU);
return SUCCESS;
}
/* }}} */
@@ -97,8 +104,10 @@ PHP_MINFO_FUNCTION(tokenizer)
}
/* }}} */
-static void tokenize(zval *return_value)
+static zend_bool tokenize(zval *return_value, zend_string *source)
{
+ zval source_zval;
+ zend_lex_state original_lex_state;
zval token;
zval keyword;
int token_type;
@@ -106,10 +115,22 @@ static void tokenize(zval *return_value)
int token_line = 1;
int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
+ ZVAL_STR_COPY(&source_zval, source);
+ zend_save_lexical_state(&original_lex_state);
+
+ if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) {
+ zend_restore_lexical_state(&original_lex_state);
+ return 0;
+ }
+
+ LANG_SCNG(yy_state) = yycINITIAL;
array_init(return_value);
ZVAL_NULL(&token);
while ((token_type = lex_scan(&token))) {
+
+ if(token_type == T_ERROR) break;
+
destroy = 1;
switch (token_type) {
case T_CLOSE_TAG:
@@ -123,8 +144,6 @@ static void tokenize(zval *return_value)
case T_DOC_COMMENT:
destroy = 0;
break;
- case T_ERROR:
- return;
}
if (token_type >= 256) {
@@ -169,34 +188,113 @@ static void tokenize(zval *return_value)
token_line = CG(zend_lineno);
}
+
+ zval_dtor(&source_zval);
+ zend_restore_lexical_state(&original_lex_state);
+
+ return 1;
}
-/* {{{ proto array token_get_all(string source)
- */
-PHP_FUNCTION(token_get_all)
+zval token_stream;
+
+void on_event(zend_php_scanner_event event, int token, int line)
{
- zend_string *source;
- zval source_zval;
- zend_lex_state original_lex_state;
+ zval keyword;
+ HashTable *tokens_ht;
+ zval *token_zv;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &source) == FAILURE) {
- return;
+ switch(event) {
+ case ON_TOKEN:
+ if (token == T_ERROR || token == END) break;
+ if (token >= 256) {
+ array_init(&keyword);
+ add_next_index_long(&keyword, token);
+ add_next_index_stringl(&keyword, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
+ add_next_index_long(&keyword, line);
+ add_next_index_zval(&token_stream, &keyword);
+ } else {
+ add_next_index_stringl(&token_stream, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
+ }
+ break;
+ case ON_FEEDBACK:
+ tokens_ht = Z_ARRVAL(token_stream);
+ token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1);
+ if (token_zv && Z_TYPE_P(token_zv) == IS_ARRAY) {
+ ZVAL_LONG(zend_hash_index_find(Z_ARRVAL_P(token_zv), 0), token);
+ }
+ break;
+ case ON_STOP:
+ if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) {
+ array_init(&keyword);
+ add_next_index_long(&keyword, T_INLINE_HTML);
+ add_next_index_stringl(&keyword,
+ (char *)LANG_SCNG(yy_cursor), LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor));
+ add_next_index_long(&keyword, CG(zend_lineno));
+ add_next_index_zval(&token_stream, &keyword);
+ }
+ break;
}
+}
+
+static zend_bool tokenize_parse(zval *return_value, zend_string *source)
+{
+ zval source_zval;
+ zend_lex_state original_lex_state;
+ zend_bool original_in_compilation;
+ zend_bool success;
ZVAL_STR_COPY(&source_zval, source);
+
+ original_in_compilation = CG(in_compilation);
+ CG(in_compilation) = 1;
zend_save_lexical_state(&original_lex_state);
- if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) {
- zend_restore_lexical_state(&original_lex_state);
- RETURN_FALSE;
- }
+ if ((success = (zend_prepare_string_for_scanning(&source_zval, "") == SUCCESS))) {
+ CG(ast) = NULL;
+ CG(ast_arena) = zend_arena_create(1024 * 32);
+ LANG_SCNG(yy_state) = yycINITIAL;
+ LANG_SCNG(on_event) = on_event;
- LANG_SCNG(yy_state) = yycINITIAL;
+ array_init(&token_stream);
+ if((success = (zendparse() == SUCCESS))) {
+ ZVAL_ZVAL(return_value, &token_stream, 1, 0);
+ }
+ zval_dtor(&token_stream);
- tokenize(return_value);
+ zend_ast_destroy(CG(ast));
+ zend_arena_destroy(CG(ast_arena));
+ }
+ /* restore compiler and scanner global states */
zend_restore_lexical_state(&original_lex_state);
+ CG(in_compilation) = original_in_compilation;
+
zval_dtor(&source_zval);
+
+ return success;
+}
+
+/* }}} */
+
+/* {{{ proto array token_get_all(string source)
+ */
+PHP_FUNCTION(token_get_all)
+{
+ zend_string *source;
+ zend_long flags = 0;
+ zend_bool success;
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|l", &source, &flags) == FAILURE) {
+ return;
+ }
+
+ if (flags & TOKEN_PARSE) {
+ success = tokenize_parse(return_value, source);
+ } else {
+ success = tokenize(return_value, source);
+ }
+
+ if (!success) RETURN_FALSE;
}
/* }}} */
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic