[prev in list] [next in list] [prev in thread] [next in thread]
List: jakarta-oro-dev
Subject: cvs commit: jakarta-oro/src/java/org/apache/oro/text/regex OpCode.java Perl5Compiler.java Perl5Debug.java Perl5Matcher.java
From: dfs () apache ! org
Date: 2001-03-30 5:39:03
[Download RAW message or body]
dfs 01/03/29 21:39:03
Modified: src/java/org/apache/oro/text/regex OpCode.java
Perl5Compiler.java Perl5Debug.java
Perl5Matcher.java
Log:
Changed WORD opcode back to ALNUM and the new ALNUM to ALNUMC to match
Perl's nomenclature. Accidentally reformatted some of Perl5Compiler.java
using Emacs indent region, producing a spurious diff.
Revision Changes Path
1.6 +9 -9 jakarta-oro/src/java/org/apache/oro/text/regex/OpCode.java
Index: OpCode.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/OpCode.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- OpCode.java 2001/03/29 16:33:17 1.5
+++ OpCode.java 2001/03/30 05:39:01 1.6
@@ -63,7 +63,7 @@
* op-codes used in a compiled regular expression.
@author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
- @version $Id: OpCode.java,v 1.5 2001/03/29 16:33:17 dfs Exp $
+ @version $Id: OpCode.java,v 1.6 2001/03/30 05:39:01 dfs Exp $
*/
final class OpCode {
@@ -91,8 +91,8 @@
_NOTHING = 15, // no Match empty string.
_STAR = 16, // yes Match this (simple) thing 0 or more times.
_PLUS = 17, // yes Match this (simple) thing 1 or more times.
- _WORD = 18, // no Match any word character
- _NWORD = 19, // no Match any non-word character
+ _ALNUM = 18, // no Match any word character
+ _NALNUM = 19, // no Match any non-word character
_BOUND = 20, // no Match "" at any word boundary
_NBOUND = 21, // no Match "" at any word non-boundary
_SPACE = 22, // no Match any whitespace character
@@ -125,7 +125,7 @@
_OPCODE = 47,
_NOPCODE = 48,
_ONECHAR = 49,
- _ALNUM = 50,
+ _ALNUMC = 50,
_ASCII = 51;
// Lengths of the various operands.
@@ -140,12 +140,12 @@
static final char _opType[] = {
_END, _BOL, _BOL, _BOL, _EOL, _EOL, _EOL, _ANY, _ANY, _ANYOF, _CURLY,
- _CURLY, _BRANCH, _BACK, _EXACTLY, _NOTHING, _STAR, _PLUS, _WORD,
- _NWORD, _BOUND, _NBOUND, _SPACE, _NSPACE, _DIGIT, _NDIGIT, _REF,
+ _CURLY, _BRANCH, _BACK, _EXACTLY, _NOTHING, _STAR, _PLUS, _ALNUM,
+ _NALNUM, _BOUND, _NBOUND, _SPACE, _NSPACE, _DIGIT, _NDIGIT, _REF,
_OPEN, _CLOSE, _MINMOD, _BOL, _BRANCH, _BRANCH, _END, _WHILEM,
_ANYOFUN, _NANYOFUN, _RANGE, _ALPHA, _BLANK, _CNTRL, _GRAPH,
_LOWER, _PRINT, _PUNCT, _UPPER, _XDIGIT, _OPCODE, _NOPCODE,
- _ONECHAR, _ALNUM, _ASCII
+ _ONECHAR, _ALNUMC, _ASCII
};
static final char _opLengthVaries[] = {
@@ -153,9 +153,9 @@
};
static final char _opLengthOne[] = {
- _ANY, _SANY, _ANYOF, _WORD, _NWORD, _SPACE, _NSPACE, _DIGIT, _NDIGIT,
+ _ANY, _SANY, _ANYOF, _ALNUM, _NALNUM, _SPACE, _NSPACE, _DIGIT, _NDIGIT,
_ANYOFUN, _NANYOFUN, _ALPHA, _BLANK, _CNTRL, _GRAPH, _LOWER, _PRINT,
- _PUNCT, _UPPER, _XDIGIT, _OPCODE, _NOPCODE, _ONECHAR, _ALNUM,
+ _PUNCT, _UPPER, _XDIGIT, _OPCODE, _NOPCODE, _ONECHAR, _ALNUMC,
_ASCII
};
1.9 +149 -145 jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Compiler.java
Index: Perl5Compiler.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Compiler.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- Perl5Compiler.java 2001/03/29 16:33:17 1.8
+++ Perl5Compiler.java 2001/03/30 05:39:02 1.9
@@ -67,7 +67,7 @@
* information about Perl5 regular expressions.
@author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
- @version $Id: Perl5Compiler.java,v 1.8 2001/03/29 16:33:17 dfs Exp $
+ @version $Id: Perl5Compiler.java,v 1.9 2001/03/30 05:39:02 dfs Exp $
* @see PatternCompiler
* @see MalformedPatternException
@@ -110,8 +110,8 @@
static {
__hashPOSIX = new HashMap();
- __hashPOSIX.put("alnum", new Character(OpCode._ALNUM));
- __hashPOSIX.put("word", new Character(OpCode._WORD));
+ __hashPOSIX.put("alnum", new Character(OpCode._ALNUMC));
+ __hashPOSIX.put("word", new Character(OpCode._ALNUM));
__hashPOSIX.put("alpha", new Character(OpCode._ALPHA));
__hashPOSIX.put("blank", new Character(OpCode._BLANK));
__hashPOSIX.put("cntrl", new Character(OpCode._CNTRL));
@@ -644,12 +644,12 @@
__getNextChar();
break;
case 'w':
- offset = __emitNode(OpCode._WORD);
+ offset = __emitNode(OpCode._ALNUM);
retFlags[0] |= (__NONNULL | __SIMPLE);
__getNextChar();
break;
case 'W':
- offset = __emitNode(OpCode._NWORD);
+ offset = __emitNode(OpCode._NALNUM);
retFlags[0] |= (__NONNULL | __SIMPLE);
__getNextChar();
break;
@@ -1109,7 +1109,7 @@
char clss, lastclss = Character.MAX_VALUE;
int offset, numLength[] = { 0 };
- boolean negFlag[] = new boolean[1];
+ boolean negFlag[] = { false };
boolean opcodeFlag; /* clss isn't character when this flag true. */
if(__input._getValue() == '^') {
@@ -1127,153 +1127,156 @@
skipTest = false;
while((!__input._isAtEnd() && (clss = __input._getValue()) != ']')
- || skipTest) {
- // It sucks, but we have to make this assignment every time
- skipTest = false;
- opcodeFlag = false;
- __input._increment();
+ || skipTest)
+ {
+ // It sucks, but we have to make this assignment every time
+ skipTest = false;
+ opcodeFlag = false;
+ __input._increment();
- if(clss == '\\' || clss == '[') {
- if(clss == '\\') {
- /* character is escaped */
- clss = __input._postIncrement();
- } else {
- /* try POSIX expression */
- char posixOpCode = __parsePOSIX(negFlag);
- if(posixOpCode != 0){
- opcodeFlag = true;
- clss = posixOpCode;
- }
- }
- if (opcodeFlag != true) {
- switch(clss){
- case 'w':
- opcodeFlag = true;
- clss = OpCode._WORD;
- lastclss = Character.MAX_VALUE;
- break;
- case 'W':
- opcodeFlag = true;
- clss = OpCode._NWORD;
- lastclss = Character.MAX_VALUE;
- break;
- case 's':
- opcodeFlag = true;
- clss = OpCode._SPACE;
- lastclss = Character.MAX_VALUE;
- break;
- case 'S':
- opcodeFlag = true;
- clss = OpCode._NSPACE;
- lastclss = Character.MAX_VALUE;
- break;
- case 'd':
- opcodeFlag = true;
- clss = OpCode._DIGIT;
- lastclss = Character.MAX_VALUE;
- break;
- case 'D':
- opcodeFlag = true;
- clss = OpCode._NDIGIT;
- lastclss = Character.MAX_VALUE;
- break;
- case 'n':
- clss = '\n';
- break;
- case 'r':
- clss = '\r';
- break;
- case 't':
- clss = '\t';
- break;
- case 'f':
- clss = '\f';
- break;
- case 'b':
- clss = '\b';
- break;
- case 'e':
- clss = '\033';
- break;
- case 'a':
- clss = '\007';
- break;
- case 'x':
- clss = (char)__parseHex(__input._array, __input._getOffset(), 2,
- numLength);
- __input._increment(numLength[0]);
- break;
- case 'c':
+ if(clss == '\\' || clss == '[') {
+ if(clss == '\\') {
+ /* character is escaped */
clss = __input._postIncrement();
- if(Character.isLowerCase(clss))
- clss = Character.toUpperCase(clss);
- clss ^= 64;
- break;
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- clss = (char)__parseOctal(__input._array, __input._getOffset() - 1,
- 3, numLength);
- __input._increment(numLength[0] - 1);
- break;
- default:
- break;
- }
+ } else {
+ /* try POSIX expression */
+ char posixOpCode = __parsePOSIX(negFlag);
+ if(posixOpCode != 0){
+ opcodeFlag = true;
+ clss = posixOpCode;
+ }
+ }
+ if (opcodeFlag != true) {
+ switch(clss){
+ case 'w':
+ opcodeFlag = true;
+ clss = OpCode._ALNUM;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 'W':
+ opcodeFlag = true;
+ clss = OpCode._NALNUM;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 's':
+ opcodeFlag = true;
+ clss = OpCode._SPACE;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 'S':
+ opcodeFlag = true;
+ clss = OpCode._NSPACE;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 'd':
+ opcodeFlag = true;
+ clss = OpCode._DIGIT;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 'D':
+ opcodeFlag = true;
+ clss = OpCode._NDIGIT;
+ lastclss = Character.MAX_VALUE;
+ break;
+ case 'n':
+ clss = '\n';
+ break;
+ case 'r':
+ clss = '\r';
+ break;
+ case 't':
+ clss = '\t';
+ break;
+ case 'f':
+ clss = '\f';
+ break;
+ case 'b':
+ clss = '\b';
+ break;
+ case 'e':
+ clss = '\033';
+ break;
+ case 'a':
+ clss = '\007';
+ break;
+ case 'x':
+ clss = (char)__parseHex(__input._array, __input._getOffset(), 2,
+ numLength);
+ __input._increment(numLength[0]);
+ break;
+ case 'c':
+ clss = __input._postIncrement();
+ if(Character.isLowerCase(clss))
+ clss = Character.toUpperCase(clss);
+ clss ^= 64;
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ clss =
+ (char)__parseOctal(__input._array,
+ __input._getOffset() - 1, 3, numLength);
+ __input._increment(numLength[0] - 1);
+ break;
+ default:
+ break;
+ }
+ }
}
- }
- if(range) {
- if(lastclss > clss)
- throw new MalformedPatternException(
- "Invalid [] range in expression.");
- range = false;
- } else {
- lastclss = clss;
+ if(range) {
+ if(lastclss > clss)
+ throw new MalformedPatternException(
+ "Invalid [] range in expression.");
+ range = false;
+ } else {
+ lastclss = clss;
- if(__input._getValue() == '-' &&
- __input._getOffset() + 1 < __input._getLength() &&
- __input._getValueRelative(1) != ']') {
- __input._increment();
- range = true;
- continue;
+ if(__input._getValue() == '-' &&
+ __input._getOffset() + 1 < __input._getLength() &&
+ __input._getValueRelative(1) != ']') {
+ __input._increment();
+ range = true;
+ continue;
+ }
}
- }
- if(lastclss == clss) {
- if(opcodeFlag == true) {
- if(negFlag[0] == false)
- __emitCode(OpCode._OPCODE);
- else
- __emitCode(OpCode._NOPCODE);
- } else {
- __emitCode(OpCode._ONECHAR);
- }
- __emitCode(clss);
+ if(lastclss == clss) {
+ if(opcodeFlag == true) {
+ if(negFlag[0] == false)
+ __emitCode(OpCode._OPCODE);
+ else
+ __emitCode(OpCode._NOPCODE);
+ } else
+ __emitCode(OpCode._ONECHAR);
+
+ __emitCode(clss);
+
+ if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
+ Character.isUpperCase(clss) && Character.isUpperCase(lastclss)){
+ __programSize--;
+ __emitCode(Character.toLowerCase(clss));
+ }
+ }
- if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
- Character.isUpperCase(clss) && Character.isUpperCase(lastclss)){
- __programSize--;
- __emitCode(Character.toLowerCase(clss));
- }
- }
- if(lastclss < clss) {
- __emitCode(OpCode._RANGE);
- __emitCode(lastclss);
- __emitCode(clss);
-
- if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
- Character.isUpperCase(clss) && Character.isUpperCase(lastclss)){
- __programSize-=2;
- __emitCode(Character.toLowerCase(lastclss));
- __emitCode(Character.toLowerCase(clss));
+ if(lastclss < clss) {
+ __emitCode(OpCode._RANGE);
+ __emitCode(lastclss);
+ __emitCode(clss);
+
+ if((__modifierFlags[0] & __CASE_INSENSITIVE) != 0 &&
+ Character.isUpperCase(clss) && Character.isUpperCase(lastclss)){
+ __programSize-=2;
+ __emitCode(Character.toLowerCase(lastclss));
+ __emitCode(Character.toLowerCase(clss));
+ }
- }
- lastclss = Character.MAX_VALUE;
- range = false;
- }
+ lastclss = Character.MAX_VALUE;
+ range = false;
+ }
- lastclss = clss;
- }
+ lastclss = clss;
+ }
if(__input._getValue() != ']')
throw new MalformedPatternException("Unmatched [] in expression.");
@@ -1285,11 +1288,13 @@
/**
- * parse POSIX exression like [:foo:].
+ * Parse POSIX expression like [:foo:].
*
* @return OpCode. return 0 when fail parsing POSIX expression.
*/
- private char __parsePOSIX(boolean negFlag[]) throws MalformedPatternException {
+ private char __parsePOSIX(boolean negFlag[])
+ throws MalformedPatternException
+ {
int offset = __input._getOffset();
int len = __input._getLength();
int pos = offset;
@@ -1325,7 +1330,6 @@
return 0;
__input._setOffset(pos);
-// System.out.println("posix="+buf.toString()+":"+((Character)opcode).charValue());
return ((Character)opcode).charValue();
}
1.5 +34 -34 jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Debug.java
Index: Perl5Debug.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Debug.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- Perl5Debug.java 2001/03/29 16:33:17 1.4
+++ Perl5Debug.java 2001/03/30 05:39:02 1.5
@@ -68,7 +68,7 @@
* comparison with the program generated by Perl5 with the -r option.
@author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
- @version $Id: Perl5Debug.java,v 1.4 2001/03/29 16:33:17 dfs Exp $
+ @version $Id: Perl5Debug.java,v 1.5 2001/03/30 05:39:02 dfs Exp $
* @see Perl5Pattern
*/
@@ -170,15 +170,15 @@
buffer.append(":");
switch(program[offset]) {
- case OpCode._BOL : str = "BOL"; break;
- case OpCode._MBOL : str = "MBOL"; break;
- case OpCode._SBOL : str = "SBOL"; break;
- case OpCode._EOL : str = "EOL"; break;
- case OpCode._MEOL : str = "MEOL"; break;
- case OpCode._ANY : str = "ANY"; break;
- case OpCode._SANY : str = "SANY"; break;
- case OpCode._ANYOF : str = "ANYOF"; break;
- case OpCode._ANYOFUN : str = "ANYOFUN"; break;
+ case OpCode._BOL : str = "BOL"; break;
+ case OpCode._MBOL : str = "MBOL"; break;
+ case OpCode._SBOL : str = "SBOL"; break;
+ case OpCode._EOL : str = "EOL"; break;
+ case OpCode._MEOL : str = "MEOL"; break;
+ case OpCode._ANY : str = "ANY"; break;
+ case OpCode._SANY : str = "SANY"; break;
+ case OpCode._ANYOF : str = "ANYOF"; break;
+ case OpCode._ANYOFUN : str = "ANYOFUN"; break;
case OpCode._NANYOFUN : str = "NANYOFUN"; break;
/*
case OpCode._ANYOF : // debug
@@ -194,30 +194,30 @@
buffer.append("\n\n");
break;
*/
- case OpCode._BRANCH: str = "BRANCH"; break;
- case OpCode._EXACTLY: str = "EXACTLY"; break;
- case OpCode._NOTHING: str = "NOTHING"; break;
- case OpCode._BACK : str = "BACK"; break;
- case OpCode._END : str = "END"; break;
- case OpCode._WORD : str = "WORD"; break;
- case OpCode._NWORD: str = "NWORD"; break;
- case OpCode._BOUND : str = "BOUND"; break;
- case OpCode._NBOUND: str = "NBOUND"; break;
- case OpCode._SPACE : str = "SPACE"; break;
- case OpCode._NSPACE: str = "NSPACE"; break;
- case OpCode._DIGIT : str = "DIGIT"; break;
- case OpCode._NDIGIT: str = "NDIGIT"; break;
- case OpCode._ALPHA : str = "ALPHA"; break;
- case OpCode._BLANK : str = "BLANK"; break;
- case OpCode._CNTRL : str = "CNTRL"; break;
- case OpCode._GRAPH : str = "GRAPH"; break;
- case OpCode._LOWER : str = "LOWER"; break;
- case OpCode._PRINT : str = "PRINT"; break;
- case OpCode._PUNCT : str = "PUNCT"; break;
- case OpCode._UPPER : str = "UPPER"; break;
- case OpCode._XDIGIT: str = "XDIGIT"; break;
- case OpCode._ALNUM : str = "ALNUM"; break;
- case OpCode._ASCII : str = "ASCII"; break;
+ case OpCode._BRANCH : str = "BRANCH"; break;
+ case OpCode._EXACTLY : str = "EXACTLY"; break;
+ case OpCode._NOTHING : str = "NOTHING"; break;
+ case OpCode._BACK : str = "BACK"; break;
+ case OpCode._END : str = "END"; break;
+ case OpCode._ALNUM : str = "ALNUM"; break;
+ case OpCode._NALNUM : str = "NALNUM"; break;
+ case OpCode._BOUND : str = "BOUND"; break;
+ case OpCode._NBOUND : str = "NBOUND"; break;
+ case OpCode._SPACE : str = "SPACE"; break;
+ case OpCode._NSPACE : str = "NSPACE"; break;
+ case OpCode._DIGIT : str = "DIGIT"; break;
+ case OpCode._NDIGIT : str = "NDIGIT"; break;
+ case OpCode._ALPHA : str = "ALPHA"; break;
+ case OpCode._BLANK : str = "BLANK"; break;
+ case OpCode._CNTRL : str = "CNTRL"; break;
+ case OpCode._GRAPH : str = "GRAPH"; break;
+ case OpCode._LOWER : str = "LOWER"; break;
+ case OpCode._PRINT : str = "PRINT"; break;
+ case OpCode._PUNCT : str = "PUNCT"; break;
+ case OpCode._UPPER : str = "UPPER"; break;
+ case OpCode._XDIGIT : str = "XDIGIT"; break;
+ case OpCode._ALNUMC : str = "ALNUMC"; break;
+ case OpCode._ASCII : str = "ASCII"; break;
case OpCode._CURLY :
buffer.append("CURLY {");
buffer.append((int)OpCode._getArg1(program, offset));
1.11 +10 -10 jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Matcher.java
Index: Perl5Matcher.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/Perl5Matcher.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- Perl5Matcher.java 2001/03/29 16:33:18 1.10
+++ Perl5Matcher.java 2001/03/30 05:39:02 1.11
@@ -66,7 +66,7 @@
* Perl5Compiler.
@author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
- @version $Id: Perl5Matcher.java,v 1.10 2001/03/29 16:33:18 dfs Exp $
+ @version $Id: Perl5Matcher.java,v 1.11 2001/03/30 05:39:02 dfs Exp $
* @see PatternMatcher
* @see Perl5Compiler
@@ -512,7 +512,7 @@
}
break;
- case OpCode._WORD:
+ case OpCode._ALNUM:
while(__currentOffset < endOffset) {
ch = __input[__currentOffset];
if(OpCode._isWordCharacter(ch)) {
@@ -527,7 +527,7 @@
}
break;
- case OpCode._NWORD:
+ case OpCode._NALNUM:
while(__currentOffset < endOffset) {
ch = __input[__currentOffset];
if(!OpCode._isWordCharacter(ch)) {
@@ -648,13 +648,13 @@
offset++;
switch ( __program[offset++] ) {
- case OpCode._ALNUM:
+ case OpCode._ALNUMC:
if(Character.isLetterOrDigit(code)) return isANYOF;
break;
- case OpCode._WORD:
+ case OpCode._ALNUM:
if(OpCode._isWordCharacter(code)) return isANYOF;
break;
- case OpCode._NWORD:
+ case OpCode._NALNUM:
if(!OpCode._isWordCharacter(code)) return isANYOF;
break;
case OpCode._SPACE:
@@ -793,12 +793,12 @@
}
break;
- case OpCode._WORD:
+ case OpCode._ALNUM:
while(scan < eol && OpCode._isWordCharacter(__input[scan]))
++scan;
break;
- case OpCode._NWORD:
+ case OpCode._NALNUM:
while(scan < eol && !OpCode._isWordCharacter(__input[scan]))
++scan;
break;
@@ -961,7 +961,7 @@
nextChar = (inputRemains ? __input[input] : __EOS);
break;
- case OpCode._WORD:
+ case OpCode._ALNUM:
if(!inputRemains)
return false;
if(!OpCode._isWordCharacter(nextChar))
@@ -970,7 +970,7 @@
nextChar = (inputRemains ? __input[input] : __EOS);
break;
- case OpCode._NWORD:
+ case OpCode._NALNUM:
if(!inputRemains && input >= __eol)
return false;
if(OpCode._isWordCharacter(nextChar))
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic