mirror of git://gcc.gnu.org/git/gcc.git
re PR libgcj/20504 (java.util.regex implementation doesn't support quoting constructs)
2005-05-16 Ziga Mahkovec <ziga.mahkovec@klika.si> — PR libgcj/20504 — gnu/regexp/RE.java: Add support for quoting constructs. (From-SVN: r99789)
This commit is contained in:
parent
bbf6f1cf02
commit
4f751533e4
|
@ -1,3 +1,8 @@
|
||||||
|
2005-05-16 Ziga Mahkovec <ziga.mahkovec@klika.si>
|
||||||
|
|
||||||
|
PR libgcj/20504
|
||||||
|
gnu/regexp/RE.java: Add support for quoting constructs.
|
||||||
|
|
||||||
2005-05-15 Tom Tromey <tromey@redhat.com>
|
2005-05-15 Tom Tromey <tromey@redhat.com>
|
||||||
|
|
||||||
* java/lang/String.java (startsWith): Fixed javadoc.
|
* java/lang/String.java (startsWith): Fixed javadoc.
|
||||||
|
|
|
@ -331,10 +331,22 @@ public class RE extends REToken {
|
||||||
// Buffer a token so we can create a TokenRepeated, etc.
|
// Buffer a token so we can create a TokenRepeated, etc.
|
||||||
REToken currentToken = null;
|
REToken currentToken = null;
|
||||||
char ch;
|
char ch;
|
||||||
|
boolean quot = false;
|
||||||
|
|
||||||
while (index < pLength) {
|
while (index < pLength) {
|
||||||
// read the next character unit (including backslash escapes)
|
// read the next character unit (including backslash escapes)
|
||||||
index = getCharUnit(pattern,index,unit);
|
index = getCharUnit(pattern,index,unit,quot);
|
||||||
|
|
||||||
|
if (unit.bk)
|
||||||
|
if (unit.ch == 'Q') {
|
||||||
|
quot = true;
|
||||||
|
continue;
|
||||||
|
} else if (unit.ch == 'E') {
|
||||||
|
quot = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (quot)
|
||||||
|
unit.bk = false;
|
||||||
|
|
||||||
// ALTERNATION OPERATOR
|
// ALTERNATION OPERATOR
|
||||||
// \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
|
// \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
|
||||||
|
@ -342,8 +354,8 @@ public class RE extends REToken {
|
||||||
|
|
||||||
// TODO: the '\n' literal here should be a test against REToken.newline,
|
// TODO: the '\n' literal here should be a test against REToken.newline,
|
||||||
// which unfortunately may be more than a single character.
|
// which unfortunately may be more than a single character.
|
||||||
if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk))
|
if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot)))
|
||||||
|| (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !unit.bk) )
|
|| (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) )
|
||||||
&& !syntax.get(RESyntax.RE_LIMITED_OPS)) {
|
&& !syntax.get(RESyntax.RE_LIMITED_OPS)) {
|
||||||
// make everything up to here be a branch. create vector if nec.
|
// make everything up to here be a branch. create vector if nec.
|
||||||
addToken(currentToken);
|
addToken(currentToken);
|
||||||
|
@ -363,7 +375,7 @@ public class RE extends REToken {
|
||||||
// OPEN QUESTION:
|
// OPEN QUESTION:
|
||||||
// what is proper interpretation of '{' at start of string?
|
// what is proper interpretation of '{' at start of string?
|
||||||
|
|
||||||
else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
|
else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {
|
||||||
int newIndex = getMinMax(pattern,index,minMax,syntax);
|
int newIndex = getMinMax(pattern,index,minMax,syntax);
|
||||||
if (newIndex > index) {
|
if (newIndex > index) {
|
||||||
if (minMax.first > minMax.second)
|
if (minMax.first > minMax.second)
|
||||||
|
@ -388,7 +400,7 @@ public class RE extends REToken {
|
||||||
// LIST OPERATOR:
|
// LIST OPERATOR:
|
||||||
// [...] | [^...]
|
// [...] | [^...]
|
||||||
|
|
||||||
else if ((unit.ch == '[') && !unit.bk) {
|
else if ((unit.ch == '[') && !(unit.bk || quot)) {
|
||||||
Vector options = new Vector();
|
Vector options = new Vector();
|
||||||
boolean negative = false;
|
boolean negative = false;
|
||||||
char lastChar = 0;
|
char lastChar = 0;
|
||||||
|
@ -490,7 +502,7 @@ public class RE extends REToken {
|
||||||
// SUBEXPRESSIONS
|
// SUBEXPRESSIONS
|
||||||
// (...) | \(...\) depending on RE_NO_BK_PARENS
|
// (...) | \(...\) depending on RE_NO_BK_PARENS
|
||||||
|
|
||||||
else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
|
else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) {
|
||||||
boolean pure = false;
|
boolean pure = false;
|
||||||
boolean comment = false;
|
boolean comment = false;
|
||||||
boolean lookAhead = false;
|
boolean lookAhead = false;
|
||||||
|
@ -537,13 +549,13 @@ public class RE extends REToken {
|
||||||
int nextIndex = index;
|
int nextIndex = index;
|
||||||
int nested = 0;
|
int nested = 0;
|
||||||
|
|
||||||
while ( ((nextIndex = getCharUnit(pattern,endIndex,unit)) > 0)
|
while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)
|
||||||
&& !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) )
|
&& !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) )
|
||||||
if ((endIndex = nextIndex) >= pLength)
|
if ((endIndex = nextIndex) >= pLength)
|
||||||
throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
|
throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
|
||||||
else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
|
else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
|
||||||
nested++;
|
nested++;
|
||||||
else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
|
else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
|
||||||
nested--;
|
nested--;
|
||||||
|
|
||||||
// endIndex is now position at a ')','\)'
|
// endIndex is now position at a ')','\)'
|
||||||
|
@ -572,14 +584,14 @@ public class RE extends REToken {
|
||||||
// UNMATCHED RIGHT PAREN
|
// UNMATCHED RIGHT PAREN
|
||||||
// ) or \) throw exception if
|
// ) or \) throw exception if
|
||||||
// !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
|
// !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
|
||||||
else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
|
else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {
|
||||||
throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
|
throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
|
||||||
}
|
}
|
||||||
|
|
||||||
// START OF LINE OPERATOR
|
// START OF LINE OPERATOR
|
||||||
// ^
|
// ^
|
||||||
|
|
||||||
else if ((unit.ch == '^') && !unit.bk) {
|
else if ((unit.ch == '^') && !(unit.bk || quot)) {
|
||||||
addToken(currentToken);
|
addToken(currentToken);
|
||||||
currentToken = null;
|
currentToken = null;
|
||||||
addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
|
addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
|
||||||
|
@ -588,7 +600,7 @@ public class RE extends REToken {
|
||||||
// END OF LINE OPERATOR
|
// END OF LINE OPERATOR
|
||||||
// $
|
// $
|
||||||
|
|
||||||
else if ((unit.ch == '$') && !unit.bk) {
|
else if ((unit.ch == '$') && !(unit.bk || quot)) {
|
||||||
addToken(currentToken);
|
addToken(currentToken);
|
||||||
currentToken = null;
|
currentToken = null;
|
||||||
addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
|
addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
|
||||||
|
@ -597,7 +609,7 @@ public class RE extends REToken {
|
||||||
// MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
|
// MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
|
||||||
// .
|
// .
|
||||||
|
|
||||||
else if ((unit.ch == '.') && !unit.bk) {
|
else if ((unit.ch == '.') && !(unit.bk || quot)) {
|
||||||
addToken(currentToken);
|
addToken(currentToken);
|
||||||
currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
|
currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
|
||||||
}
|
}
|
||||||
|
@ -605,7 +617,7 @@ public class RE extends REToken {
|
||||||
// ZERO-OR-MORE REPEAT OPERATOR
|
// ZERO-OR-MORE REPEAT OPERATOR
|
||||||
// *
|
// *
|
||||||
|
|
||||||
else if ((unit.ch == '*') && !unit.bk) {
|
else if ((unit.ch == '*') && !(unit.bk || quot)) {
|
||||||
if (currentToken == null)
|
if (currentToken == null)
|
||||||
throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
|
throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
|
||||||
if (currentToken instanceof RETokenRepeated)
|
if (currentToken instanceof RETokenRepeated)
|
||||||
|
@ -621,7 +633,7 @@ public class RE extends REToken {
|
||||||
// + | \+ depending on RE_BK_PLUS_QM
|
// + | \+ depending on RE_BK_PLUS_QM
|
||||||
// not available if RE_LIMITED_OPS is set
|
// not available if RE_LIMITED_OPS is set
|
||||||
|
|
||||||
else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
|
else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
|
||||||
if (currentToken == null)
|
if (currentToken == null)
|
||||||
throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
|
throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
|
||||||
if (currentToken instanceof RETokenRepeated)
|
if (currentToken instanceof RETokenRepeated)
|
||||||
|
@ -638,7 +650,7 @@ public class RE extends REToken {
|
||||||
// not available if RE_LIMITED_OPS is set
|
// not available if RE_LIMITED_OPS is set
|
||||||
// stingy matching if RE_STINGY_OPS is set and it follows a quantifier
|
// stingy matching if RE_STINGY_OPS is set and it follows a quantifier
|
||||||
|
|
||||||
else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
|
else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
|
||||||
if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
|
if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
|
||||||
|
|
||||||
// Check for stingy matching on RETokenRepeated
|
// Check for stingy matching on RETokenRepeated
|
||||||
|
@ -805,9 +817,9 @@ public class RE extends REToken {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int getCharUnit(char[] input, int index, CharUnit unit) throws REException {
|
private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException {
|
||||||
unit.ch = input[index++];
|
unit.ch = input[index++];
|
||||||
if (unit.bk = (unit.ch == '\\'))
|
if (unit.bk = (unit.ch == '\\' && (!quot || index >= input.length || input[index] == 'E')))
|
||||||
if (index < input.length)
|
if (index < input.length)
|
||||||
unit.ch = input[index++];
|
unit.ch = input[index++];
|
||||||
else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
|
else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
|
||||||
|
@ -1281,7 +1293,7 @@ public class RE extends REToken {
|
||||||
|
|
||||||
// Read string of digits
|
// Read string of digits
|
||||||
do {
|
do {
|
||||||
index = getCharUnit(input,index,unit);
|
index = getCharUnit(input,index,unit,false);
|
||||||
if (Character.isDigit(unit.ch))
|
if (Character.isDigit(unit.ch))
|
||||||
buf.append(unit.ch);
|
buf.append(unit.ch);
|
||||||
} while ((index != input.length) && Character.isDigit(unit.ch));
|
} while ((index != input.length) && Character.isDigit(unit.ch));
|
||||||
|
@ -1306,7 +1318,7 @@ public class RE extends REToken {
|
||||||
else if ((unit.ch == ',') && !unit.bk) {
|
else if ((unit.ch == ',') && !unit.bk) {
|
||||||
buf = new StringBuffer();
|
buf = new StringBuffer();
|
||||||
// Read string of digits
|
// Read string of digits
|
||||||
while (((index = getCharUnit(input,index,unit)) != input.length) && Character.isDigit(unit.ch))
|
while (((index = getCharUnit(input,index,unit,false)) != input.length) && Character.isDigit(unit.ch))
|
||||||
buf.append(unit.ch);
|
buf.append(unit.ch);
|
||||||
|
|
||||||
if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))
|
if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))
|
||||||
|
|
Loading…
Reference in New Issue