Bug 1395 – Regex does not support range [something to \*]
Status
RESOLVED
Resolution
INVALID
Severity
normal
Priority
P2
Component
phobos
Product
D
Version
D1 (retired)
Platform
All
OS
All
Creation time
2007-08-02T03:13:00Z
Last change time
2014-02-17T22:51:26Z
Assigned to
andrei
Creator
alan
Comments
Comment #0 by alan — 2007-08-02T03:13:01Z
test case:
r = new Regex("^(#)?([\w-\*]+)");
Error: inverted range in character class w > * in ^(#)?([\w-\*]+)
(error expanded a bit here..)
The problem appears to be the '*' character used as the end of a range, as in [...-*],
along with having \w as the start of the range (which does resolve well in the check in regex).
This is a fix for parseRange() that seems to work.
// Proposed replacement for the main scanning loop of parseRange(), as posted in
// this report. Each iteration inspects pattern[p] and either closes the class
// (']'), handles a backslash escape, handles '-', or takes a literal character.
// `rs` records what was seen last (RS.start, RS.rliteral, RS.dash), `c` holds
// the pending literal and `c2` the character just read. The declarations of
// p, pattern, r, rs, c, c2, i, cmax and the Lerr label are outside this fragment.
for (;;)
{
// NOTE(review): declared inside the loop body, so it is 0 again at the start
// of every iteration; a value stored by one iteration is not visible to the next.
int lastStart = 0;
// The pattern ended before a closing ']' was found.
if (p == pattern.length)
goto Lerr;
switch (pattern[p])
{
// End of the character class: flush whatever is still pending.
case ']':
switch (rs)
{
// A trailing '-' is recorded as a literal, then control falls through
// to record the pending literal c as well.
case RS.dash:
r.setbit2('-');
case RS.rliteral:
r.setbit2(c);
break;
case RS.start:
break;
default:
assert(0);
}
// Step past ']' and leave the switch; the `break` that follows the switch
// at the bottom of the loop body then exits the loop.
p++;
break;
// Backslash: either a class escape (\d \D \s \S \w \W) or another escape
// that is handed to escape() below.
case '\\':
p++;
// presumably sizes the bit set to cover 0..cmax — confirm against setbitmax
r.setbitmax(cmax);
// A lone trailing backslash is an error.
if (p == pattern.length)
goto Lerr;
switch (pattern[p])
{
// \d: mark '0'..'9'.
case 'd':
for (i = '0'; i <= '9'; i++)
r.bits[i] = 1;
lastStart = '0';
goto Lrs;
// \D: mark 1..('0' - 1) and ('9' + 1)..cmax; index 0 is left unset.
case 'D':
for (i = 1; i < '0'; i++)
r.bits[i] = 1;
for (i = '9' + 1; i <= cmax; i++)
r.bits[i] = 1;
lastStart = 1;
goto Lrs;
// \s: mark every i in 0..cmax for which isspace(i) holds; lastStart keeps
// the first such i (it starts at -1 and is only replaced while still -1).
case 's':
lastStart = -1;
for (i = 0; i <= cmax; i++)
if (isspace(i)) {
r.bits[i] = 1;
lastStart = lastStart > -1 ? lastStart : i;
}
goto Lrs;
// \S: mark every i in 1..cmax for which isspace(i) does not hold.
case 'S':
lastStart = -1;
for (i = 1; i <= cmax; i++)
if (!isspace(i)) {
r.bits[i] = 1;
lastStart = lastStart > -1 ? lastStart : i;
}
goto Lrs;
// \w: mark every i in 1..cmax for which isword(i) holds.
case 'w':
lastStart = -1;
for (i = 1; i <= cmax; i++)
if (isword(i)) {
r.bits[i] = 1;
lastStart = lastStart > -1 ? lastStart : i;
}
goto Lrs;
// \W: mark every i in 1..cmax for which isword(i) does not hold.
// NOTE(review): unlike 's', 'S' and 'w', lastStart is not reset to -1 here,
// so it keeps its initial 0 and the conditional never stores i — confirm intent.
case 'W':
for (i = 1; i <= cmax; i++)
if (!isword(i)) {
r.bits[i] = 1;
lastStart = lastStart > -1 ? lastStart : i;
}
goto Lrs;
// Shared tail for the class escapes: flush a pending literal (and a pending
// '-' as a literal), return to RS.start and move on to the next iteration.
// NOTE(review): p is not advanced past the class letter anywhere on this
// path in the visible code — confirm the next iteration sees the right char.
Lrs:
switch (rs)
{
case RS.dash:
r.setbit2('-');
case RS.rliteral:
r.setbit2(c);
break;
default:
break;
}
rs = RS.start;
continue;
// Not a class escape: leave the inner switch and fall into escape() below.
default:
break;
}
// escape() is defined outside this fragment; presumably it decodes the escape
// at p and advances p — TODO confirm. lastStart is still 0 on this path.
c2 = escape();
goto Lrange;
case '-':
p++;
// NOTE(review): c2 is not assigned on this path in the visible code before
// jumping to Lrange — confirm it holds the intended value ('-').
if (rs == RS.start)
goto Lrange;
// A '-' after a literal may start a range; remember that.
else if (rs == RS.rliteral)
rs = RS.dash;
// A second '-' in a row: record the pending literal and '-' as members.
else if (rs == RS.dash)
{
r.setbit2(c);
r.setbit2('-');
rs = RS.start;
}
continue;
// Ordinary character: remember the previous literal in lastStart, read the
// current character into c2 and advance.
default:
lastStart = c;
c2 = pattern[p];
p++;
// Common handling for a character c2 obtained from any of the paths above.
Lrange:
switch (rs)
{
// Previous literal was not part of a range: record it, then fall through
// so that c2 becomes the new pending literal.
case RS.rliteral:
r.setbit2(c);
case RS.start:
c = c2;
rs = RS.rliteral;
break;
// "c - c2": c2 closes a range that started at c.
case RS.dash:
// Change proposed in this report: when the range ends in '*', mark
// everything from lastStart through cmax instead of reporting an error.
if (c2 == '*') { // [a-\*]
for (i = lastStart; i <= cmax; i++)
r.bits[i] = 1;
r.setbitmax(cmax);
rs = RS.start;
break;
} else if (c > c2) {
error("inverted range in character class "
~ cast(char)c ~ " > " ~ cast(char)c2 ~ " in " ~ pattern ~ " \n" );
return 0;
}
// Normal range: mark every character from c through c2 inclusive.
r.setbitmax(c2);
//printf("c = %x, c2 = %x\n",c,c2);
for (; c <= c2; c++)
r.bits[c] = 1;
rs = RS.start;
break;
default:
assert(0);
}
continue;
}
// Only reached from the ']' case above: the class is complete.
break;
}
Comment #1 by alan — 2007-08-03T00:05:46Z
After looking into this in more detail - the fix is not valid.
The two regexes that are causing me problems are:
[\w-\.]
and
[\w-\*]
I think the parser needs to interpret those as
[\w\*-]
[\w\.-]