Bug 1394 – Regex does not support forgetful matching (?:.......

Status
RESOLVED
Resolution
DUPLICATE
Severity
normal
Priority
P2
Component
phobos
Product
D
Version
D1 (retired)
Platform
All
OS
All
Creation time
2007-08-01T23:56:00Z
Last change time
2014-02-17T22:50:31Z
Keywords
patch
Assigned to
nobody
Creator
alan

Comments

Comment #0 by alan — 2007-08-01T23:56:06Z
Excuse the slight CS issues - but this fixes it. testcase: auto g = new Regex("/^(?:br|frame|hr|img|input|link|meta|range|spacer|wbr|area|param|col)$/i;"); --- dmd/src/phobos/std/regexp.d 2007-02-27 18:45:28.000000000 +0800 +++ /usr/src/dmd/src/phobos/std/regexp.d 2007-08-02 12:47:16.000000000 +0800 @@ -747,6 +747,7 @@ REbol, // beginning of line REeol, // end of line REparen, // parenthesized subexpression + REparenforget, // forgetfull parenthesized subexpression (?: REgoto, // goto offset REwordboundary, @@ -1372,11 +1397,13 @@ break; case REparen: + case REparenforget: + // len, n, () puint = cast(uint *)&prog[pc + 1]; len = puint[0]; n = puint[1]; - printf("\tREparen len=%d n=%d, pc=>%d\n", len, n, pc + 1 + uint.sizeof * 2 + len); + printf("\t%s len=%d n=%d, pc=>%d\n", (prog[pc] == REparen ? "REparen" : "REparenforget") , len, n, pc + 1 + uint.sizeof * 2 + len); pc += 1 + uint.sizeof * 2; break; @@ -1835,6 +1862,7 @@ pc = pop + len; break; + case REparenforget: case REparen: // len, () debug(regexp) printf("\tREparen\n"); @@ -1845,8 +1873,10 @@ ss = src; if (!trymatch(pop, pop + len)) goto Lnomatch; + if (program[pc] == REparen) { pmatch[n + 1].rm_so = ss; pmatch[n + 1].rm_eo = src; + } pc = pop + len; break; @@ -2150,16 +2180,25 @@ case '(': p++; + + if ( + ((p + 2) < pattern.length) && + (pattern[p] == '?') && + (pattern[p+1] == ':') + ) { + p+=2; + buf.write(REparenforget); + } else { buf.write(REparen); + } offset = buf.offset; buf.write(cast(uint)0); // reserve space for length - buf.write(re_nsub); + buf.write(re_nsub); // match number.. re_nsub++; - parseRegexp(); + parseRegexp(); // get all the sub pieces... 
*cast(uint *)&buf.data[offset] = buf.offset - (offset + uint.sizeof * 2); - if (p == pattern.length || pattern[p] != ')') - { + if (p == pattern.length || pattern[p] != ')') { error("')' expected"); return 0; } @@ -2738,6 +2777,7 @@ case REnm: case REnmq: case REparen: + case REparenforget: case REgoto: { auto bitbuf = new OutBuffer; @@ -2880,6 +2920,7 @@ i += 1 + uint.sizeof * 3 + len; break; + case REparenforget: case REparen: // len, () len = (cast(uint *)&prog[i + 1])[0];
Comment #1 by Jesse.K.Phillips+D — 2010-06-02T10:59:39Z
Adding the patch keyword as the original comment contained one.
Comment #2 by dmitry.olsh — 2010-11-08T06:19:48Z
The same fix, as a less fussy patch for D2 and std.regex, is at http://d.puremagic.com/issues/show_bug.cgi?id=5169 and works with the aforementioned test case: auto g = regex("^(?:br|frame|hr|img|input|link|meta|range|spacer|wbr|area|param|col)$","i"); I've got the same patch for std.regexp, but I'm not a patch master, so I'd like this one verified first.
Comment #3 by dmitry.olsh — 2011-06-06T02:17:15Z
*** This issue has been marked as a duplicate of issue 5673 ***