third_party/re2/ucs2.diff - Issue 10575037: Include RE2 library

Side by Side Diff: third_party/re2/ucs2.diff

Issue 10575037: Include RE2 library (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Less intrusive fix for Android Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 This is a dump from Google's source control system of the change

	2 that removed UCS-2 support from RE2. As the explanation below

	3 says, UCS-2 mode is fundamentally at odds with things like ^ and $,

	4 so it never really worked very well. But if you are interested in using

	5 it without those operators, it did work for that. It assumed that the

	6 UCS-2 data was in the native host byte order.

	7

	8 If you are interested in adding UCS-2 mode back, this patch might

	9 be a good starting point.

	10

	11

	12 Change 12780686 by rsc@rsc-re2 on 2009/09/16 15:30:15

	13

	14 Retire UCS-2 mode.

	15

	16 I added it as an experiment for V8, but it

	17 requires 2-byte lookahead to do completely,

	18 and RE2 has 1-byte lookahead (enough for UTF-8)

	19 as a fairly deep fundamental assumption,

	20 so it did not support ^ or $.

	21

	22 ==== re2/bitstate.cc#2 - re2/bitstate.cc#3 ====

	23 re2/bitstate.cc#2:314,321 - re2/bitstate.cc#3:314,319

	24 cap_[0] = p;

	25 if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.

	26 return true;

	27 - if (prog_->flags() & Regexp::UCS2)

	28 - p++;

	29 }

	30 return false;

	31 }

	32 ==== re2/compile.cc#17 - re2/compile.cc#18 ====

	33 re2/compile.cc#17:95,101 - re2/compile.cc#18:95,100

	34 // Input encodings.

	35 enum Encoding {

	36 kEncodingUTF8 = 1, // UTF-8 (0-10FFFF)

	37 - kEncodingUCS2, // UCS-2 (0-FFFF), native byte order

	38 kEncodingLatin1, // Latin1 (0-FF)

	39 };

	40

	41 re2/compile.cc#17:168,176 - re2/compile.cc#18:167,172

	42 void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase);

	43 void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase);

	44 void Add_80_10ffff();

	45 - void AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase);

	46 - void AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1,

	47 - uint8 lo2, uint8 hi2, bool fold2);

	48

	49 // New suffix that matches the byte range lo-hi, then goes to next.

	50 Inst* RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, Inst* next);

	51 re2/compile.cc#17:475,481 - re2/compile.cc#18:471,477

	52

	53 // Converts rune range lo-hi into a fragment that recognizes

	54 // the bytes that would make up those runes in the current

	55 - // encoding (Latin 1, UTF-8, or UCS-2).

	56 + // encoding (Latin 1 or UTF-8).

	57 // This lets the machine work byte-by-byte even when

	58 // using multibyte encodings.

	59

	60 re2/compile.cc#17:488,496 - re2/compile.cc#18:484,489

	61 case kEncodingLatin1:

	62 AddRuneRangeLatin1(lo, hi, foldcase);

	63 break;

	64 - case kEncodingUCS2:

	65 - AddRuneRangeUCS2(lo, hi, foldcase);

	66 - break;

	67 }

	68 }

	69

	70 re2/compile.cc#17:503,581 - re2/compile.cc#18:496,501

	71 AddSuffix(RuneByteSuffix(lo, hi, foldcase, NULL));

	72 }

	73

	74 - // Test whether 16-bit values are big or little endian.

	75 - static bool BigEndian() {

	76 - union {

	77 - char byte[2];

	78 - int16 endian;

	79 - } u;

	80 -

	81 - u.byte[0] = 1;

	82 - u.byte[1] = 2;

	83 - return u.endian == 0x0102;

	84 - }

	85 -

	86 - void Compiler::AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1,

	87 - uint8 lo2, uint8 hi2, bool fold2) {

	88 - Inst* ip;

	89 - if (reversed_) {

	90 - ip = RuneByteSuffix(lo1, hi1, fold1, NULL);

	91 - ip = RuneByteSuffix(lo2, hi2, fold2, ip);

	92 - } else {

	93 - ip = RuneByteSuffix(lo2, hi2, fold2, NULL);

	94 - ip = RuneByteSuffix(lo1, hi1, fold1, ip);

	95 - }

	96 - AddSuffix(ip);

	97 - }

	98 -

	99 - void Compiler::AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase) {

	100 - if (lo > hi || lo > 0xFFFF)

	101 - return;

	102 - if (hi > 0xFFFF)

	103 - hi = 0xFFFF;

	104 -

	105 - // We'll assemble a pattern assuming big endian.

	106 - // If the machine isn't, tell Cat to reverse its arguments.

	107 - bool oldreversed = reversed_;

	108 - if (!BigEndian()) {

	109 - reversed_ = !oldreversed;

	110 - }

	111 -

	112 - // Split into bytes.

	113 - int lo1 = lo >> 8;

	114 - int lo2 = lo & 0xFF;

	115 - int hi1 = hi >> 8;

	116 - int hi2 = hi & 0xFF;

	117 -

	118 - if (lo1 == hi1) {

	119 - // Easy case: high bits are same in both.

	120 - // Only do ASCII case folding on the second byte if the top byte is 00.

	121 - AddUCS2Pair(lo1, lo1, false, lo2, hi2, lo1==0 && foldcase);

	122 - } else {

	123 - // Harder case: different second byte ranges depending on first byte.

	124 -

	125 - // Initial fragment.

	126 - if (lo2 > 0) {

	127 - AddUCS2Pair(lo1, lo1, false, lo2, 0xFF, lo1==0 && foldcase);

	128 - lo1++;

	129 - }

	130 -

	131 - // Trailing fragment.

	132 - if (hi2 < 0xFF) {

	133 - AddUCS2Pair(hi1, hi1, false, 0, hi2, false);

	134 - hi1--;

	135 - }

	136 -

	137 - // Inner ranges.

	138 - if (lo1 <= hi1) {

	139 - AddUCS2Pair(lo1, hi1, false, 0, 0xFF, false);

	140 - }

	141 - }

	142 -

	143 - // Restore reverse setting.

	144 - reversed_ = oldreversed;

	145 - }

	146 -

	147 // Table describing how to make a UTF-8 matching machine

	148 // for the rune range 80-10FFFF (Runeself-Runemax).

	149 // This range happens frequently enough (for example /./ and /[^a-z]/)

	150 re2/compile.cc#17:707,716 - re2/compile.cc#18:627,634

	151

	152 Frag Compiler::Literal(Rune r, bool foldcase) {

	153 switch (encoding_) {

	154 - default: // UCS-2 or something new

	155 - BeginRange();

	156 - AddRuneRange(r, r, foldcase);

	157 - return EndRange();

	158 + default:

	159 + return kNullFrag;

	160

	161 case kEncodingLatin1:

	162 return ByteRange(r, r, foldcase);

	163 re2/compile.cc#17:927,934 - re2/compile.cc#18:845,850

	164

	165 if (re->parse_flags() & Regexp::Latin1)

	166 c.encoding_ = kEncodingLatin1;

	167 - else if (re->parse_flags() & Regexp::UCS2)

	168 - c.encoding_ = kEncodingUCS2;

	169 c.reversed_ = reversed;

	170 if (max_mem <= 0) {

	171 c.max_inst_ = 100000; // more than enough

	172 re2/compile.cc#17:983,993 - re2/compile.cc#18:899,905

	173 c.prog_->set_start_unanchored(c.prog_->start());

	174 } else {

	175 Frag dot;

	176 - if (c.encoding_ == kEncodingUCS2) {

	177 - dot = c.Cat(c.ByteRange(0x00, 0xFF, false), c.ByteRange(0x00, 0xFF, false));

	178 - } else {

	179 - dot = c.ByteRange(0x00, 0xFF, false);

	180 - }

	181 + dot = c.ByteRange(0x00, 0xFF, false);

	182 Frag dotloop = c.Star(dot, true);

	183 Frag unanchored = c.Cat(dotloop, all);

	184 c.prog_->set_start_unanchored(unanchored.begin);

	185 ==== re2/nfa.cc#8 - re2/nfa.cc#9 ====

	186 re2/nfa.cc#8:426,432 - re2/nfa.cc#9:426,431

	187 const char* bp = context.begin();

	188 int c = -1;

	189 int wasword = 0;

	190 - bool ucs2 = prog_->flags() & Regexp::UCS2;

	191

	192 if (text.begin() > context.begin()) {

	193 c = text.begin()[-1] & 0xFF;

	194 re2/nfa.cc#8:492,498 - re2/nfa.cc#9:491,497

	195 // If there's a required first byte for an unanchored search

	196 // and we're not in the middle of any possible matches,

	197 // use memchr to search for the byte quickly.

	198 - if (!ucs2 && !anchored && first_byte_ >= 0 && runq->size() == 0 &&

	199 + if (!anchored && first_byte_ >= 0 && runq->size() == 0 &&

	200 p < text.end() && (p[0] & 0xFF) != first_byte_) {

	201 p = reinterpret_cast<const char*>(memchr(p, first_byte_,

	202 text.end() - p));

	203 re2/nfa.cc#8:505,526 - re2/nfa.cc#9:504,514

	204 flag = Prog::EmptyFlags(context, p);

	205 }

	206

	207 - // In UCS-2 mode, if we need to start a new thread,

	208 - // make sure to do it on an even boundary.

	209 - if(ucs2 && runq->size() == 0 &&

	210 - (p - context.begin()) % 2 && p < text.end()) {

	211 - p++;

	212 - flag = Prog::EmptyFlags(context, p);

	213 - }

	214 -

	215 // Steal match storage (cleared but unused as of yet)

	216 // temporarily to hold match boundaries for new thread.

	217 - // In UCS-2 mode, only start the thread on a 2-byte boundary.

	218 - if(!ucs2 || (p - context.begin()) % 2 == 0) {

	219 - match_[0] = p;

	220 - AddToThreadq(runq, start_, flag, p, match_);

	221 - match_[0] = NULL;

	222 - }

	223 + match_[0] = p;

	224 + AddToThreadq(runq, start_, flag, p, match_);

	225 + match_[0] = NULL;

	226 }

	227

	228 // If all the threads have died, stop early.

	229 ==== re2/parse.cc#22 - re2/parse.cc#23 ====

	230 re2/parse.cc#22:160,167 - re2/parse.cc#23:160,165

	231 status_(status), stacktop_(NULL), ncap_(0) {

	232 if (flags_ & Latin1)

	233 rune_max_ = 0xFF;

	234 - else if (flags & UCS2)

	235 - rune_max_ = 0xFFFF;

	236 else

	237 rune_max_ = Runemax;

	238 }

	239 re2/parse.cc#22:365,387 - re2/parse.cc#23:363,374

	240 bool Regexp::ParseState::PushCarat() {

	241 if (flags_ & OneLine) {

	242 return PushSimpleOp(kRegexpBeginText);

	243 - } else {

	244 - if (flags_ & UCS2) {

	245 - status_->set_code(kRegexpUnsupported);

	246 - status_->set_error_arg("multiline ^ in UCS-2 mode");

	247 - return false;

	248 - }

	249 - return PushSimpleOp(kRegexpBeginLine);

	250 }

	251 + return PushSimpleOp(kRegexpBeginLine);

	252 }

	253

	254 // Pushes a \b or \B onto the stack.

	255 bool Regexp::ParseState::PushWordBoundary(bool word) {

	256 - if (flags_ & UCS2) {

	257 - status_->set_code(kRegexpUnsupported);

	258 - status_->set_error_arg("\\b or \\B in UCS-2 mode");

	259 - return false;

	260 - }

	261 if (word)

	262 return PushSimpleOp(kRegexpWordBoundary);

	263 return PushSimpleOp(kRegexpNoWordBoundary);

	264 re2/parse.cc#22:397,407 - re2/parse.cc#23:384,389

	265 bool ret = PushSimpleOp(kRegexpEndText);

	266 flags_ = oflags;

	267 return ret;

	268 - }

	269 - if (flags_ & UCS2) {

	270 - status_->set_code(kRegexpUnsupported);

	271 - status_->set_error_arg("multiline $ in UCS-2 mode");

	272 - return false;

	273 }

	274 return PushSimpleOp(kRegexpEndLine);

	275 }

	276 ==== re2/re2.cc#34 - re2/re2.cc#35 ====

	277 re2/re2.cc#34:79,86 - re2/re2.cc#35:79,84

	278 return RE2::ErrorBadUTF8;

	279 case re2::kRegexpBadNamedCapture:

	280 return RE2::ErrorBadNamedCapture;

	281 - case re2::kRegexpUnsupported:

	282 - return RE2::ErrorUnsupported;

	283 }

	284 return RE2::ErrorInternal;

	285 }

	286 re2/re2.cc#34:122,130 - re2/re2.cc#35:120,125

	287 break;

	288 case RE2::Options::EncodingLatin1:

	289 flags |= Regexp::Latin1;

	290 - break;

	291 - case RE2::Options::EncodingUCS2:

	292 - flags |= Regexp::UCS2;

	293 break;

	294 }

	295

	296 ==== re2/re2.h#36 - re2/re2.h#37 ====

	297 re2/re2.h#36:246,252 - re2/re2.h#37:246,251

	298 ErrorBadUTF8, // invalid UTF-8 in regexp

	299 ErrorBadNamedCapture, // bad named capture group

	300 ErrorPatternTooLarge, // pattern too large (compile failed)

	301 - ErrorUnsupported, // unsupported feature (in UCS-2 mode)

	302 };

	303

	304 // Predefined common options.

	305 re2/re2.h#36:570,576 - re2/re2.h#37:569,574

	306

	307 enum Encoding {

	308 EncodingUTF8 = 1,

	309 - EncodingUCS2, // 16-bit Unicode 0-FFFF only

	310 EncodingLatin1

	311 };

	312

	313 ==== re2/regexp.cc#15 - re2/regexp.cc#16 ====

	314 re2/regexp.cc#15:324,333 - re2/regexp.cc#16:324,329

	315 // the regexp that remains after the prefix. The prefix might

	316 // be ASCII case-insensitive.

	317 bool Regexp::RequiredPrefix(string prefix, bool foldcase, Regexp** suffix) {

	318 - // Don't even bother for UCS-2; it's time to throw that code away.

	319 - if (parse_flags_ & UCS2)

	320 - return false;

	321 -

	322 // No need for a walker: the regexp must be of the form

	323 // 1. some number of ^ anchors

	324 // 2. a literal char or string

	325 ==== re2/regexp.h#20 - re2/regexp.h#21 ====

	326 re2/regexp.h#20:187,193 - re2/regexp.h#21:187,192

	327 kRegexpBadPerlOp, // bad perl operator

	328 kRegexpBadUTF8, // invalid UTF-8 in regexp

	329 kRegexpBadNamedCapture, // bad named capture

	330 - kRegexpUnsupported, // unsupported operator

	331 };

	332

	333 // Error status for certain operations.

	334 re2/regexp.h#20:307,316 - re2/regexp.h#21:306,314

	335 // \Q and \E to disable/enable metacharacters

	336 // (?P<name>expr) for named captures

	337 // \C to match any single byte

	338 - UCS2 = 1<<10, // Text is in UCS-2, regexp is in UTF-8.

	339 - UnicodeGroups = 1<<11, // Allow \p{Han} for Unicode Han group

	340 + UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group

	341 // and \P{Han} for its negation.

	342 - NeverNL = 1<<12, // Never match NL, even if the regexp mentions

	343 + NeverNL = 1<<11, // Never match NL, even if the regexp mentions

	344 // it explicitly.

	345

	346 // As close to Perl as we can get.

	347 ==== re2/testing/backtrack.cc#4 - re2/testing/backtrack.cc#5 ====

	348 re2/testing/backtrack.cc#4:134,141 - re2/testing/backtrack.cc#5:134,139

	349 cap_[0] = p;

	350 if (Visit(prog_->start(), p)) // Match must be leftmost; done.

	351 return true;

	352 - if (prog_->flags() & Regexp::UCS2)

	353 - p++;

	354 }

	355 return false;

	356 }

	357 ==== re2/testing/tester.cc#12 - re2/testing/tester.cc#13 ====

	358 re2/testing/tester.cc#12:144,154 - re2/testing/tester.cc#13:144,152

	359 static ParseMode parse_modes[] = {

	360 { single_line, "single-line" },

	361 { single_line|Regexp::Latin1, "single-line, latin1" },

	362 - { single_line|Regexp::UCS2, "single-line, ucs2" },

	363 { multi_line, "multiline" },

	364 { multi_line|Regexp::NonGreedy, "multiline, nongreedy" },

	365 { multi_line|Regexp::Latin1, "multiline, latin1" },

	366 - { multi_line|Regexp::UCS2, "multiline, ucs2" },

	367 };

	368

	369 static string FormatMode(Regexp::ParseFlags flags) {

	370 re2/testing/tester.cc#12:179,189 - re2/testing/tester.cc#13:177,185

	371 RegexpStatus status;

	372 regexp_ = Regexp::Parse(regexp_str, flags, &status);

	373 if (regexp_ == NULL) {

	374 - if (status.code() != kRegexpUnsupported) {

	375 - LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)

	376 - << " mode: " << FormatMode(flags);

	377 - error_ = true;

	378 - }

	379 + LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)

	380 + << " mode: " << FormatMode(flags);

	381 + error_ = true;

	382 return;

	383 }

	384 prog_ = regexp_->CompileToProg(0);

	385 re2/testing/tester.cc#12:230,237 - re2/testing/tester.cc#13:226,231

	386 RE2::Options options;

	387 if (flags & Regexp::Latin1)

	388 options.set_encoding(RE2::Options::EncodingLatin1);

	389 - else if (flags & Regexp::UCS2)

	390 - options.set_encoding(RE2::Options::EncodingUCS2);

	391 if (kind_ == Prog::kLongestMatch)

	392 options.set_longest_match(true);

	393 re2_ = new RE2(re, options);

	394 re2/testing/tester.cc#12:281,379 - re2/testing/tester.cc#13:275,280

	395 delete re2_;

	396 }

	397

	398 - // Converts UTF-8 string in text into UCS-2 string in new_text.

	399 - static bool ConvertUTF8ToUCS2(const StringPiece& text, StringPiece* new_text) {

	400 - const char* p = text.begin();

	401 - const char* ep = text.end();

	402 - uint16* q = new uint16[ep - p];

	403 - uint16* q0 = q;

	404 -

	405 - int n;

	406 - Rune r;

	407 - for (; p < ep; p += n) {

	408 - if (!fullrune(p, ep - p)) {

	409 - delete[] q0;

	410 - return false;

	411 - }

	412 - n = chartorune(&r, p);

	413 - if (r > 0xFFFF) {

	414 - delete[] q0;

	415 - return false;

	416 - }

	417 - *q++ = r;

	418 - }

	419 - *new_text = StringPiece(reinterpret_cast<char*>(q0), 2*(q - q0));

	420 - return true;

	421 - }

	422 -

	423 - // Rewrites *sp from being a pointer into text8 (UTF-8)

	424 - // to being a pointer into text16 (equivalent text but in UCS-2).

	425 - static void AdjustUTF8ToUCS2(const StringPiece& text8, const StringPiece& text16,

	426 - StringPiece *sp) {

	427 - if (sp->begin() == NULL && text8.begin() != NULL)

	428 - return;

	429 -

	430 - int nrune = 0;

	431 - int n;

	432 - Rune r;

	433 - const char* p = text8.begin();

	434 - const char* ep = text8.end();

	435 - const char* spbegin = NULL;

	436 - const char* spend = NULL;

	437 - for (;;) {

	438 - if (p == sp->begin())

	439 - spbegin = text16.begin() + sizeof(uint16)*nrune;

	440 - if (p == sp->end())

	441 - spend = text16.begin() + sizeof(uint16)*nrune;

	442 - if (p >= ep)

	443 - break;

	444 - n = chartorune(&r, p);

	445 - p += n;

	446 - nrune++;

	447 - }

	448 - if (spbegin == NULL || spend == NULL) {

	449 - LOG(FATAL) << "Error in AdjustUTF8ToUCS2 "

	450 - << CEscape(text8) << " "

	451 - << (int)(sp->begin() - text8.begin()) << " "

	452 - << (int)(sp->end() - text8.begin());

	453 - }

	454 - *sp = StringPiece(spbegin, spend - spbegin);

	455 - }

	456 -

	457 - // Rewrites *sp from begin a pointer into text16 (UCS-2)

	458 - // to being a pointer into text8 (equivalent text but in UTF-8).

	459 - static void AdjustUCS2ToUTF8(const StringPiece& text16, const StringPiece& text8,

	460 - StringPiece* sp) {

	461 - if (sp->begin() == NULL)

	462 - return;

	463 -

	464 - int nrune = 0;

	465 - int n;

	466 - Rune r;

	467 - const char* p = text8.begin();

	468 - const char* ep = text8.end();

	469 - const char* spbegin = NULL;

	470 - const char* spend = NULL;

	471 - for (;;) {

	472 - if (nrune == (sp->begin() - text16.begin())/2)

	473 - spbegin = p;

	474 - if (nrune == (sp->end() - text16.begin())/2)

	475 - spend = p;

	476 - if (p >= ep)

	477 - break;

	478 - n = chartorune(&r, p);

	479 - p += n;

	480 - nrune++;

	481 - }

	482 - if (text8.begin() != NULL && (spbegin == NULL || spend == NULL)) {

	483 - LOG(FATAL) << "Error in AdjustUCS2ToUTF8 "

	484 - << CEscape(text16) << " "

	485 - << (int)(sp->begin() - text16.begin()) << " "

	486 - << (int)(sp->end() - text16.begin());

	487 - }

	488 - *sp = StringPiece(spbegin, spend - spbegin);

	489 - }

	490 -

	491 // Runs a single search using the named engine type.

	492 // This interface hides all the irregularities of the various

	493 // engine interfaces from the rest of this file.

	494 re2/testing/tester.cc#12:393,411 - re2/testing/tester.cc#13:294,300

	495

	496 StringPiece text = orig_text;

	497 StringPiece context = orig_context;

	498 - bool ucs2 = false;

	499

	500 - if ((flags() & Regexp::UCS2) && type != kEnginePCRE) {

	501 - if (!ConvertUTF8ToUCS2(orig_context, &context)) {

	502 - result->skipped = true;

	503 - return;

	504 - }

	505 -

	506 - // Rewrite context to refer to new text.

	507 - AdjustUTF8ToUCS2(orig_context, context, &text);

	508 - ucs2 = true;

	509 - }

	510 -

	511 switch (type) {

	512 default:

	513 LOG(FATAL) << "Bad RunSearch type: " << (int)type;

	514 re2/testing/tester.cc#12:557,577 - re2/testing/tester.cc#13:446,451

	515 }

	516 }

	517

	518 - // If we did UCS-2 matching, rewrite the matches to refer

	519 - // to the original UTF-8 text.

	520 - if (ucs2) {

	521 - if (result->matched) {

	522 - if (result->have_submatch0) {

	523 - AdjustUCS2ToUTF8(context, orig_context, &result->submatch[0]);

	524 - } else if (result->have_submatch) {

	525 - for (int i = 0; i < nsubmatch; i++) {

	526 - AdjustUCS2ToUTF8(context, orig_context, &result->submatch[i]);

	527 - }

	528 - }

	529 - }

	530 - delete[] context.begin();

	531 - }

	532 -

	533 if (!result->matched)

	534 memset(result->submatch, 0, sizeof result->submatch);

	535 }

	536 re2/testing/tester.cc#12:596,617 - re2/testing/tester.cc#13:470,475

	537 return true;

	538 }

	539

	540 - // Check whether text uses only Unicode points <= 0xFFFF

	541 - // (in the BMP).

	542 - static bool IsBMP(const StringPiece& text) {

	543 - const char* p = text.begin();

	544 - const char* ep = text.end();

	545 - while (p < ep) {

	546 - if (!fullrune(p, ep - p))

	547 - return false;

	548 - Rune r;

	549 - p += chartorune(&r, p);

	550 - if (r > 0xFFFF)

	551 - return false;

	552 - }

	553 - return true;

	554 - }

	555 -

	556 // Runs a single test.

	557 bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,

	558 Prog::Anchor anchor) {

	559 re2/testing/tester.cc#12:619,625 - re2/testing/tester.cc#13:477,483

	560 Result correct;

	561 RunSearch(kEngineBacktrack, text, context, anchor, &correct);

	562 if (correct.skipped) {

	563 - if (regexp_ == NULL || !IsBMP(context)) // okay to skip in UCS-2 mode

	564 + if (regexp_ == NULL)

	565 return true;

	566 LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)

	567 << " " << FormatMode(flags_);

OLD	NEW

« no previous file with comments | « third_party/re2/testinstall.cc ('k') | third_party/re2/util/arena.h » ('j') | no next file with comments »