Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(553)

Side by Side Diff: src/jsregexp.cc

Issue 11962035: Fix some latin-1 webkit unit tests (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Fixed last latin-1 webkit test failure Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/regexp-macro-assembler.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2837 matching lines...) Expand 10 before | Expand all | Expand 10 after
2848 2848
2849 2849
2850 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { 2850 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
2851 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case); 2851 RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);
2852 if (next == NULL) return set_replacement(NULL); 2852 if (next == NULL) return set_replacement(NULL);
2853 on_success_ = next; 2853 on_success_ = next;
2854 return set_replacement(this); 2854 return set_replacement(this);
2855 } 2855 }
2856 2856
2857 2857
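2858 // We need to check for the following characters: 0x39c 0x3bc 0x178 (Greek capital mu, Greek small mu, and capital Y with diaeresis), whose case equivalents 0xb5 and 0xff lie inside the Latin-1 range.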
2859 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
2860 #ifdef ENABLE_LATIN_1
2861 // TODO(dcarney): this could be a lot more efficient.
2862 return range.Contains(0x39c) ||
2863 range.Contains(0x3bc) || range.Contains(0x178);
2864 #else
2865 return false;
2866 #endif
2867 }
2868
2869
2870 #ifdef ENABLE_LATIN_1
2871 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
2872 for (int i = 0; i < ranges->length(); i++) {
2873 // TODO(dcarney): this could be a lot more efficient.
2874 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
2875 }
2876 return false;
2877 }
2878 #endif
2879
2880
2858 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) { 2881 RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
2859 if (info()->replacement_calculated) return replacement(); 2882 if (info()->replacement_calculated) return replacement();
2860 if (depth < 0) return this; 2883 if (depth < 0) return this;
2861 ASSERT(!info()->visited); 2884 ASSERT(!info()->visited);
2862 VisitMarker marker(info()); 2885 VisitMarker marker(info());
2863 int element_count = elms_->length(); 2886 int element_count = elms_->length();
2864 for (int i = 0; i < element_count; i++) { 2887 for (int i = 0; i < element_count; i++) {
2865 TextElement elm = elms_->at(i); 2888 TextElement elm = elms_->at(i);
2866 if (elm.type == TextElement::ATOM) { 2889 if (elm.type == TextElement::ATOM) {
2867 Vector<const uc16> quarks = elm.data.u_atom->data(); 2890 Vector<const uc16> quarks = elm.data.u_atom->data();
2868 for (int j = 0; j < quarks.length(); j++) { 2891 for (int j = 0; j < quarks.length(); j++) {
2869 #ifndef ENABLE_LATIN_1 2892 #ifndef ENABLE_LATIN_1
2870 if (quarks[j] > String::kMaxOneByteCharCode) { 2893 if (quarks[j] > String::kMaxOneByteCharCode) {
2871 return set_replacement(NULL); 2894 return set_replacement(NULL);
2872 } 2895 }
2873 #else 2896 #else
2874 if (quarks[j] <= String::kMaxOneByteCharCode) continue; 2897 uint16_t c = quarks[j];
2898 if (c <= String::kMaxOneByteCharCode) continue;
2875 if (!ignore_case) return set_replacement(NULL); 2899 if (!ignore_case) return set_replacement(NULL);
2876 // Here, we need to check for characters whose upper and lower cases 2900 // Here, we need to check for characters whose upper and lower cases
2877 // are outside the Latin-1 range. 2901 // are outside the Latin-1 range.
2878 if (!unibrow::Latin1::NonLatin1CanBeConvertedToLatin1(quarks[j])) { 2902 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
2879 return set_replacement(NULL); 2903 // Character is outside Latin-1 completely
2880 } 2904 if (converted == 0) return set_replacement(NULL);
2905 // Convert quark to Latin-1 in place.
2906 uint16_t* copy = const_cast<uint16_t*>(quarks.start());
2907 copy[j] = converted;
2881 #endif 2908 #endif
2882 } 2909 }
2883 } else { 2910 } else {
2884 ASSERT(elm.type == TextElement::CHAR_CLASS); 2911 ASSERT(elm.type == TextElement::CHAR_CLASS);
2885 #ifdef ENABLE_LATIN_1
2886 // TODO(dcarney): Can this be improved?
2887 if (ignore_case) continue;
2888 #endif
2889 RegExpCharacterClass* cc = elm.data.u_char_class; 2912 RegExpCharacterClass* cc = elm.data.u_char_class;
2890 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); 2913 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2891 if (!CharacterRange::IsCanonical(ranges)) { 2914 if (!CharacterRange::IsCanonical(ranges)) {
2892 CharacterRange::Canonicalize(ranges); 2915 CharacterRange::Canonicalize(ranges);
2893 } 2916 }
2894 // Now they are in order so we only need to look at the first. 2917 // Now they are in order so we only need to look at the first.
2895 int range_count = ranges->length(); 2918 int range_count = ranges->length();
2896 if (cc->is_negated()) { 2919 if (cc->is_negated()) {
2897 if (range_count != 0 && 2920 if (range_count != 0 &&
2898 ranges->at(0).from() == 0 && 2921 ranges->at(0).from() == 0 &&
2899 ranges->at(0).to() >= String::kMaxOneByteCharCode) { 2922 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
2923 #ifdef ENABLE_LATIN_1
2924 // This will be handled in a later filter.
2925 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2926 #endif
2900 return set_replacement(NULL); 2927 return set_replacement(NULL);
2901 } 2928 }
2902 } else { 2929 } else {
2903 if (range_count == 0 || 2930 if (range_count == 0 ||
2904 ranges->at(0).from() > String::kMaxOneByteCharCode) { 2931 ranges->at(0).from() > String::kMaxOneByteCharCode) {
2932 #ifdef ENABLE_LATIN_1
2933 // This will be handled in a later filter.
2934 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
2935 #endif
2905 return set_replacement(NULL); 2936 return set_replacement(NULL);
2906 } 2937 }
2907 } 2938 }
2908 } 2939 }
2909 } 2940 }
2910 return FilterSuccessor(depth - 1, ignore_case); 2941 return FilterSuccessor(depth - 1, ignore_case);
2911 } 2942 }
2912 2943
2913 2944
2914 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) { 2945 RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {
(...skipping 2432 matching lines...) Expand 10 before | Expand all | Expand 10 after
5347 table.ForEach(&callback); 5378 table.ForEach(&callback);
5348 } 5379 }
5349 5380
5350 5381
5351 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, 5382 void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
5352 bool is_ascii, 5383 bool is_ascii,
5353 Zone* zone) { 5384 Zone* zone) {
5354 Isolate* isolate = Isolate::Current(); 5385 Isolate* isolate = Isolate::Current();
5355 uc16 bottom = from(); 5386 uc16 bottom = from();
5356 uc16 top = to(); 5387 uc16 top = to();
5357 if (is_ascii) { 5388 if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {
5358 if (bottom > String::kMaxOneByteCharCode) return; 5389 if (bottom > String::kMaxOneByteCharCode) return;
5359 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode; 5390 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
5360 } 5391 }
5361 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 5392 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5362 if (top == bottom) { 5393 if (top == bottom) {
5363 // If this is a singleton we just expand the one character. 5394 // If this is a singleton we just expand the one character.
5364 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars); 5395 int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
5365 for (int i = 0; i < length; i++) { 5396 for (int i = 0; i < length; i++) {
5366 uc32 chr = chars[i]; 5397 uc32 chr = chars[i];
5367 if (chr != bottom) { 5398 if (chr != bottom) {
(...skipping 811 matching lines...) Expand 10 before | Expand all | Expand 10 after
6179 } 6210 }
6180 6211
6181 return compiler.Assemble(&macro_assembler, 6212 return compiler.Assemble(&macro_assembler,
6182 node, 6213 node,
6183 data->capture_count, 6214 data->capture_count,
6184 pattern); 6215 pattern);
6185 } 6216 }
6186 6217
6187 6218
6188 }} // namespace v8::internal 6219 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « no previous file | src/regexp-macro-assembler.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698