src/runtime.cc - Issue 11889007: Add back ascii fast path for toupper/tolower

Unified Diff: src/runtime.cc

Issue 11889007: Add back ascii fast path for toupper/tolower (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/runtime.cc

diff --git a/src/runtime.cc b/src/runtime.cc

index b508ad3e995ed3f3a39454e69fe1633c41b88214..cd2d2dd5febdabe0430b5ff23970498cdde77033 100644

--- a/src/runtime.cc

+++ b/src/runtime.cc

@@ -5803,7 +5803,9 @@ MUST_USE_RESULT static MaybeObject* ConvertCaseHelper(

namespace {

static const uintptr_t kOneInEveryByte = kUintptrAllBitsSet / 0xFF;

+#ifdef ENABLE_LATIN_1

+static const uintptr_t kAsciiMask = kOneInEveryByte << 7;

+#endif

// Given a word and two range boundaries returns a word with high bit

// set in every byte iff the corresponding input byte was strictly in

@@ -5817,7 +5819,10 @@ static inline uintptr_t AsciiRangeMask(uintptr_t w, char m, char n) {

ASSERT((w & (kOneInEveryByte * 0x7F)) == w);

// Use strict inequalities since in edge cases the function could be

// further simplified.

- ASSERT(0 < m && m < n && n < 0x7F);

+ ASSERT(0 < m && m < n);

+#ifndef ENABLE_LATIN_1

+ ASSERT(n < 0x7F);

+#endif

// Has high bit set in every w byte less than n.

uintptr_t tmp1 = kOneInEveryByte * (0x7F + n) - w;

// Has high bit set in every w byte greater than m.

@@ -5834,7 +5839,11 @@ enum AsciiCaseConversion {

template <AsciiCaseConversion dir>

struct FastAsciiConverter {

+#ifdef ENABLE_LATIN_1

+ static bool Convert(char* dst, char* src, int length, bool* changed_out) {

+#else

static bool Convert(char* dst, char* src, int length) {

+#endif

#ifdef DEBUG

char* saved_dst = dst;

char* saved_src = src;

@@ -5846,12 +5855,18 @@ struct FastAsciiConverter {

const char lo = (dir == ASCII_TO_LOWER) ? 'A' - 1 : 'a' - 1;

const char hi = (dir == ASCII_TO_LOWER) ? 'Z' + 1 : 'z' + 1;

bool changed = false;

+#ifdef ENABLE_LATIN_1

+ uintptr_t or_acc = 0;

+#endif

char* const limit = src + length;

#ifdef V8_HOST_CAN_READ_UNALIGNED

// Process the prefix of the input that requires no conversion one

// (machine) word at a time.

while (src <= limit - sizeof(uintptr_t)) {

uintptr_t w = *reinterpret_cast<uintptr_t*>(src);

+#ifdef ENABLE_LATIN_1

+ or_acc |= w;

+#endif

if (AsciiRangeMask(w, lo, hi) != 0) {

changed = true;

break;

@@ -5864,6 +5879,9 @@ struct FastAsciiConverter {

// required one word at a time.

while (src <= limit - sizeof(uintptr_t)) {

uintptr_t w = *reinterpret_cast<uintptr_t*>(src);

+#ifdef ENABLE_LATIN_1

+ or_acc |= w;

+#endif

uintptr_t m = AsciiRangeMask(w, lo, hi);

// The mask has high (7th) bit set in every byte that needs

// conversion and we know that the distance between cases is

@@ -5877,6 +5895,9 @@ struct FastAsciiConverter {

// unaligned access is not supported).

while (src < limit) {

char c = *src;

+#ifdef ENABLE_LATIN_1

+ or_acc |= c;

+#endif

if (lo < c && c < hi) {

c ^= (1 << 5);

changed = true;

@@ -5885,10 +5906,20 @@ struct FastAsciiConverter {

++src;

++dst;

}

+#ifdef ENABLE_LATIN_1

+ if ((or_acc & kAsciiMask) != 0) {

Yang 2013/01/14 15:56:51 Would it be noticeably more expensive to do this c

+ return false;

+ }

+#endif

#ifdef DEBUG

CheckConvert(saved_dst, saved_src, length, changed);

#endif

+#ifdef ENABLE_LATIN_1

+ *changed_out = changed;

+ return true;

+#else

return changed;

+#endif

}

#ifdef DEBUG

@@ -5941,7 +5972,6 @@ MUST_USE_RESULT static MaybeObject* ConvertCase(

// Assume that the string is not empty; we need this assumption later

if (length == 0) return s;

-#ifndef ENABLE_LATIN_1

// Simpler handling of ASCII strings.

// NOTE: This assumes that the upper/lower case of an ASCII

@@ -5954,13 +5984,25 @@ MUST_USE_RESULT static MaybeObject* ConvertCase(

if (!maybe_o->ToObject(&o)) return maybe_o;

}

SeqOneByteString* result = SeqOneByteString::cast(o);

+#ifndef ENABLE_LATIN_1

bool has_changed_character = ConvertTraits::AsciiConverter::Convert(

reinterpret_cast<char*>(result->GetChars()),

reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),

length);

return has_changed_character ? result : s;

- }

+#else

+ bool has_changed_character;

+ bool is_ascii = ConvertTraits::AsciiConverter::Convert(

+ reinterpret_cast<char*>(result->GetChars()),

+ reinterpret_cast<char*>(SeqOneByteString::cast(s)->GetChars()),

+ length,

+ &has_changed_character);

+ // If not ASCII, we discard the result and take the 2 byte path.

+ if (is_ascii) {

Yang 2013/01/14 15:56:51 It looks like we could save time if Convert failed

+ return has_changed_character ? result : s;

+ }

#endif

+ }

Object* answer;

{ MaybeObject* maybe_answer =

« no previous file with comments | « no previous file | no next file » | no next file with comments »