| OLD | NEW | 
|    1 // Copyright 2007-2010 the V8 project authors. All rights reserved. |    1 // Copyright 2007-2010 the V8 project authors. All rights reserved. | 
|    2 // Redistribution and use in source and binary forms, with or without |    2 // Redistribution and use in source and binary forms, with or without | 
|    3 // modification, are permitted provided that the following conditions are |    3 // modification, are permitted provided that the following conditions are | 
|    4 // met: |    4 // met: | 
|    5 // |    5 // | 
|    6 //     * Redistributions of source code must retain the above copyright |    6 //     * Redistributions of source code must retain the above copyright | 
|    7 //       notice, this list of conditions and the following disclaimer. |    7 //       notice, this list of conditions and the following disclaimer. | 
|    8 //     * Redistributions in binary form must reproduce the above |    8 //     * Redistributions in binary form must reproduce the above | 
|    9 //       copyright notice, this list of conditions and the following |    9 //       copyright notice, this list of conditions and the following | 
|   10 //       disclaimer in the documentation and/or other materials provided |   10 //       disclaimer in the documentation and/or other materials provided | 
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|   71     } else { |   71     } else { | 
|   72       entries_[c & kMask] = CacheEntry(c, 0); |   72       entries_[c & kMask] = CacheEntry(c, 0); | 
|   73       return 0; |   73       return 0; | 
|   74     } |   74     } | 
|   75   } else { |   75   } else { | 
|   76     return length; |   76     return length; | 
|   77   } |   77   } | 
|   78 } |   78 } | 
|   79  |   79  | 
|   80  |   80  | 
|   81 unsigned Utf8::Encode(char* str, uchar c) { |   81 unsigned Utf8::Encode(char* str, uchar c, int previous) { | 
|   82   static const int kMask = ~(1 << 6); |   82   static const int kMask = ~(1 << 6); | 
|   83   if (c <= kMaxOneByteChar) { |   83   if (c <= kMaxOneByteChar) { | 
|   84     str[0] = c; |   84     str[0] = c; | 
|   85     return 1; |   85     return 1; | 
|   86   } else if (c <= kMaxTwoByteChar) { |   86   } else if (c <= kMaxTwoByteChar) { | 
|   87     str[0] = 0xC0 | (c >> 6); |   87     str[0] = 0xC0 | (c >> 6); | 
|   88     str[1] = 0x80 | (c & kMask); |   88     str[1] = 0x80 | (c & kMask); | 
|   89     return 2; |   89     return 2; | 
|   90   } else if (c <= kMaxThreeByteChar) { |   90   } else if (c <= kMaxThreeByteChar) { | 
 |   91     if (Utf16::IsTrailSurrogate(c) && | 
 |   92         Utf16::IsLeadSurrogate(previous)) { | 
 |   93       const int kUnmatchedSize = kSizeOfUnmatchedSurrogate; | 
 |   94       return Encode(str - kUnmatchedSize, | 
 |   95                     Utf16::CombineSurrogatePair(previous, c), | 
 |   96                     Utf16::kNoPreviousCharacter) - kUnmatchedSize; | 
 |   97     } | 
|   91     str[0] = 0xE0 | (c >> 12); |   98     str[0] = 0xE0 | (c >> 12); | 
|   92     str[1] = 0x80 | ((c >> 6) & kMask); |   99     str[1] = 0x80 | ((c >> 6) & kMask); | 
|   93     str[2] = 0x80 | (c & kMask); |  100     str[2] = 0x80 | (c & kMask); | 
|   94     return 3; |  101     return 3; | 
|   95   } else { |  102   } else { | 
|   96     str[0] = 0xF0 | (c >> 18); |  103     str[0] = 0xF0 | (c >> 18); | 
|   97     str[1] = 0x80 | ((c >> 12) & kMask); |  104     str[1] = 0x80 | ((c >> 12) & kMask); | 
|   98     str[2] = 0x80 | ((c >> 6) & kMask); |  105     str[2] = 0x80 | ((c >> 6) & kMask); | 
|   99     str[3] = 0x80 | (c & kMask); |  106     str[3] = 0x80 | (c & kMask); | 
|  100     return 4; |  107     return 4; | 
|  101   } |  108   } | 
|  102 } |  109 } | 
|  103  |  110  | 
|  104  |  111  | 
|  105 uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) { |  112 uchar Utf8::ValueOf(const byte* bytes, unsigned length, unsigned* cursor) { | 
|  106   if (length <= 0) return kBadChar; |  113   if (length <= 0) return kBadChar; | 
|  107   byte first = bytes[0]; |  114   byte first = bytes[0]; | 
|  108   // Characters between 0000 and 0007F are encoded as a single character |  115   // Characters between 0000 and 0007F are encoded as a single character | 
|  109   if (first <= kMaxOneByteChar) { |  116   if (first <= kMaxOneByteChar) { | 
|  110     *cursor += 1; |  117     *cursor += 1; | 
|  111     return first; |  118     return first; | 
|  112   } |  119   } | 
|  113   return CalculateValue(bytes, length, cursor); |  120   return CalculateValue(bytes, length, cursor); | 
|  114 } |  121 } | 
|  115  |  122  | 
|  116 unsigned Utf8::Length(uchar c) { |  123 unsigned Utf8::Length(uchar c, int previous) { | 
|  117   if (c <= kMaxOneByteChar) { |  124   if (c <= kMaxOneByteChar) { | 
|  118     return 1; |  125     return 1; | 
|  119   } else if (c <= kMaxTwoByteChar) { |  126   } else if (c <= kMaxTwoByteChar) { | 
|  120     return 2; |  127     return 2; | 
|  121   } else if (c <= kMaxThreeByteChar) { |  128   } else if (c <= kMaxThreeByteChar) { | 
 |  129     if (Utf16::IsTrailSurrogate(c) && | 
 |  130         Utf16::IsLeadSurrogate(previous)) { | 
 |  131       return kSizeOfUnmatchedSurrogate - kBytesSavedByCombiningSurrogates; | 
 |  132     } | 
|  122     return 3; |  133     return 3; | 
|  123   } else { |  134   } else { | 
|  124     return 4; |  135     return 4; | 
|  125   } |  136   } | 
|  126 } |  137 } | 
|  127  |  138  | 
|  128 uchar CharacterStream::GetNext() { |  139 uchar CharacterStream::GetNext() { | 
|  129   uchar result = DecodeCharacter(buffer_, &cursor_); |  140   uchar result = DecodeCharacter(buffer_, &cursor_); | 
|  130   if (remaining_ == 1) { |  141   if (remaining_ == 1) { | 
|  131     cursor_ = 0; |  142     cursor_ = 0; | 
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  229  |  240  | 
|  230 template <unsigned s> |  241 template <unsigned s> | 
|  231 Utf8InputBuffer<s>::Utf8InputBuffer(const char* data, unsigned length) |  242 Utf8InputBuffer<s>::Utf8InputBuffer(const char* data, unsigned length) | 
|  232     : InputBuffer<Utf8, Buffer<const char*>, s>(Buffer<const char*>(data, |  243     : InputBuffer<Utf8, Buffer<const char*>, s>(Buffer<const char*>(data, | 
|  233                                                                     length)) { |  244                                                                     length)) { | 
|  234 } |  245 } | 
|  235  |  246  | 
|  236 }  // namespace unibrow |  247 }  // namespace unibrow | 
|  237  |  248  | 
|  238 #endif  // V8_UNICODE_INL_H_ |  249 #endif  // V8_UNICODE_INL_H_ | 
| OLD | NEW |