| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 110 Data data() { return data_; } | 110 Data data() { return data_; } |
| 111 unsigned length() { return length_; } | 111 unsigned length() { return length_; } |
| 112 private: | 112 private: |
| 113 Data data_; | 113 Data data_; |
| 114 unsigned length_; | 114 unsigned length_; |
| 115 }; | 115 }; |
| 116 | 116 |
| 117 | 117 |
| 118 class Utf16 { | 118 class Utf16 { |
| 119 public: | 119 public: |
| 120 static inline bool IsLeadSurrogate(int32_t code) { | 120 static inline bool IsLeadSurrogate(int code) { |
| 121 if (code == kNoPreviousCharacter) return false; | 121 if (code == kNoPreviousCharacter) return false; |
| 122 return (code & 0xfc00) == 0xd800; | 122 return (code & 0xfc00) == 0xd800; |
| 123 } | 123 } |
| 124 static inline bool IsTrailSurrogate(int32_t code) { | 124 static inline bool IsTrailSurrogate(int code) { |
| 125 if (code == kNoPreviousCharacter) return false; | 125 if (code == kNoPreviousCharacter) return false; |
| 126 return (code & 0xfc00) == 0xdc00; | 126 return (code & 0xfc00) == 0xdc00; |
| 127 } | 127 } |
| 128 | 128 |
| 129 static inline int32_t CombineSurrogatePair(uchar lead, uchar trail) { | 129 static inline int CombineSurrogatePair(uchar lead, uchar trail) { |
| 130 return 0x10000 + ((lead & 0x3ff) << 10) + (trail & 0x3ff); | 130 return 0x10000 + ((lead & 0x3ff) << 10) + (trail & 0x3ff); |
| 131 } | 131 } |
| 132 static const int32_t kNoPreviousCharacter = -1; | 132 static const int kNoPreviousCharacter = -1; |
| 133 static const uchar kMaxNonSurrogateCharCode = 0xffff; | 133 static const uchar kMaxNonSurrogateCharCode = 0xffff; |
| 134 // Encoding a single UTF-16 code unit will produce 1, 2 or 3 bytes | 134 // Encoding a single UTF-16 code unit will produce 1, 2 or 3 bytes |
| 135 // of UTF-8 data. The special case where the unit is a surrogate | 135 // of UTF-8 data. The special case where the unit is a surrogate |
| 136 // trail produces 1 byte net, because the encoding of the pair is | 136 // trail produces 1 byte net, because the encoding of the pair is |
| 137 // 4 bytes and the 3 bytes that were used to encode the lead surrogate | 137 // 4 bytes and the 3 bytes that were used to encode the lead surrogate |
| 138 // can be reclaimed. | 138 // can be reclaimed. |
| 139 static const int kMaxExtraUtf8BytesForOneUtf16CodeUnit = 3; | 139 static const int kMaxExtraUtf8BytesForOneUtf16CodeUnit = 3; |
| 140 // One UTF-16 surrogate is encoded (illegally) as 3 UTF-8 bytes. | 140 // One UTF-16 surrogate is encoded (illegally) as 3 UTF-8 bytes. |
| 141 // The illegality stems from the surrogate not being part of a pair. | 141 // The illegality stems from the surrogate not being part of a pair. |
| 142 static const int kUtf8BytesToCodeASurrogate = 3; | 142 static const int kUtf8BytesToCodeASurrogate = 3; |
| 143 static inline uchar LeadSurrogate(int32_t char_code) { | 143 static inline uchar LeadSurrogate(int char_code) { |
| 144 return 0xd800 + (((char_code - 0x10000) >> 10) & 0x3ff); | 144 return 0xd800 + (((char_code - 0x10000) >> 10) & 0x3ff); |
| 145 } | 145 } |
| 146 static inline uchar TrailSurrogate(int32_t char_code) { | 146 static inline uchar TrailSurrogate(int char_code) { |
| 147 return 0xdc00 + (char_code & 0x3ff); | 147 return 0xdc00 + (char_code & 0x3ff); |
| 148 } | 148 } |
| 149 }; | 149 }; |
| 150 | 150 |
| 151 | 151 |
| 152 class Utf8 { | 152 class Utf8 { |
| 153 public: | 153 public: |
| 154 static inline uchar Length(uchar chr, int previous); | 154 static inline uchar Length(uchar chr, int previous); |
| 155 static inline unsigned Encode( | 155 static inline unsigned Encode( |
| 156 char* out, uchar c, int previous); | 156 char* out, uchar c, int previous); |
| (...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 314 static const int kMaxWidth = 1; | 314 static const int kMaxWidth = 1; |
| 315 static int Convert(uchar c, | 315 static int Convert(uchar c, |
| 316 uchar n, | 316 uchar n, |
| 317 uchar* result, | 317 uchar* result, |
| 318 bool* allow_caching_ptr); | 318 bool* allow_caching_ptr); |
| 319 }; | 319 }; |
| 320 | 320 |
| 321 } // namespace unibrow | 321 } // namespace unibrow |
| 322 | 322 |
| 323 #endif // V8_UNICODE_H_ | 323 #endif // V8_UNICODE_H_ |
| OLD | NEW |