Index: sdk/lib/convert/utf.dart |
diff --git a/sdk/lib/convert/utf.dart b/sdk/lib/convert/utf.dart |
index 6abaa0519b959200fe49ec33d7df190bd23fc352..356208510ace823ab6078f81455dbdd34a3c88e6 100644 |
--- a/sdk/lib/convert/utf.dart |
+++ b/sdk/lib/convert/utf.dart |
@@ -7,6 +7,9 @@ part of dart.convert; |
/** The Unicode Replacement character `U+FFFD` (�). */ |
const UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; |
+/** The Unicode Byte Order Mark (BOM) character `U+FEFF`. */ |
+const UNICODE_BOM_CHARACTER_RUNE = 0xFEFF; |
+ |
/** |
* An instance of the default implementation of the [Utf8Codec]. |
* |
@@ -48,6 +51,9 @@ class Utf8Codec extends Encoding { |
* Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
* corresponding string. |
* |
+ * If the [codeUnits] start with the encoding of a |
+ * [UNICODE_BOM_CHARACTER_RUNE], this character is discarded. |
+ * |
* If [allowMalformed] is `true` the decoder replaces invalid (or |
* unterminated) character sequences with the Unicode Replacement character |
* `U+FFFD` (�). Otherwise it throws a [FormatException]. |
@@ -303,6 +309,9 @@ class Utf8Decoder extends Converter<List<int>, String> { |
/** |
* Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
* corresponding string. |
+ * |
+ * If the [codeUnits] start with the encoding of a |
+ * [UNICODE_BOM_CHARACTER_RUNE], this character is discarded. |
*/ |
String convert(List<int> codeUnits) { |
StringBuffer buffer = new StringBuffer(); |
@@ -346,9 +355,6 @@ const int _SURROGATE_VALUE_MASK = 0x3FF; |
const int _LEAD_SURROGATE_MIN = 0xD800; |
const int _TAIL_SURROGATE_MIN = 0xDC00; |
-const int _REPLACEMENT_CHARACTER = 0xFFFD; |
-const int _BOM_CHARACTER = 0xFEFF; |
- |
bool _isSurrogate(int codeUnit) => |
(codeUnit & _SURROGATE_MASK) == _LEAD_SURROGATE_MIN; |
bool _isLeadSurrogate(int codeUnit) => |
@@ -356,7 +362,7 @@ bool _isLeadSurrogate(int codeUnit) => |
bool _isTailSurrogate(int codeUnit) => |
(codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN; |
int _combineSurrogatePair(int lead, int tail) => |
- 0x10000 | ((lead & _SURROGATE_VALUE_MASK) << 10) |
+ 0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10) |
| (tail & _SURROGATE_VALUE_MASK); |
@@ -400,7 +406,7 @@ class _Utf8Decoder { |
if (!_allowMalformed) { |
throw new FormatException("Unfinished UTF-8 octet sequence"); |
} |
- _stringSink.writeCharCode(_REPLACEMENT_CHARACTER); |
+ _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE); |
_value = 0; |
_expectedUnits = 0; |
_extraUnits = 0; |
@@ -430,7 +436,7 @@ class _Utf8Decoder { |
"Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); |
} |
_isFirstCharacter = false; |
- _stringSink.writeCharCode(_REPLACEMENT_CHARACTER); |
+ _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE); |
break multibyte; |
} else { |
value = (value << 6) | (unit & 0x3f); |
@@ -446,16 +452,16 @@ class _Utf8Decoder { |
"Overlong encoding of 0x${value.toRadixString(16)}"); |
} |
expectedUnits = extraUnits = 0; |
- value = _REPLACEMENT_CHARACTER; |
+ value = UNICODE_REPLACEMENT_CHARACTER_RUNE; |
} |
if (value > _FOUR_BYTE_LIMIT) { |
if (!_allowMalformed) { |
throw new FormatException("Character outside valid Unicode range: " |
"0x${value.toRadixString(16)}"); |
} |
- value = _REPLACEMENT_CHARACTER; |
+ value = UNICODE_REPLACEMENT_CHARACTER_RUNE; |
} |
- if (!_isFirstCharacter || value != _BOM_CHARACTER) { |
+ if (!_isFirstCharacter || value != UNICODE_BOM_CHARACTER_RUNE) { |
_stringSink.writeCharCode(value); |
} |
_isFirstCharacter = false; |
@@ -474,7 +480,7 @@ class _Utf8Decoder { |
throw new FormatException( |
"Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}"); |
} |
- _stringSink.writeCharCode(_REPLACEMENT_CHARACTER); |
+ _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE); |
} else if (unit <= _ONE_BYTE_LIMIT) { |
_isFirstCharacter = false; |
_stringSink.writeCharCode(unit); |
@@ -499,7 +505,7 @@ class _Utf8Decoder { |
throw new FormatException( |
"Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); |
} |
- value = _REPLACEMENT_CHARACTER; |
+ value = UNICODE_REPLACEMENT_CHARACTER_RUNE; |
expectedUnits = extraUnits = 0; |
_isFirstCharacter = false; |
_stringSink.writeCharCode(value); |