Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(913)

Unified Diff: Source/core/fetch/TextResourceDecoder.cpp

Issue 23623012: Move TextResourceDecoder from loader/ to fetch/ (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « Source/core/fetch/TextResourceDecoder.h ('k') | Source/core/fetch/XSLStyleSheetResource.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: Source/core/fetch/TextResourceDecoder.cpp
diff --git a/Source/core/loader/TextResourceDecoder.cpp b/Source/core/fetch/TextResourceDecoder.cpp
similarity index 86%
rename from Source/core/loader/TextResourceDecoder.cpp
rename to Source/core/fetch/TextResourceDecoder.cpp
index 139fcf1cb649d7b285eac564f5645b9772c84440..b6759bffb89f02431d538dd580e21336e5faf7a1 100644
--- a/Source/core/loader/TextResourceDecoder.cpp
+++ b/Source/core/fetch/TextResourceDecoder.cpp
@@ -21,7 +21,7 @@
#include "config.h"
-#include "core/loader/TextResourceDecoder.h"
+#include "core/fetch/TextResourceDecoder.h"
#include "HTMLNames.h"
#include "core/dom/DOMImplementation.h"
@@ -175,9 +175,9 @@ enum KanjiCode::Type KanjiCode::judge(const char* str, int size)
while (i < size) {
if (ptr[i] == ESC && (size - i >= 3)) {
if (bytesEqual(str + i + 1, '$', 'B')
- || bytesEqual(str + i + 1, '(', 'B')
- || bytesEqual(str + i + 1, '$', '@')
- || bytesEqual(str + i + 1, '(', 'J')) {
+ || bytesEqual(str + i + 1, '(', 'B')
+ || bytesEqual(str + i + 1, '$', '@')
+ || bytesEqual(str + i + 1, '(', 'J')) {
code = JIS;
goto breakBreak;
}
@@ -195,11 +195,10 @@ enum KanjiCode::Type KanjiCode::judge(const char* str, int size)
bfk = 0;
/* ?? check kudokuten ?? && ?? hiragana ?? */
if ((i >= 2) && (ptr[i - 2] == 0x81)
- && (0x41 <= ptr[i - 1] && ptr[i - 1] <= 0x49)) {
+ && (0x41 <= ptr[i - 1] && ptr[i - 1] <= 0x49)) {
code = SJIS;
sjis += 100; /* kudokuten */
- } else if ((i >= 2) && (ptr[i - 2] == 0xa1)
- && (0xa2 <= ptr[i - 1] && ptr[i - 1] <= 0xaa)) {
+ } else if ((i >= 2) && (ptr[i - 2] == 0xa1) && (0xa2 <= ptr[i - 1] && ptr[i - 1] <= 0xaa)) {
code = EUC;
euc += 100; /* kudokuten */
} else if ((i >= 2) && (ptr[i - 2] == 0x82) && (0xa0 <= ptr[i - 1])) {
@@ -211,8 +210,7 @@ enum KanjiCode::Type KanjiCode::judge(const char* str, int size)
/* ?? check hiragana or katana ?? */
if ((size - i > 1) && (ptr[i] == 0x82) && (0xa0 <= ptr[i + 1])) {
sjis++; /* hiragana */
- } else if ((size - i > 1) && (ptr[i] == 0x83)
- && (0x40 <= ptr[i + 1] && ptr[i + 1] <= 0x9f)) {
+ } else if ((size - i > 1) && (ptr[i] == 0x83) && (0x40 <= ptr[i + 1] && ptr[i + 1] <= 0x9f)) {
sjis++; /* katakana */
} else if ((size - i > 1) && (ptr[i] == 0xa4) && (0xa0 <= ptr[i + 1])) {
euc++; /* hiragana */
@@ -223,39 +221,50 @@ enum KanjiCode::Type KanjiCode::judge(const char* str, int size)
if ((i >= 1) && (0x40 <= ptr[i] && ptr[i] <= 0xa0) && ISkanji(ptr[i - 1])) {
code = SJIS;
goto breakBreak;
- } else if ((i >= 1) && (0x81 <= ptr[i - 1] && ptr[i - 1] <= 0x9f) && ((0x40 <= ptr[i] && ptr[i] < 0x7e) || (0x7e < ptr[i] && ptr[i] <= 0xfc))) {
+ }
+
+ if ((i >= 1) && (0x81 <= ptr[i - 1] && ptr[i - 1] <= 0x9f) && ((0x40 <= ptr[i] && ptr[i] < 0x7e) || (0x7e < ptr[i] && ptr[i] <= 0xfc))) {
code = SJIS;
goto breakBreak;
- } else if ((i >= 1) && (0xfd <= ptr[i] && ptr[i] <= 0xfe) && (0xa1 <= ptr[i - 1] && ptr[i - 1] <= 0xfe)) {
+ }
+
+ if ((i >= 1) && (0xfd <= ptr[i] && ptr[i] <= 0xfe) && (0xa1 <= ptr[i - 1] && ptr[i - 1] <= 0xfe)) {
code = EUC;
goto breakBreak;
- } else if ((i >= 1) && (0xfd <= ptr[i - 1] && ptr[i - 1] <= 0xfe) && (0xa1 <= ptr[i] && ptr[i] <= 0xfe)) {
+ }
+
+ if ((i >= 1) && (0xfd <= ptr[i - 1] && ptr[i - 1] <= 0xfe) && (0xa1 <= ptr[i] && ptr[i] <= 0xfe)) {
code = EUC;
goto breakBreak;
- } else if ((i >= 1) && (ptr[i] < 0xa0 || 0xdf < ptr[i]) && (0x8e == ptr[i - 1])) {
+ }
+
+ if ((i >= 1) && (ptr[i] < 0xa0 || 0xdf < ptr[i]) && (0x8e == ptr[i - 1])) {
code = SJIS;
goto breakBreak;
- } else if (ptr[i] <= 0x7f) {
+ }
+
+ if (ptr[i] <= 0x7f) {
code = SJIS;
goto breakBreak;
- } else {
- if (0xa1 <= ptr[i] && ptr[i] <= 0xa6) {
- euc++; /* sjis hankaku kana kigo */
- } else if (0xa1 <= ptr[i] && ptr[i] <= 0xdf) {
- ; /* sjis hankaku kana */
- } else if (0xa1 <= ptr[i] && ptr[i] <= 0xfe) {
- euc++;
- } else if (0x8e == ptr[i]) {
- euc++;
- } else if (0x20 <= ptr[i] && ptr[i] <= 0x7f) {
- sjis++;
- }
- bfr = false;
- bfk = 0;
}
+
+ if (0xa1 <= ptr[i] && ptr[i] <= 0xa6) {
+ euc++; /* sjis hankaku kana kigo */
+ } else if (0xa1 <= ptr[i] && ptr[i] <= 0xdf) {
+ /* sjis hankaku kana */
+ } else if (0xa1 <= ptr[i] && ptr[i] <= 0xfe) {
+ euc++;
+ } else if (0x8e == ptr[i]) {
+ euc++;
+ } else if (0x20 <= ptr[i] && ptr[i] <= 0x7f) {
+ sjis++;
+ }
+
+ bfr = false;
+ bfk = 0;
} else if (0x8e == ptr[i]) {
if (size - i <= 1) {
- ;
+
} else if (0xa1 <= ptr[i + 1] && ptr[i + 1] <= 0xdf) {
/* EUC KANA or SJIS KANJI */
if (bfk == 1) {
@@ -271,20 +280,15 @@ enum KanjiCode::Type KanjiCode::judge(const char* str, int size)
} else if (0x81 <= ptr[i] && ptr[i] <= 0x9f) {
/* SJIS only */
code = SJIS;
- if ((size - i >= 1)
- && ((0x40 <= ptr[i + 1] && ptr[i + 1] <= 0x7e)
- || (0x80 <= ptr[i + 1] && ptr[i + 1] <= 0xfc))) {
+ if ((size - i >= 1) && ((0x40 <= ptr[i + 1] && ptr[i + 1] <= 0x7e) || (0x80 <= ptr[i + 1] && ptr[i + 1] <= 0xfc)))
goto breakBreak;
- }
} else if (0xfd <= ptr[i] && ptr[i] <= 0xfe) {
/* EUC only */
code = EUC;
- if ((size - i >= 1)
- && (0xa1 <= ptr[i + 1] && ptr[i + 1] <= 0xfe)) {
+ if ((size - i >= 1) && (0xa1 <= ptr[i + 1] && ptr[i + 1] <= 0xfe))
goto breakBreak;
- }
} else if (ptr[i] <= 0x7f) {
- ;
+
} else {
bfr = true;
bfk = 0;
@@ -353,7 +357,7 @@ void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, Encodin
// When encoding comes from meta tag (i.e. it cannot be XML files sent via XHR),
// treat x-user-defined as windows-1252 (bug 18270)
- if (source == EncodingFromMetaTag && strcasecmp(encoding.name(), "x-user-defined") == 0)
+ if (source == EncodingFromMetaTag && !strcasecmp(encoding.name(), "x-user-defined"))
m_encoding = "windows-1252";
else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || source == EncodingFromCSSCharset)
m_encoding = encoding.closestByteBasedEquivalent();
@@ -433,7 +437,7 @@ size_t TextResourceDecoder::checkForBOM(const char* data, size_t len)
// Check for the BOM.
if (c1 == 0xFF && c2 == 0xFE) {
- if (c3 != 0 || c4 != 0) {
+ if (c3 || c4) {
setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);
lengthOfBOM = 2;
} else {
@@ -446,7 +450,7 @@ size_t TextResourceDecoder::checkForBOM(const char* data, size_t len)
} else if (c1 == 0xFE && c2 == 0xFF) {
setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding);
lengthOfBOM = 2;
- } else if (c1 == 0 && c2 == 0 && c3 == 0xFE && c4 == 0xFF) {
+ } else if (!c1 && !c2 && c3 == 0xFE && c4 == 0xFF) {
setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding);
lengthOfBOM = 4;
}
@@ -534,14 +538,15 @@ bool TextResourceDecoder::checkForXMLCharset(const char* data, size_t len, bool&
if (pos != -1)
setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader);
// continue looking for a charset - it may be specified in an HTTP-Equiv meta
- } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0))
+ } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) {
setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);
- else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x'))
+ } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) {
setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding);
- else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0))
+ } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) {
setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding);
- else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?'))
+ } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) {
setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding);
+ }
m_checkedForXMLCharset = true;
return true;
@@ -569,19 +574,19 @@ void TextResourceDecoder::checkForMetaCharset(const char* data, size_t length)
void TextResourceDecoder::detectJapaneseEncoding(const char* data, size_t len)
{
switch (KanjiCode::judge(data, len)) {
- case KanjiCode::JIS:
- setEncoding("ISO-2022-JP", EncodingFromContentSniffing);
- break;
- case KanjiCode::EUC:
- setEncoding("EUC-JP", EncodingFromContentSniffing);
- break;
- case KanjiCode::SJIS:
- setEncoding("Shift_JIS", EncodingFromContentSniffing);
- break;
- case KanjiCode::ASCII:
- case KanjiCode::UTF16:
- case KanjiCode::UTF8:
- break;
+ case KanjiCode::JIS:
+ setEncoding("ISO-2022-JP", EncodingFromContentSniffing);
+ break;
+ case KanjiCode::EUC:
+ setEncoding("EUC-JP", EncodingFromContentSniffing);
+ break;
+ case KanjiCode::SJIS:
+ setEncoding("Shift_JIS", EncodingFromContentSniffing);
+ break;
+ case KanjiCode::ASCII:
+ case KanjiCode::UTF16:
+ case KanjiCode::UTF8:
+ break;
}
}
@@ -610,20 +615,22 @@ String TextResourceDecoder::decode(const char* data, size_t len)
bool movedDataToBuffer = false;
- if (m_contentType == CSS && !m_checkedForCSSCharset)
+ if (m_contentType == CSS && !m_checkedForCSSCharset) {
if (!checkForCSSCharset(data, len, movedDataToBuffer))
return emptyString();
+ }
- if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForXMLCharset)
+ if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForXMLCharset) {
if (!checkForXMLCharset(data, len, movedDataToBuffer))
return emptyString();
+ }
// FIXME: It would be more efficient to move this logic below checkForMetaCharset because
// checkForMetaCharset can overrule these detections.
if (shouldAutoDetect()) {
- if (m_encoding.isJapanese())
+ if (m_encoding.isJapanese()) {
detectJapaneseEncoding(data, len); // FIXME: We should use detectTextEncoding() for all languages.
- else {
+ } else {
WTF::TextEncoding detectedEncoding;
if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))
setEncoding(detectedEncoding, EncodingFromContentSniffing);
@@ -660,9 +667,9 @@ String TextResourceDecoder::decode(const char* data, size_t len)
String TextResourceDecoder::flush()
{
- // If we can not identify the encoding even after a document is completely
- // loaded, we need to detect the encoding if other conditions for
- // autodetection is satisfied.
+ // If we can not identify the encoding even after a document is completely
+ // loaded, we need to detect the encoding if other conditions for
+ // autodetection is satisfied.
if (m_buffer.size() && shouldAutoDetect()
&& ((!m_checkedForXMLCharset && (m_contentType == HTML || m_contentType == XML)) || (!m_checkedForCSSCharset && (m_contentType == CSS)))) {
WTF::TextEncoding detectedEncoding;
« no previous file with comments | « Source/core/fetch/TextResourceDecoder.h ('k') | Source/core/fetch/XSLStyleSheetResource.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698