OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/translate/translate_helper.h" | 5 #include "chrome/renderer/translate/translate_helper.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
88 // meta tag for "content-language". This may or may not also | 88 // meta tag for "content-language". This may or may not also |
89 // have a value derived from the actual Content-Language HTTP | 89 // have a value derived from the actual Content-Language HTTP |
90 // header. The two actually have different meanings (despite the | 90 // header. The two actually have different meanings (despite the |
91 // original intent of http-equiv to be an equivalent) with the former | 91 // original intent of http-equiv to be an equivalent) with the former |
92 // being the language of the document and the latter being the | 92 // being the language of the document and the latter being the |
93 // language of the intended audience (a distinction really only | 93 // language of the intended audience (a distinction really only |
94 // relevant for things like language textbooks). This distinction | 94 // relevant for things like language textbooks). This distinction |
95 // shouldn't affect translation. | 95 // shouldn't affect translation. |
96 WebDocument document = GetMainFrame()->document(); | 96 WebDocument document = GetMainFrame()->document(); |
97 std::string content_language = document.contentLanguage().utf8(); | 97 std::string content_language = document.contentLanguage().utf8(); |
98 std::string language = DeterminePageLanguage(content_language, contents); | 98 std::string cld_language; |
| 99 bool is_cld_reliable; |
| 100 std::string language = DeterminePageLanguage( |
| 101 content_language, contents, &cld_language, &is_cld_reliable); |
| 102 |
| 103 if (language.empty()) |
| 104 return; |
99 | 105 |
100 language_determined_time_ = base::TimeTicks::Now(); | 106 language_determined_time_ = base::TimeTicks::Now(); |
101 | 107 |
| 108 GURL url(document.url()); |
| 109 LanguageDetectionDetails details; |
| 110 details.time = base::Time::Now(); |
| 111 details.url = url; |
| 112 details.content_language = content_language; |
| 113 details.cld_language = cld_language; |
| 114 details.is_cld_reliable = is_cld_reliable; |
| 115 details.adopted_language = language; |
| 116 |
102 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 117 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( |
103 routing_id(), | 118 routing_id(), |
104 language, | 119 details, |
105 IsTranslationAllowed(&document) && !language.empty())); | 120 IsTranslationAllowed(&document) && !language.empty())); |
106 } | 121 } |
107 | 122 |
108 void TranslateHelper::CancelPendingTranslation() { | 123 void TranslateHelper::CancelPendingTranslation() { |
109 weak_method_factory_.InvalidateWeakPtrs(); | 124 weak_method_factory_.InvalidateWeakPtrs(); |
110 translation_pending_ = false; | 125 translation_pending_ = false; |
111 page_id_ = -1; | 126 page_id_ = -1; |
112 source_lang_.clear(); | 127 source_lang_.clear(); |
113 target_lang_.clear(); | 128 target_lang_.clear(); |
114 } | 129 } |
115 | 130 |
116 #if defined(ENABLE_LANGUAGE_DETECTION) | 131 #if defined(ENABLE_LANGUAGE_DETECTION) |
117 // static | 132 // static |
118 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { | 133 std::string TranslateHelper::DetermineTextLanguage(const string16& text, |
| 134 bool* is_cld_reliable) { |
119 std::string language = chrome::kUnknownLanguageCode; | 135 std::string language = chrome::kUnknownLanguageCode; |
120 int num_languages = 0; | 136 int num_languages = 0; |
121 int text_bytes = 0; | 137 int text_bytes = 0; |
122 bool is_reliable = false; | 138 bool is_reliable = false; |
123 Language cld_language = | 139 Language cld_language = |
124 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 140 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
125 &num_languages, NULL, &text_bytes); | 141 &num_languages, NULL, &text_bytes); |
| 142 if (is_cld_reliable != NULL) |
| 143 *is_cld_reliable = is_reliable; |
| 144 |
126 // We don't trust the result if the CLD reports that the detection is not | 145 // We don't trust the result if the CLD reports that the detection is not |
127 // reliable, or if the actual text used to detect the language was less than | 146 // reliable, or if the actual text used to detect the language was less than |
128 // 100 bytes (short texts can often lead to wrong results). | 147 // 100 bytes (short texts can often lead to wrong results). |
129 // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that | 148 // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that |
130 // the determined language code is correct with 50% confidence. Chrome should | 149 // the determined language code is correct with 50% confidence. Chrome should |
131 // handle the real confidence value to judge. | 150 // handle the real confidence value to judge. |
132 if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && | 151 if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && |
133 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { | 152 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { |
134 // We should not use LanguageCode_ISO_639_1 because it does not cover all | 153 // We should not use LanguageCode_ISO_639_1 because it does not cover all |
135 // the languages CLD can detect. As a result, it'll return the invalid | 154 // the languages CLD can detect. As a result, it'll return the invalid |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
294 size_t dash_index = code->find('-'); | 313 size_t dash_index = code->find('-'); |
295 if (!(dash_index == 2 && code->size() == 5) && | 314 if (!(dash_index == 2 && code->size() == 5) && |
296 !(dash_index == std::string::npos && code->size() == 2)) { | 315 !(dash_index == std::string::npos && code->size() == 2)) { |
297 // Reset |code| to ignore the invalid code. | 316 // Reset |code| to ignore the invalid code. |
298 *code = std::string(); | 317 *code = std::string(); |
299 } | 318 } |
300 } | 319 } |
301 | 320 |
302 // static | 321 // static |
303 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, | 322 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, |
304 const string16& contents) { | 323 const string16& contents, |
| 324 std::string* cld_language_p, |
| 325 bool* is_cld_reliable_p) { |
305 #if defined(ENABLE_LANGUAGE_DETECTION) | 326 #if defined(ENABLE_LANGUAGE_DETECTION) |
306 base::TimeTicks begin_time = base::TimeTicks::Now(); | 327 base::TimeTicks begin_time = base::TimeTicks::Now(); |
307 std::string cld_language = DetermineTextLanguage(contents); | 328 bool is_cld_reliable; |
| 329 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); |
308 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, | 330 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, |
309 base::TimeTicks::Now()); | 331 base::TimeTicks::Now()); |
| 332 |
| 333 if (cld_language_p != NULL) |
| 334 *cld_language_p = cld_language; |
| 335 if (is_cld_reliable_p != NULL) |
| 336 *is_cld_reliable_p = is_cld_reliable; |
310 ConvertLanguageCodeSynonym(&cld_language); | 337 ConvertLanguageCodeSynonym(&cld_language); |
311 VLOG(9) << "CLD determined language code: " << cld_language; | 338 VLOG(9) << "CLD determined language code: " << cld_language; |
312 #endif // defined(ENABLE_LANGUAGE_DETECTION) | 339 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
313 | 340 |
314 // Correct well-known format errors. | 341 // Correct well-known format errors. |
315 std::string language = code; | 342 std::string language = code; |
316 CorrectLanguageCodeTypo(&language); | 343 CorrectLanguageCodeTypo(&language); |
317 | 344 |
318 // Convert language code synonyms first, since a synonym code is sometimes in | 345 // Convert language code synonyms first, since a synonym code is sometimes in |
319 // an invalid format, e.g. 'fil'. After validation, such a 3-character language | 346 // an invalid format, e.g. 'fil'. After validation, such a 3-character language |
(...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
565 WebView* web_view = render_view()->GetWebView(); | 592 WebView* web_view = render_view()->GetWebView(); |
566 if (!web_view) { | 593 if (!web_view) { |
567 // When the WebView is going away, the render view should have called | 594 // When the WebView is going away, the render view should have called |
568 // CancelPendingTranslation() which should have stopped any pending work, so | 595 // CancelPendingTranslation() which should have stopped any pending work, so |
569 // that case should not happen. | 596 // that case should not happen. |
570 NOTREACHED(); | 597 NOTREACHED(); |
571 return NULL; | 598 return NULL; |
572 } | 599 } |
573 return web_view->mainFrame(); | 600 return web_view->mainFrame(); |
574 } | 601 } |
OLD | NEW |