OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/translate/translate_helper.h" | 5 #include "chrome/renderer/translate/translate_helper.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
86 void TranslateHelper::PageCaptured(const string16& contents) { | 86 void TranslateHelper::PageCaptured(const string16& contents) { |
87 // Get the document language as set by WebKit from the http-equiv | 87 // Get the document language as set by WebKit from the http-equiv |
88 // meta tag for "content-language". This may or may not also | 88 // meta tag for "content-language". This may or may not also |
89 // have a value derived from the actual Content-Language HTTP | 89 // have a value derived from the actual Content-Language HTTP |
90 // header. The two actually have different meanings (despite the | 90 // header. The two actually have different meanings (despite the |
91 // original intent of http-equiv to be an equivalent) with the former | 91 // original intent of http-equiv to be an equivalent) with the former |
92 // being the language of the document and the latter being the | 92 // being the language of the document and the latter being the |
93 // language of the intended audience (a distinction really only | 93 // language of the intended audience (a distinction really only |
94 // relevant for things like language textbooks). This distinction | 94 // relevant for things like language textbooks). This distinction |
95 // shouldn't affect translation. | 95 // shouldn't affect translation. |
96 WebDocument document = GetMainFrame()->document(); | 96 WebFrame* main_frame = GetMainFrame(); |
| 97 if (!main_frame) |
| 98 return; |
| 99 WebDocument document = main_frame->document(); |
97 std::string content_language = document.contentLanguage().utf8(); | 100 std::string content_language = document.contentLanguage().utf8(); |
| 101 WebElement html_element = document.documentElement(); |
| 102 std::string html_lang; |
| 103 // |html_element| can be null element, e.g. in |
| 104 // BrowserTest.WindowOpenClose. |
| 105 if (!html_element.isNull()) |
| 106 html_lang = html_element.getAttribute("lang").utf8(); |
98 std::string cld_language; | 107 std::string cld_language; |
99 bool is_cld_reliable; | 108 bool is_cld_reliable; |
100 std::string language = DeterminePageLanguage( | 109 std::string language = DeterminePageLanguage( |
101 content_language, contents, &cld_language, &is_cld_reliable); | 110 content_language, html_lang, contents, &cld_language, &is_cld_reliable); |
102 | 111 |
103 if (language.empty()) | 112 if (language.empty()) |
104 return; | 113 return; |
105 | 114 |
106 language_determined_time_ = base::TimeTicks::Now(); | 115 language_determined_time_ = base::TimeTicks::Now(); |
107 | 116 |
| 117 // TODO(toyoshim): Add |html_lang| to LanguageDetectionDetails. |
108 GURL url(document.url()); | 118 GURL url(document.url()); |
109 LanguageDetectionDetails details; | 119 LanguageDetectionDetails details; |
110 details.time = base::Time::Now(); | 120 details.time = base::Time::Now(); |
111 details.url = url; | 121 details.url = url; |
112 details.content_language = content_language; | 122 details.content_language = content_language; |
113 details.cld_language = cld_language; | 123 details.cld_language = cld_language; |
114 details.is_cld_reliable = is_cld_reliable; | 124 details.is_cld_reliable = is_cld_reliable; |
115 details.adopted_language = language; | 125 details.adopted_language = language; |
116 | 126 |
117 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 127 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( |
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
312 // Roughly check if the language code follows [a-z][a-z](-[A-Z][A-Z]). | 322 // Roughly check if the language code follows [a-z][a-z](-[A-Z][A-Z]). |
313 size_t dash_index = code->find('-'); | 323 size_t dash_index = code->find('-'); |
314 if (!(dash_index == 2 && code->size() == 5) && | 324 if (!(dash_index == 2 && code->size() == 5) && |
315 !(dash_index == std::string::npos && code->size() == 2)) { | 325 !(dash_index == std::string::npos && code->size() == 2)) { |
316 // Reset |language| to ignore the invalid code. | 326 // Reset |language| to ignore the invalid code. |
317 *code = std::string(); | 327 *code = std::string(); |
318 } | 328 } |
319 } | 329 } |
320 | 330 |
321 // static | 331 // static |
| 332 void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) { |
| 333 // Correct well-known format errors. |
| 334 CorrectLanguageCodeTypo(code); |
| 335 |
| 336 // Convert language code synonym firstly because sometime synonym code is in |
| 337 // invalid format, e.g. 'fil'. After validation, such a 3 characters language |
| 338 // gets converted to an empty string. |
| 339 ConvertLanguageCodeSynonym(code); |
| 340 ResetInvalidLanguageCode(code); |
| 341 } |
| 342 |
| 343 // static |
322 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, | 344 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, |
| 345 const std::string& html_lang, |
323 const string16& contents, | 346 const string16& contents, |
324 std::string* cld_language_p, | 347 std::string* cld_language_p, |
325 bool* is_cld_reliable_p) { | 348 bool* is_cld_reliable_p) { |
326 #if defined(ENABLE_LANGUAGE_DETECTION) | 349 #if defined(ENABLE_LANGUAGE_DETECTION) |
327 base::TimeTicks begin_time = base::TimeTicks::Now(); | 350 base::TimeTicks begin_time = base::TimeTicks::Now(); |
328 bool is_cld_reliable; | 351 bool is_cld_reliable; |
329 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); | 352 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); |
330 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, | 353 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, |
331 base::TimeTicks::Now()); | 354 base::TimeTicks::Now()); |
332 | 355 |
333 if (cld_language_p != NULL) | 356 if (cld_language_p != NULL) |
334 *cld_language_p = cld_language; | 357 *cld_language_p = cld_language; |
335 if (is_cld_reliable_p != NULL) | 358 if (is_cld_reliable_p != NULL) |
336 *is_cld_reliable_p = is_cld_reliable; | 359 *is_cld_reliable_p = is_cld_reliable; |
337 ConvertLanguageCodeSynonym(&cld_language); | 360 ConvertLanguageCodeSynonym(&cld_language); |
338 #endif // defined(ENABLE_LANGUAGE_DETECTION) | 361 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
339 | 362 |
340 // Correct well-known format errors. | 363 // Check if html lang attribute is valid. |
341 std::string language = code; | 364 std::string modified_html_lang; |
342 CorrectLanguageCodeTypo(&language); | 365 if (!html_lang.empty()) { |
| 366 modified_html_lang = html_lang; |
| 367 ApplyLanguageCodeCorrection(&modified_html_lang); |
| 368 TranslateHelperMetrics::ReportHtmlLang(html_lang, modified_html_lang); |
| 369 VLOG(9) << "html lang based language code: " << modified_html_lang; |
| 370 } |
343 | 371 |
344 // Convert language code synonym firstly because sometime synonym code is in | 372 // Check if Content-Language is valid. |
345 // invalid format, e.g. 'fil'. After validation, such a 3 characters language | 373 std::string modified_code; |
346 // gets converted to an empty string. | 374 if (!code.empty()) { |
347 ConvertLanguageCodeSynonym(&language); | 375 modified_code = code; |
348 ResetInvalidLanguageCode(&language); | 376 ApplyLanguageCodeCorrection(&modified_code); |
| 377 TranslateHelperMetrics::ReportContentLanguage(code, modified_code); |
| 378 } |
349 | 379 |
350 TranslateHelperMetrics::ReportContentLanguage(code, language); | 380 // Adopt |modified_html_lang| if it is valid. Otherwise, adopt |
| 381 // |modified_code|. |
| 382 std::string language = modified_html_lang.empty() ? modified_code : |
| 383 modified_html_lang; |
351 | 384 |
352 #if defined(ENABLE_LANGUAGE_DETECTION) | 385 #if defined(ENABLE_LANGUAGE_DETECTION) |
353 // If |language| is empty, just use CLD result even though it might be | 386 // If |language| is empty, just use CLD result even though it might be |
354 // chrome::kUnknownLanguageCode. | 387 // chrome::kUnknownLanguageCode. |
355 if (language.empty()) { | 388 if (language.empty()) { |
356 TranslateHelperMetrics::ReportLanguageVerification( | 389 TranslateHelperMetrics::ReportLanguageVerification( |
357 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); | 390 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); |
358 return cld_language; | 391 return cld_language; |
359 } | 392 } |
360 | 393 |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
426 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation) | 459 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation) |
427 IPC_MESSAGE_UNHANDLED(handled = false) | 460 IPC_MESSAGE_UNHANDLED(handled = false) |
428 IPC_END_MESSAGE_MAP() | 461 IPC_END_MESSAGE_MAP() |
429 return handled; | 462 return handled; |
430 } | 463 } |
431 | 464 |
432 void TranslateHelper::OnTranslatePage(int page_id, | 465 void TranslateHelper::OnTranslatePage(int page_id, |
433 const std::string& translate_script, | 466 const std::string& translate_script, |
434 const std::string& source_lang, | 467 const std::string& source_lang, |
435 const std::string& target_lang) { | 468 const std::string& target_lang) { |
436 if (render_view()->GetPageId() != page_id) | 469 WebFrame* main_frame = GetMainFrame(); |
| 470 if (!main_frame || render_view()->GetPageId() != page_id) |
437 return; // We navigated away, nothing to do. | 471 return; // We navigated away, nothing to do. |
438 | 472 |
439 if (translation_pending_ && page_id == page_id_ && | 473 if (translation_pending_ && page_id == page_id_ && |
440 target_lang_ == target_lang) { | 474 target_lang_ == target_lang) { |
441 // A similar translation is already under way, nothing to do. | 475 // A similar translation is already under way, nothing to do. |
442 return; | 476 return; |
443 } | 477 } |
444 | 478 |
445 // Any pending translation is now irrelevant. | 479 // Any pending translation is now irrelevant. |
446 CancelPendingTranslation(); | 480 CancelPendingTranslation(); |
447 | 481 |
448 // Set our states. | 482 // Set our states. |
449 translation_pending_ = true; | 483 translation_pending_ = true; |
450 page_id_ = page_id; | 484 page_id_ = page_id; |
451 // If the source language is undetermined, we'll let the translate element | 485 // If the source language is undetermined, we'll let the translate element |
452 // detect it. | 486 // detect it. |
453 source_lang_ = (source_lang != chrome::kUnknownLanguageCode) ? | 487 source_lang_ = (source_lang != chrome::kUnknownLanguageCode) ? |
454 source_lang : kAutoDetectionLanguage; | 488 source_lang : kAutoDetectionLanguage; |
455 target_lang_ = target_lang; | 489 target_lang_ = target_lang; |
456 | 490 |
457 TranslateHelperMetrics::ReportUserActionDuration(language_determined_time_, | 491 TranslateHelperMetrics::ReportUserActionDuration(language_determined_time_, |
458 base::TimeTicks::Now()); | 492 base::TimeTicks::Now()); |
459 | 493 |
460 GURL url(GetMainFrame()->document().url()); | 494 GURL url(main_frame->document().url()); |
461 TranslateHelperMetrics::ReportPageScheme(url.scheme()); | 495 TranslateHelperMetrics::ReportPageScheme(url.scheme()); |
462 | 496 |
463 if (!IsTranslateLibAvailable()) { | 497 if (!IsTranslateLibAvailable()) { |
464 // Evaluate the script to add the translation related method to the global | 498 // Evaluate the script to add the translation related method to the global |
465 // context of the page. | 499 // context of the page. |
466 ExecuteScript(translate_script); | 500 ExecuteScript(translate_script); |
467 DCHECK(IsTranslateLibAvailable()); | 501 DCHECK(IsTranslateLibAvailable()); |
468 } | 502 } |
469 | 503 |
470 TranslatePageImpl(0); | 504 TranslatePageImpl(0); |
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
583 TranslateErrors::Type error) { | 617 TranslateErrors::Type error) { |
584 translation_pending_ = false; | 618 translation_pending_ = false; |
585 // Notify the browser there was an error. | 619 // Notify the browser there was an error. |
586 render_view()->Send(new ChromeViewHostMsg_PageTranslated( | 620 render_view()->Send(new ChromeViewHostMsg_PageTranslated( |
587 render_view()->GetRoutingID(), page_id_, source_lang_, | 621 render_view()->GetRoutingID(), page_id_, source_lang_, |
588 target_lang_, error)); | 622 target_lang_, error)); |
589 } | 623 } |
590 | 624 |
591 WebFrame* TranslateHelper::GetMainFrame() { | 625 WebFrame* TranslateHelper::GetMainFrame() { |
592 WebView* web_view = render_view()->GetWebView(); | 626 WebView* web_view = render_view()->GetWebView(); |
593 if (!web_view) { | 627 |
594 // When the WebView is going away, the render view should have called | 628 // When the tab is going to be closed, the web_view can be NULL. |
595 // CancelPendingTranslation() which should have stopped any pending work, so | 629 if (!web_view) |
596 // that case should not happen. | |
597 NOTREACHED(); | |
598 return NULL; | 630 return NULL; |
599 } | 631 |
600 return web_view->mainFrame(); | 632 return web_view->mainFrame(); |
601 } | 633 } |
OLD | NEW |