Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(170)

Side by Side Diff: chrome/renderer/translate/translate_helper.cc

Issue 15728002: Translate: adopt html lang attribute if valid value is provided (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: fix WindowOpenClose crash Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/translate/translate_helper.h" 5 #include "chrome/renderer/translate/translate_helper.h"
6 6
7 #include "base/bind.h" 7 #include "base/bind.h"
8 #include "base/compiler_specific.h" 8 #include "base/compiler_specific.h"
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/message_loop.h" 10 #include "base/message_loop.h"
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
86 void TranslateHelper::PageCaptured(const string16& contents) { 86 void TranslateHelper::PageCaptured(const string16& contents) {
87 // Get the document language as set by WebKit from the http-equiv 87 // Get the document language as set by WebKit from the http-equiv
88 // meta tag for "content-language". This may or may not also 88 // meta tag for "content-language". This may or may not also
89 // have a value derived from the actual Content-Language HTTP 89 // have a value derived from the actual Content-Language HTTP
90 // header. The two actually have different meanings (despite the 90 // header. The two actually have different meanings (despite the
91 // original intent of http-equiv to be an equivalent) with the former 91 // original intent of http-equiv to be an equivalent) with the former
92 // being the language of the document and the latter being the 92 // being the language of the document and the latter being the
93 // language of the intended audience (a distinction really only 93 // language of the intended audience (a distinction really only
94 // relevant for things like language textbooks). This distinction 94 // relevant for things like language textbooks). This distinction
95 // shouldn't affect translation. 95 // shouldn't affect translation.
96 WebDocument document = GetMainFrame()->document(); 96 WebFrame* main_frame = GetMainFrame();
97 if (!main_frame)
98 return;
99 WebDocument document = main_frame->document();
97 std::string content_language = document.contentLanguage().utf8(); 100 std::string content_language = document.contentLanguage().utf8();
101 WebElement html_element = document.documentElement();
102 std::string html_lang;
103 // |html_element| can be a null element, e.g. in
104 // BrowserTest.WindowOpenClose.
105 if (!html_element.isNull())
106 html_lang = html_element.getAttribute("lang").utf8();
98 std::string cld_language; 107 std::string cld_language;
99 bool is_cld_reliable; 108 bool is_cld_reliable;
100 std::string language = DeterminePageLanguage( 109 std::string language = DeterminePageLanguage(
101 content_language, contents, &cld_language, &is_cld_reliable); 110 content_language, html_lang, contents, &cld_language, &is_cld_reliable);
102 111
103 if (language.empty()) 112 if (language.empty())
104 return; 113 return;
105 114
106 language_determined_time_ = base::TimeTicks::Now(); 115 language_determined_time_ = base::TimeTicks::Now();
107 116
117 // TODO(toyoshim): Add |html_lang| to LanguageDetectionDetails.
108 GURL url(document.url()); 118 GURL url(document.url());
109 LanguageDetectionDetails details; 119 LanguageDetectionDetails details;
110 details.time = base::Time::Now(); 120 details.time = base::Time::Now();
111 details.url = url; 121 details.url = url;
112 details.content_language = content_language; 122 details.content_language = content_language;
113 details.cld_language = cld_language; 123 details.cld_language = cld_language;
114 details.is_cld_reliable = is_cld_reliable; 124 details.is_cld_reliable = is_cld_reliable;
115 details.adopted_language = language; 125 details.adopted_language = language;
116 126
117 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( 127 Send(new ChromeViewHostMsg_TranslateLanguageDetermined(
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
312 // Roughly check if the language code follows [a-z][a-z](-[A-Z][A-Z]). 322 // Roughly check if the language code follows [a-z][a-z](-[A-Z][A-Z]).
313 size_t dash_index = code->find('-'); 323 size_t dash_index = code->find('-');
314 if (!(dash_index == 2 && code->size() == 5) && 324 if (!(dash_index == 2 && code->size() == 5) &&
315 !(dash_index == std::string::npos && code->size() == 2)) { 325 !(dash_index == std::string::npos && code->size() == 2)) {
316 // Reset |language| to ignore the invalid code. 326 // Reset |language| to ignore the invalid code.
317 *code = std::string(); 327 *code = std::string();
318 } 328 }
319 } 329 }
320 330
321 // static 331 // static
332 void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) {
333 // Correct well-known format errors.
334 CorrectLanguageCodeTypo(code);
335
336 // Convert language code synonyms first because a synonym code is sometimes in
337 // an invalid format, e.g. 'fil'. After validation, such a 3-character language
338 // code gets converted to an empty string.
339 ConvertLanguageCodeSynonym(code);
340 ResetInvalidLanguageCode(code);
341 }
342
343 // static
322 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, 344 std::string TranslateHelper::DeterminePageLanguage(const std::string& code,
345 const std::string& html_lang,
323 const string16& contents, 346 const string16& contents,
324 std::string* cld_language_p, 347 std::string* cld_language_p,
325 bool* is_cld_reliable_p) { 348 bool* is_cld_reliable_p) {
326 #if defined(ENABLE_LANGUAGE_DETECTION) 349 #if defined(ENABLE_LANGUAGE_DETECTION)
327 base::TimeTicks begin_time = base::TimeTicks::Now(); 350 base::TimeTicks begin_time = base::TimeTicks::Now();
328 bool is_cld_reliable; 351 bool is_cld_reliable;
329 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); 352 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable);
330 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, 353 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time,
331 base::TimeTicks::Now()); 354 base::TimeTicks::Now());
332 355
333 if (cld_language_p != NULL) 356 if (cld_language_p != NULL)
334 *cld_language_p = cld_language; 357 *cld_language_p = cld_language;
335 if (is_cld_reliable_p != NULL) 358 if (is_cld_reliable_p != NULL)
336 *is_cld_reliable_p = is_cld_reliable; 359 *is_cld_reliable_p = is_cld_reliable;
337 ConvertLanguageCodeSynonym(&cld_language); 360 ConvertLanguageCodeSynonym(&cld_language);
338 #endif // defined(ENABLE_LANGUAGE_DETECTION) 361 #endif // defined(ENABLE_LANGUAGE_DETECTION)
339 362
340 // Correct well-known format errors. 363 // Check if html lang attribute is valid.
341 std::string language = code; 364 std::string modified_html_lang;
342 CorrectLanguageCodeTypo(&language); 365 if (!html_lang.empty()) {
366 modified_html_lang = html_lang;
367 ApplyLanguageCodeCorrection(&modified_html_lang);
368 TranslateHelperMetrics::ReportHtmlLang(html_lang, modified_html_lang);
369 VLOG(9) << "html lang based language code: " << modified_html_lang;
370 }
343 371
344 // Convert language code synonym firstly because sometime synonym code is in 372 // Check if Content-Language is valid.
345 // invalid format, e.g. 'fil'. After validation, such a 3 characters language 373 std::string modified_code;
346 // gets converted to an empty string. 374 if (!code.empty()) {
347 ConvertLanguageCodeSynonym(&language); 375 modified_code = code;
348 ResetInvalidLanguageCode(&language); 376 ApplyLanguageCodeCorrection(&modified_code);
377 TranslateHelperMetrics::ReportContentLanguage(code, modified_code);
378 }
349 379
350 TranslateHelperMetrics::ReportContentLanguage(code, language); 380 // Adopt |modified_html_lang| if it is valid. Otherwise, adopt
381 // |modified_code|.
382 std::string language = modified_html_lang.empty() ? modified_code :
383 modified_html_lang;
351 384
352 #if defined(ENABLE_LANGUAGE_DETECTION) 385 #if defined(ENABLE_LANGUAGE_DETECTION)
353 // If |language| is empty, just use CLD result even though it might be 386 // If |language| is empty, just use CLD result even though it might be
354 // chrome::kUnknownLanguageCode. 387 // chrome::kUnknownLanguageCode.
355 if (language.empty()) { 388 if (language.empty()) {
356 TranslateHelperMetrics::ReportLanguageVerification( 389 TranslateHelperMetrics::ReportLanguageVerification(
357 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); 390 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY);
358 return cld_language; 391 return cld_language;
359 } 392 }
360 393
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
426 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation) 459 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation)
427 IPC_MESSAGE_UNHANDLED(handled = false) 460 IPC_MESSAGE_UNHANDLED(handled = false)
428 IPC_END_MESSAGE_MAP() 461 IPC_END_MESSAGE_MAP()
429 return handled; 462 return handled;
430 } 463 }
431 464
432 void TranslateHelper::OnTranslatePage(int page_id, 465 void TranslateHelper::OnTranslatePage(int page_id,
433 const std::string& translate_script, 466 const std::string& translate_script,
434 const std::string& source_lang, 467 const std::string& source_lang,
435 const std::string& target_lang) { 468 const std::string& target_lang) {
436 if (render_view()->GetPageId() != page_id) 469 WebFrame* main_frame = GetMainFrame();
470 if (!main_frame || render_view()->GetPageId() != page_id)
437 return; // We navigated away, nothing to do. 471 return; // We navigated away, nothing to do.
438 472
439 if (translation_pending_ && page_id == page_id_ && 473 if (translation_pending_ && page_id == page_id_ &&
440 target_lang_ == target_lang) { 474 target_lang_ == target_lang) {
441 // A similar translation is already under way, nothing to do. 475 // A similar translation is already under way, nothing to do.
442 return; 476 return;
443 } 477 }
444 478
445 // Any pending translation is now irrelevant. 479 // Any pending translation is now irrelevant.
446 CancelPendingTranslation(); 480 CancelPendingTranslation();
447 481
448 // Set our states. 482 // Set our states.
449 translation_pending_ = true; 483 translation_pending_ = true;
450 page_id_ = page_id; 484 page_id_ = page_id;
451 // If the source language is undetermined, we'll let the translate element 485 // If the source language is undetermined, we'll let the translate element
452 // detect it. 486 // detect it.
453 source_lang_ = (source_lang != chrome::kUnknownLanguageCode) ? 487 source_lang_ = (source_lang != chrome::kUnknownLanguageCode) ?
454 source_lang : kAutoDetectionLanguage; 488 source_lang : kAutoDetectionLanguage;
455 target_lang_ = target_lang; 489 target_lang_ = target_lang;
456 490
457 TranslateHelperMetrics::ReportUserActionDuration(language_determined_time_, 491 TranslateHelperMetrics::ReportUserActionDuration(language_determined_time_,
458 base::TimeTicks::Now()); 492 base::TimeTicks::Now());
459 493
460 GURL url(GetMainFrame()->document().url()); 494 GURL url(main_frame->document().url());
461 TranslateHelperMetrics::ReportPageScheme(url.scheme()); 495 TranslateHelperMetrics::ReportPageScheme(url.scheme());
462 496
463 if (!IsTranslateLibAvailable()) { 497 if (!IsTranslateLibAvailable()) {
464 // Evaluate the script to add the translation related method to the global 498 // Evaluate the script to add the translation related method to the global
465 // context of the page. 499 // context of the page.
466 ExecuteScript(translate_script); 500 ExecuteScript(translate_script);
467 DCHECK(IsTranslateLibAvailable()); 501 DCHECK(IsTranslateLibAvailable());
468 } 502 }
469 503
470 TranslatePageImpl(0); 504 TranslatePageImpl(0);
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
583 TranslateErrors::Type error) { 617 TranslateErrors::Type error) {
584 translation_pending_ = false; 618 translation_pending_ = false;
585 // Notify the browser there was an error. 619 // Notify the browser there was an error.
586 render_view()->Send(new ChromeViewHostMsg_PageTranslated( 620 render_view()->Send(new ChromeViewHostMsg_PageTranslated(
587 render_view()->GetRoutingID(), page_id_, source_lang_, 621 render_view()->GetRoutingID(), page_id_, source_lang_,
588 target_lang_, error)); 622 target_lang_, error));
589 } 623 }
590 624
591 WebFrame* TranslateHelper::GetMainFrame() { 625 WebFrame* TranslateHelper::GetMainFrame() {
592 WebView* web_view = render_view()->GetWebView(); 626 WebView* web_view = render_view()->GetWebView();
593 if (!web_view) { 627
594 // When the WebView is going away, the render view should have called 628 // When the tab is going to be closed, the web_view can be NULL.
595 // CancelPendingTranslation() which should have stopped any pending work, so 629 if (!web_view)
596 // that case should not happen.
597 NOTREACHED();
598 return NULL; 630 return NULL;
599 } 631
600 return web_view->mainFrame(); 632 return web_view->mainFrame();
601 } 633 }
OLDNEW
« no previous file with comments | « chrome/renderer/translate/translate_helper.h ('k') | chrome/renderer/translate/translate_helper_metrics.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698