Commit 25689773 authored by bcwhite@chromium.org

Get the document language directly from WebKit rather than trying to figure
it out ourselves.  Since WebKit is already case-insensitive, this fixes
test-case #3.  Test-case #1 requires WebKit support for extracting the
language from the HTTP headers (https://bugs.webkit.org/show_bug.cgi?id=97929).

Note that this patch will fail completely until a WebKit patch to export
this information is accepted (https://bugs.webkit.org/show_bug.cgi?id=98066).

BUG=145689


Review URL: https://chromiumcodereview.appspot.com/11052002

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@162212 0039d316-1c4b-4281-b951-d872f2087c98
parent dddc4a07
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "base/logging.h" #include "base/logging.h"
#include "base/message_loop.h" #include "base/message_loop.h"
#include "base/metrics/histogram.h" #include "base/metrics/histogram.h"
#include "base/string16.h"
#include "base/utf_string_conversions.h" #include "base/utf_string_conversions.h"
#include "chrome/common/chrome_constants.h" #include "chrome/common/chrome_constants.h"
#include "chrome/common/render_messages.h" #include "chrome/common/render_messages.h"
...@@ -60,9 +61,25 @@ TranslateHelper::~TranslateHelper() { ...@@ -60,9 +61,25 @@ TranslateHelper::~TranslateHelper() {
void TranslateHelper::PageCaptured(const string16& contents) { void TranslateHelper::PageCaptured(const string16& contents) {
WebDocument document = render_view()->GetWebView()->mainFrame()->document(); WebDocument document = render_view()->GetWebView()->mainFrame()->document();
// If the page explicitly specifies a language, use it, otherwise we'll
// determine it based on the text content using the CLD. // Get the document language as set by WebKit from the http-equiv
std::string language = GetPageLanguageFromMetaTag(&document); // meta tag for "content-language". This may or may not also
// have a value derived from the actual Content-Language HTTP
// header. The two actually have different meanings (despite the
// original intent of http-equiv to be an equivalent) with the former
// being the language of the document and the latter being the
// language of the intended audience (a distinction really only
// relevant for things like language textbooks). This distinction
// shouldn't affect translation.
std::string language = document.contentLanguage().utf8();
size_t coma_index = language.find(',');
if (coma_index != std::string::npos) {
// More than one language is specified; just keep the first one.
language = language.substr(0, coma_index);
}
TrimWhitespaceASCII(language, TRIM_ALL, &language);
language = StringToLowerASCII(language);
if (language.empty()) { if (language.empty()) {
base::TimeTicks begin_time = base::TimeTicks::Now(); base::TimeTicks begin_time = base::TimeTicks::Now();
language = DetermineTextLanguage(contents); language = DetermineTextLanguage(contents);
...@@ -104,40 +121,6 @@ bool TranslateHelper::IsPageTranslatable(WebDocument* document) { ...@@ -104,40 +121,6 @@ bool TranslateHelper::IsPageTranslatable(WebDocument* document) {
return true; return true;
} }
// static
// Extracts the page language from the "content-language" http-equiv meta tag
// of |document|.
// Returns the lower-cased, whitespace-trimmed first entry of that tag's
// "content" attribute, or an empty string when no such tag is found, the
// attribute is empty, or the attribute contains non-ASCII characters.
std::string TranslateHelper::GetPageLanguageFromMetaTag(WebDocument* document) {
// The META language tag looks like:
// <meta http-equiv="content-language" content="en">
// It can contain more than one language:
// <meta http-equiv="content-language" content="en, fr">
std::vector<WebElement> meta_elements;
// NOTE(review): presumably fills |meta_elements| with every <meta> element
// whose http-equiv attribute equals "content-language"; whether that match is
// case-sensitive depends on the helper, which is not visible here -- confirm.
webkit_glue::GetMetaElementsWithAttribute(document,
ASCIIToUTF16("http-equiv"),
ASCIIToUTF16("content-language"),
&meta_elements);
if (meta_elements.empty())
return std::string();
// We don't expect more than one such tag. If there are several, just use the
// first one.
WebString attribute = meta_elements[0].getAttribute("content");
if (attribute.isEmpty())
return std::string();
// The value is supposed to be ASCII.
if (!IsStringASCII(attribute))
return std::string();
// Normalize to lower case before extracting the first language entry.
std::string language = StringToLowerASCII(UTF16ToASCII(attribute));
size_t coma_index = language.find(',');
if (coma_index != std::string::npos) {
// More than one language is specified; just keep the first one.
language = language.substr(0, coma_index);
}
// Strip the whitespace that may surround the entry (e.g. " fr , es").
TrimWhitespaceASCII(language, TRIM_ALL, &language);
return language;
}
// static // static
std::string TranslateHelper::DetermineTextLanguage(const string16& text) { std::string TranslateHelper::DetermineTextLanguage(const string16& text) {
std::string language = chrome::kUnknownLanguageCode; std::string language = chrome::kUnknownLanguageCode;
......
...@@ -70,13 +70,6 @@ class TranslateHelper : public content::RenderViewObserver { ...@@ -70,13 +70,6 @@ class TranslateHelper : public content::RenderViewObserver {
// should not be translated. // should not be translated.
static bool IsPageTranslatable(WebKit::WebDocument* document); static bool IsPageTranslatable(WebKit::WebDocument* document);
// Returns the language specified in the language meta tag of |document|, or
// an empty string if no such tag was found.
// The tag may specify several languages, the first one is returned.
// Example of such meta-tag:
// <meta http-equiv="content-language" content="en, fr">
static std::string GetPageLanguageFromMetaTag(WebKit::WebDocument* document);
// Returns the ISO 639_1 language code of the specified |text|, or 'unknown' // Returns the ISO 639_1 language code of the specified |text|, or 'unknown'
// if it failed. // if it failed.
static std::string DetermineTextLanguage(const string16& text); static std::string DetermineTextLanguage(const string16& text);
......
...@@ -367,6 +367,35 @@ TEST_F(ChromeRenderViewTest, LanguageMetaTag) { ...@@ -367,6 +367,35 @@ TEST_F(ChromeRenderViewTest, LanguageMetaTag) {
EXPECT_EQ("fr", params.a); EXPECT_EQ("fr", params.a);
} }
// Tests that the language meta tag is honored even when the http-equiv value
// is not written entirely in lower case (here: "Content-Language").
// http://code.google.com/p/chromium/issues/detail?id=145689
TEST_F(ChromeRenderViewTest, LanguageMetaTagCase) {
// Suppress the normal delay that occurs when the page is loaded before which
// the renderer sends the page contents to the browser.
SendContentStateImmediately();
// Single language, mixed-case http-equiv attribute value.
LoadHTML("<html><head><meta http-equiv=\"Content-Language\" content=\"es\">"
"</head><body>A random page with random content.</body></html>");
const IPC::Message* message = render_thread_->sink().GetUniqueMessageMatching(
ChromeViewHostMsg_TranslateLanguageDetermined::ID);
ASSERT_NE(static_cast<IPC::Message*>(NULL), message);
ChromeViewHostMsg_TranslateLanguageDetermined::Param params;
ChromeViewHostMsg_TranslateLanguageDetermined::Read(message, &params);
// NOTE(review): params.a is presumably the detected language code -- confirm
// against the message definition.
EXPECT_EQ("es", params.a);
// Drop the recorded message so the next lookup only sees the second load.
render_thread_->sink().ClearMessages();
// Makes sure we support multiple languages specified.
// The content value carries extra whitespace around the entries; only the
// first entry, trimmed, is expected to be reported.
LoadHTML("<html><head><meta http-equiv=\"Content-Language\" "
"content=\" fr , es,en \">"
"</head><body>A random page with random content.</body></html>");
message = render_thread_->sink().GetUniqueMessageMatching(
ChromeViewHostMsg_TranslateLanguageDetermined::ID);
ASSERT_NE(static_cast<IPC::Message*>(NULL), message);
ChromeViewHostMsg_TranslateLanguageDetermined::Read(message, &params);
EXPECT_EQ("fr", params.a);
}
// Tests that a back navigation gets a translate language message. // Tests that a back navigation gets a translate language message.
TEST_F(ChromeRenderViewTest, BackToTranslatablePage) { TEST_F(ChromeRenderViewTest, BackToTranslatablePage) {
SendContentStateImmediately(); SendContentStateImmediately();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment