Commit 77d62249 authored by Chris Hall, committed by Commit Bot

Language detection: code style cleanup.

 - specifying types of constants.
 - clarifying comment references to functions and variables.
 - adding `using` aliases to shorten long library type names.
 - adding explicit type for value returned from library.

These changes came out of suggestions from thestig@ on crrev.com/c/1949807

R=thestig, akihiroota, dmazzoni

Change-Id: I25fc99843c27f7a41403f15e3e82d688fd5b44fe
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2096417
Commit-Queue: Chris Hall <chrishall@chromium.org>
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Reviewed-by: Akihiro Ota <akihiroota@chromium.org>
Reviewed-by: Dominic Mazzoni <dmazzoni@chromium.org>
Cr-Commit-Position: refs/heads/master@{#749064}
parent d025469c
...@@ -21,19 +21,22 @@ namespace ui { ...@@ -21,19 +21,22 @@ namespace ui {
namespace { namespace {
// This is the maximum number of languages we assign per page, so only the top // This is the maximum number of languages we assign per page, so only the top
// 3 languages on the top will be assigned to any node. // 3 languages on the top will be assigned to any node.
const auto kMaxDetectedLanguagesPerPage = 3; const int kMaxDetectedLanguagesPerPage = 3;
// This is the maximum number of languages that cld3 will detect for each // This is the maximum number of languages that cld3 will detect for each
// input we give it, 3 was recommended to us by the ML team as a good // input we give it, 3 was recommended to us by the ML team as a good
// starting point. // starting point.
const auto kMaxDetectedLanguagesPerSpan = 3; const int kMaxDetectedLanguagesPerSpan = 3;
const auto kShortTextIdentifierMinByteLength = 1; const int kShortTextIdentifierMinByteLength = 1;
// TODO(https://bugs.chromium.org/p/chromium/issues/detail?id=971360): // TODO(https://crbug.com/971360): Determine appropriate value for
// Determine appropriate value for kShortTextIdentifierMaxByteLength. // |kShortTextIdentifierMaxByteLength|.
const auto kShortTextIdentifierMaxByteLength = 1000; const int kShortTextIdentifierMaxByteLength = 1000;
} // namespace } // namespace
using Result = chrome_lang_id::NNetLanguageIdentifier::Result;
using SpanInfo = chrome_lang_id::NNetLanguageIdentifier::SpanInfo;
AXLanguageInfo::AXLanguageInfo() = default; AXLanguageInfo::AXLanguageInfo() = default;
AXLanguageInfo::~AXLanguageInfo() = default; AXLanguageInfo::~AXLanguageInfo() = default;
...@@ -54,7 +57,7 @@ void AXLanguageInfoStats::Add(const std::vector<std::string>& languages) { ...@@ -54,7 +57,7 @@ void AXLanguageInfoStats::Add(const std::vector<std::string>& languages) {
// Assign languages with higher probability a higher score. // Assign languages with higher probability a higher score.
// TODO(chrishall): consider more complex scoring // TODO(chrishall): consider more complex scoring
size_t score = kMaxDetectedLanguagesPerSpan; unsigned int score = kMaxDetectedLanguagesPerSpan;
for (const auto& lang : languages) { for (const auto& lang : languages) {
lang_counts_[lang] += score; lang_counts_[lang] += score;
...@@ -260,17 +263,18 @@ void AXLanguageDetectionManager::DetectLanguagesForNode(AXNode* node) { ...@@ -260,17 +263,18 @@ void AXLanguageDetectionManager::DetectLanguagesForNode(AXNode* node) {
// concatenation and bubbling up results. // concatenation and bubbling up results.
auto text = node->GetStringAttribute(ax::mojom::StringAttribute::kName); auto text = node->GetStringAttribute(ax::mojom::StringAttribute::kName);
// FindTopNMostFreqLangs will pad the results with // FindTopNMostFreqLangs() will pad the results with
// NNetLanguageIdentifier::kUnknown in order to reach the requested number // |NNetLanguageIdentifier::kUnknown| in order to reach the requested number
// of languages, this means we cannot rely on the results' length and we // of languages, this means we cannot rely on the results' length and we
// have to filter the results. // have to filter the results.
const auto results = language_identifier_.FindTopNMostFreqLangs( const std::vector<Result> results =
text, kMaxDetectedLanguagesPerSpan); language_identifier_.FindTopNMostFreqLangs(text,
kMaxDetectedLanguagesPerSpan);
std::vector<std::string> reliable_results; std::vector<std::string> reliable_results;
for (const auto& res : results) { for (const auto& res : results) {
// The output of FindTopNMostFreqLangs is already sorted by byte count, // The output of FindTopNMostFreqLangs() is already sorted by byte count,
// this seems good enough for now. // this seems good enough for now.
// Only consider results which are 'reliable', this will also remove // Only consider results which are 'reliable', this will also remove
// 'unknown'. // 'unknown'.
...@@ -405,13 +409,12 @@ AXLanguageDetectionManager::GetLanguageAnnotationForStringAttribute( ...@@ -405,13 +409,12 @@ AXLanguageDetectionManager::GetLanguageAnnotationForStringAttribute(
// Calculate top 3 languages. // Calculate top 3 languages.
// TODO(akihiroota): What's a reasonable number of languages to have // TODO(akihiroota): What's a reasonable number of languages to have
// cld_3 find? Should vary. // cld_3 find? Should vary.
std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> top_languages = std::vector<Result> top_languages =
short_text_language_identifier_.FindTopNMostFreqLangs( short_text_language_identifier_.FindTopNMostFreqLangs(
attr_value, kMaxDetectedLanguagesPerPage); attr_value, kMaxDetectedLanguagesPerPage);
// Create vector of AXLanguageSpans. // Create vector of AXLanguageSpans.
for (const auto& result : top_languages) { for (const auto& result : top_languages) {
std::vector<chrome_lang_id::NNetLanguageIdentifier::SpanInfo> ranges = std::vector<SpanInfo> ranges = result.byte_ranges;
result.byte_ranges;
for (const auto& span_info : ranges) { for (const auto& span_info : ranges) {
language_annotation.push_back( language_annotation.push_back(
AXLanguageSpan{span_info.start_index, span_info.end_index, AXLanguageSpan{span_info.start_index, span_info.end_index,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment