Commit 7fda0168, authored by Alexandre Frechette, committed by Commit Bot

Unit test to cross check serialized data against complete data.

(1) Add an optional test to check serialized data against complete data.
(2) Add a script to move data from a single arbitrary folder to the correct locations in Chromium.
(3) Update data to the latest version.
(4) Fix the serialized-data-to-C script so that the bytes-to-uint32 conversion is done manually.

Bug: 850947
Change-Id: I84d5a6e34a99eee60c973525f2c8358c583d3a87
Reviewed-on: https://chromium-review.googlesource.com/c/1383351
Reviewed-by: anthonyvd <anthonyvd@chromium.org>
Commit-Queue: Alexandre Frechette <frechette@chromium.org>
Cr-Commit-Position: refs/heads/master@{#622095}
parent 561141eb
...@@ -171,6 +171,7 @@ vs-chromium-project.txt ...@@ -171,6 +171,7 @@ vs-chromium-project.txt
/components/resources/default_100_percent/google_chrome /components/resources/default_100_percent/google_chrome
/components/resources/default_200_percent/google_chrome /components/resources/default_200_percent/google_chrome
/components/search_engines/prepopulated_engines.xml /components/search_engines/prepopulated_engines.xml
/components/test/data/language/
/components/suggestions.xml /components/suggestions.xml
/components/variations.xml /components/variations.xml
/components/zucchini/testdata/*.exe /components/zucchini/testdata/*.exe
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
# found in the LICENSE file. # found in the LICENSE file.
import("//build/config/compiler/compiler.gni") import("//build/config/compiler/compiler.gni")
import("//testing/test.gni")
action("ulp_serialized_to_static_c") { action("ulp_serialized_to_static_c") {
script = "ulp_serialized_to_static_c.py" script = "ulp_serialized_to_static_c.py"
...@@ -64,3 +65,20 @@ source_set("unit_tests") { ...@@ -64,3 +65,20 @@ source_set("unit_tests") {
"//third_party/s2cellid", "//third_party/s2cellid",
] ]
} }
# Test executable that compiles ulp_language_code_locator_datatest.cc together
# with the shared components test runner (run_all_unittests.cc).
test("data_tests") {
sources = [
"//components/test/run_all_unittests.cc",
"ulp_language_code_locator_datatest.cc",
]
# Targets the data test links against: the two local targets from this
# BUILD file plus base, the test support libraries, and s2cellid.
deps = [
":s2langquadtree",
":ulp_language_code_locator",
"//base",
"//base/test:test_support",
"//components/test:test_support",
"//testing/gmock",
"//testing/gtest",
"//third_party/s2cellid",
]
}
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <map>
#include <string>
#include <vector>
#include "base/files/file_util.h"
#include "base/logging.h"
#include "base/path_service.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "components/language/content/browser/ulp_language_code_locator/s2langquadtree.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/s2cellid/src/s2/s2cellid.h"
#include "third_party/s2cellid/src/s2/s2latlng.h"
namespace language {
namespace {
#include "components/language/content/browser/ulp_language_code_locator/ulp_language_code_locator_helper.h"
} // namespace
const std::map<S2LatLng, std::string> GetData() {
std::map<S2LatLng, std::string> latlng_to_lang;
std::string data;
base::FilePath source_dir;
CHECK(base::PathService::Get(base::DIR_SOURCE_ROOT, &source_dir));
base::FilePath data_dir =
source_dir.AppendASCII("components/test/data/language/");
base::FilePath data_filepath =
data_dir.AppendASCII("celltolang-data_rank0.csv");
if (!base::ReadFileToString(data_filepath, &data))
LOG(FATAL) << "Could not read data from `" << data_filepath << "`.";
std::vector<std::string> lines = base::SplitString(
data, "\n", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
for (size_t i = 0; i < lines.size(); ++i) {
// TODO(frechette) Remove once we ensured no empty line in data file.
if (lines[i].empty())
continue;
std::vector<std::string> fields = base::SplitString(
lines[i], ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
CHECK_EQ(3u, fields.size());
double lat, lng;
CHECK(base::StringToDouble(fields[0], &lat));
CHECK(base::StringToDouble(fields[1], &lng));
latlng_to_lang[S2LatLng::FromDegrees(lat, lng)] = fields[2];
}
return latlng_to_lang;
}
// Builds the quad tree from GetLanguages() / GetTreeSerialized() and checks
// that, for every coordinate returned by GetData(), looking up the S2 cell
// constructed from that coordinate yields the language recorded for it.
TEST(UlpLanguageCodeLocatorDataTest, TreeContainsData) {
  const S2LangQuadTreeNode tree =
      S2LangQuadTreeNode::Deserialize(GetLanguages(), GetTreeSerialized());
  const std::map<S2LatLng, std::string> expected = GetData();
  for (auto it = expected.begin(); it != expected.end(); ++it) {
    const S2LatLng& latlng = it->first;
    const std::string& language = it->second;
    S2CellId cell(latlng);
    // EXPECT (not ASSERT) so that every mismatching coordinate is reported.
    EXPECT_EQ(language, tree.Get(cell));
  }
}
} // namespace language
...@@ -5,11 +5,8 @@ ...@@ -5,11 +5,8 @@
"""Generate c++ structure containing serialized ULP language quad tree""" """Generate c++ structure containing serialized ULP language quad tree"""
import argparse import argparse
import csv
import os.path import os.path
import string
import sys import sys
import array
sys.path.insert(1, sys.path.insert(1,
os.path.join(os.path.dirname(__file__), os.path.join(os.path.dirname(__file__),
...@@ -24,15 +21,21 @@ import jinja2 # pylint: disable=F0401 ...@@ -24,15 +21,21 @@ import jinja2 # pylint: disable=F0401
def ReadSerializedData(input_path): def ReadSerializedData(input_path):
"""Read serialized ULP language quad tree""" """Read serialized ULP language quad tree"""
with open(input_path) as input_file:
with open(input_path, 'rb') as input_file:
data = input_file.read() data = input_file.read()
linebreak = data.index('\n') linebreak = data.index('\n')
# First line is comma-separated list of languages. # First line is comma-separated list of languages.
language_codes = data[:linebreak].strip().split(',') language_codes = data[:linebreak].strip().split(',')
# Rest of the file is the serialized tree. We read the bits as 32 bits, # Rest of the file is the serialized tree.
# unsigned int words. tree_bytes = data[linebreak+1:]
tree_serialized = array.array('I', data[linebreak+1:]) # We group the bytes in the string into 32 bits integers.
assert tree_serialized.itemsize == 4, "Items must be 4 bytes ints." tree_serialized = [
sum((ord(tree_bytes[i+b]) << (8*b)) if i+b < len(tree_bytes) else 0
for b in xrange(4))
for i in xrange(0, len(tree_bytes), 4)
]
return tree_serialized, language_codes return tree_serialized, language_codes
......
#!/bin/bash
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This script copies ULP language code locator data from a single folder
# to the right location in the Chromium source.
# Usage: <this script> <data_dir>
# The destination paths below are relative, so they resolve against the
# current working directory.
DIR=$1
# Quote ${DIR}: unquoted, a missing or empty argument collapses the test to
# `[ ! -d ]`, which evaluates to false, so the guard would be skipped.
if [ ! -d "${DIR}" ]; then
  echo "First argument ${DIR} is not an existing directory."
  # A bare `return` fails when the script is executed rather than sourced,
  # and execution would then fall through to the copies below. Try `return`
  # first (sourced case) and fall back to `exit` (executed case).
  return 1 2>/dev/null || exit 1
fi
if [ ! -d components/test/data/language ]; then
  echo "Making components/test/data/language"
  # -p also creates any missing parent directories.
  mkdir -p components/test/data/language
fi
# Copy the three rank files (0, 1, 2) of each data set to its destination.
for i in 0 1 2; do
  cp "${DIR}/geolanguage-data_rank${i}.txt" \
    components/language/content/browser/ulp_language_code_locator/
  cp "${DIR}/celltolang-data_rank${i}.csv" components/test/data/language/
done
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment