Commit 28cff5e2, authored by Sam Bowen, committed by Commit Bot

Add validator for schema org entities and types in doc metadata mojo.

* Add a validator that removes properties that are not the right type.
* Add double, time, and timedelta types in the document metadata mojo
  structure.
* Use the new mojo types in document metadata extractor.

Downstream clank will not be able to read fields parsed into the new
types, but will just skip over them. We should consider updating support
there.
http://cs/clank/java/src/com/google/android/apps/chrome/icing/AppIndexingReporterInternal.java?l=133

Bug: 1044250
Change-Id: Icfd7f0db74abf7bd1261b30dc416c94a7a23b4c0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2062907
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: Becca Hughes <beccahughes@chromium.org>
Commit-Queue: Sam Bowen <sgbowen@google.com>
Cr-Commit-Position: refs/heads/master@{#748357}
parent 57d82214
......@@ -7,11 +7,13 @@ source_set("unit_tests") {
sources = [
"extractor_unittest.cc",
"generate_schema_org_code_unittest.cc",
"validator_unittest.cc",
]
deps = [
":extractor",
":generate_schema_org_code",
":schema_org",
":schema_org_properties",
"//base",
"//components/schema_org/common:mojom",
......@@ -67,6 +69,20 @@ static_library("extractor") {
"extractor.h",
]
deps = [
"//components/schema_org:generate_schema_org_code",
"//components/schema_org:schema_org",
"//components/schema_org:schema_org_properties",
"//components/schema_org/common:mojom",
]
}
static_library("schema_org") {
sources = [
"validator.cc",
"validator.h",
]
deps = [
"//components/schema_org:generate_schema_org_code",
"//components/schema_org:schema_org_properties",
......
......@@ -8,5 +8,8 @@ mojom("mojom") {
generate_java = true
sources = [ "metadata.mojom" ]
public_deps = [ "//url/mojom:url_mojom_gurl" ]
public_deps = [
"//mojo/public/mojom/base",
"//url/mojom:url_mojom_gurl",
]
}
......@@ -4,12 +4,17 @@
module schema_org.mojom;
import "mojo/public/mojom/base/time.mojom";
// Due to the restriction of AppIndexing, all elements should be of the
// same type. Non-array values are converted to arrays of one element.
//
// Exactly one of the arrays below is active at a time.
union Values {
array<bool> bool_values;
array<int64> long_values;
array<string> string_values;
// Floating-point numbers.
array<double> double_values;
// Absolute points in time (used for both date and date-time properties).
array<mojo_base.mojom.Time> date_time_values;
// Times of day, expressed as a duration rather than an absolute time.
array<mojo_base.mojom.TimeDelta> time_values;
// Nested entities.
array<Entity> entity_values;
};
......
......@@ -15,6 +15,8 @@
#include "base/values.h"
#include "components/schema_org/common/metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/validator.h"
namespace schema_org {
......@@ -45,11 +47,111 @@ bool IsSupportedType(const std::string& type) {
return kSupportedTypes.find(type) != kSupportedTypes.end();
}
void ExtractEntity(base::DictionaryValue*, mojom::Entity&, int recursionLevel);
void ExtractEntity(base::DictionaryValue*, mojom::Entity&, int recursion_level);
// Initializes a vector of the appropriate type for the property.
bool InitializeStringValue(const std::string& property_type,
mojom::Values* values) {
schema_org::property::PropertyConfiguration prop_config =
schema_org::property::GetPropertyConfiguration(property_type);
if (prop_config.text) {
values->set_string_values(std::vector<std::string>());
} else if (prop_config.number) {
values->set_double_values(std::vector<double>());
} else if (prop_config.date_time || prop_config.date) {
values->set_date_time_values(std::vector<base::Time>());
} else if (prop_config.time) {
values->set_time_values(std::vector<base::TimeDelta>());
} else {
return false;
}
return true;
}
// Parses a string into a property value and appends it to |values|. The string
// may be parsed as a double, date, or time, depending on the types that the
// property named |property_type| supports. If the property supports text, the
// string itself is used. |value| is truncated to kMaxStringLength first.
//
// |values| must already have been prepared with InitializeStringValue(). That
// call only activates the array for the *first* type the property supports, so
// when a later candidate type is the one that actually parses, the matching
// array is activated here before the value is appended. Switching the active
// array replaces whatever |values| held before.
//
// Returns true if a value was appended, false if |value| could not be parsed
// as any type the property supports.
bool ParseStringValue(const std::string& property_type,
                      base::StringPiece value,
                      mojom::Values* values) {
  value = value.substr(0, kMaxStringLength);

  schema_org::property::PropertyConfiguration prop_config =
      schema_org::property::GetPropertyConfiguration(property_type);
  if (prop_config.text) {
    values->get_string_values().push_back(value.as_string());
    return true;
  }

  if (prop_config.number) {
    double d;
    if (base::StringToDouble(value, &d)) {
      // Reading a union member that is not the active one is invalid, so make
      // sure the double array is active before touching it.
      if (!values->is_double_values()) {
        values->set_double_values(std::vector<double>());
      }
      values->get_double_values().push_back(d);
      return true;
    }
  }

  // base::Time::FromString() takes a C string. A StringPiece's data() is not
  // guaranteed to be NUL-terminated and would ignore the truncation applied
  // above, so parse from an owned copy instead.
  const std::string value_string = value.as_string();

  if (prop_config.date_time || prop_config.date) {
    base::Time time;
    if (base::Time::FromString(value_string.c_str(), &time)) {
      if (!values->is_date_time_values()) {
        values->set_date_time_values(std::vector<base::Time>());
      }
      values->get_date_time_values().push_back(time);
      return true;
    }
  }

  if (prop_config.time) {
    // A bare time of day is parsed by attaching it to a fixed date and
    // subtracting the start of that same date.
    base::Time time_of_day;
    base::Time start_of_day;
    const bool parsed_time = base::Time::FromString(
        ("1970-01-01T" + value_string).c_str(), &time_of_day);
    const bool parsed_day_start =
        base::Time::FromString("1970-01-01T00:00:00", &start_of_day);

    // The string failed to parse as a DateTime, but did parse as a Time. Use
    // this value, initializing the vector first. (We cannot initialize it in
    // advance like the others, because we do not know if parsing will work in
    // advance.)
    if (parsed_time && parsed_day_start) {
      if (!values->is_time_values()) {
        values->set_time_values(std::vector<base::TimeDelta>());
      }
      values->get_time_values().push_back(time_of_day - start_of_day);
      return true;
    }
  }

  return false;
}
// Converts a list of JSON strings into a single typed array stored in
// |values|. At most kMaxRepeatedSize items are consumed. Returns false if the
// property named |property_type| accepts no string-representable type, if any
// consumed item is not a string, or if any string fails to parse.
bool ParseRepeatedString(const base::Value::ListView& arr,
                         const std::string& property_type,
                         mojom::Values* values) {
  if (!InitializeStringValue(property_type, values))
    return false;

  const size_t item_count = std::min(arr.size(), kMaxRepeatedSize);
  for (size_t index = 0; index < item_count; ++index) {
    const auto& item = arr[index];
    const bool parsed =
        item.type() == base::Value::Type::STRING &&
        ParseStringValue(property_type, item.GetString(), values);
    if (!parsed)
      return false;
  }
  return true;
}
// Parses a repeated property value and places the result in values. The result
// will be an array of a single type.
bool ParseRepeatedValue(base::Value::ListView& arr,
mojom::Values& values,
int recursionLevel) {
const std::string& property_type,
mojom::Values* values,
int recursion_level) {
if (arr.empty()) {
return false;
}
......@@ -58,30 +160,29 @@ bool ParseRepeatedValue(base::Value::ListView& arr,
base::Value::Type type = base::Value::Type::NONE;
for (size_t j = 0; j < std::min(arr.size(), kMaxRepeatedSize); ++j) {
auto& listItem = arr[j];
auto& list_item = arr[j];
if (is_first_item) {
is_first_item = false;
type = listItem.type();
type = list_item.type();
switch (type) {
case base::Value::Type::BOOLEAN:
values.set_bool_values(std::vector<bool>());
values->set_bool_values(std::vector<bool>());
break;
case base::Value::Type::INTEGER:
values.set_long_values(std::vector<int64_t>());
values->set_long_values(std::vector<int64_t>());
break;
case base::Value::Type::DOUBLE:
// App Indexing doesn't support double type, so just encode its
// decimal value as a string instead.
values.set_string_values(std::vector<std::string>());
values->set_string_values(std::vector<std::string>());
break;
case base::Value::Type::STRING:
values.set_string_values(std::vector<std::string>());
break;
return ParseRepeatedString(arr, property_type, values);
case base::Value::Type::DICTIONARY:
if (recursionLevel + 1 >= kMaxDepth) {
if (recursion_level + 1 >= kMaxDepth) {
return false;
}
values.set_entity_values(std::vector<mojom::EntityPtr>());
values->set_entity_values(std::vector<mojom::EntityPtr>());
break;
case base::Value::Type::LIST:
// App Indexing doesn't support nested arrays.
......@@ -92,41 +193,36 @@ bool ParseRepeatedValue(base::Value::ListView& arr,
}
}
if (listItem.type() != type) {
if (list_item.type() != type) {
// App Indexing doesn't support mixed types. If there are mixed
// types in the parsed object, we will drop the property.
return false;
}
switch (listItem.type()) {
switch (list_item.type()) {
case base::Value::Type::BOOLEAN: {
bool v;
listItem.GetAsBoolean(&v);
values.get_bool_values().push_back(v);
list_item.GetAsBoolean(&v);
values->get_bool_values().push_back(v);
} break;
case base::Value::Type::INTEGER: {
int v = listItem.GetInt();
values.get_long_values().push_back(v);
int v = list_item.GetInt();
values->get_long_values().push_back(v);
} break;
case base::Value::Type::DOUBLE: {
// App Indexing doesn't support double type, so just encode its decimal
// value as a string instead.
double v = listItem.GetDouble();
double v = list_item.GetDouble();
std::string s = base::NumberToString(v);
s = s.substr(0, kMaxStringLength);
values.get_string_values().push_back(s);
} break;
case base::Value::Type::STRING: {
std::string v = listItem.GetString();
v = v.substr(0, kMaxStringLength);
values.get_string_values().push_back(v);
values->get_string_values().push_back(std::move(s));
} break;
case base::Value::Type::DICTIONARY: {
values.get_entity_values().push_back(mojom::Entity::New());
values->get_entity_values().push_back(mojom::Entity::New());
base::DictionaryValue* dict_value = nullptr;
if (listItem.GetAsDictionary(&dict_value)) {
ExtractEntity(dict_value, *(values.get_entity_values().at(j)),
recursionLevel + 1);
if (list_item.GetAsDictionary(&dict_value)) {
ExtractEntity(dict_value, *(values->get_entity_values().at(j)),
recursion_level + 1);
}
} break;
default:
......@@ -138,8 +234,8 @@ bool ParseRepeatedValue(base::Value::ListView& arr,
void ExtractEntity(base::DictionaryValue* val,
mojom::Entity& entity,
int recursionLevel) {
if (recursionLevel >= kMaxDepth) {
int recursion_level) {
if (recursion_level >= kMaxDepth) {
return;
}
......@@ -160,27 +256,26 @@ void ExtractEntity(base::DictionaryValue* val,
}
property->values = mojom::Values::New();
if (entry.second.is_bool()) {
bool v;
val->GetBoolean(entry.first, &v);
property->values->set_bool_values({v});
} else if (entry.second.is_int()) {
int v;
val->GetInteger(entry.first, &v);
property->values->set_long_values({v});
} else if (entry.second.is_double()) {
double v;
val->GetDouble(entry.first, &v);
std::string s = base::NumberToString(v);
s = s.substr(0, kMaxStringLength);
property->values->set_string_values({s});
} else if (entry.second.is_string()) {
std::string v;
val->GetString(entry.first, &v);
v = v.substr(0, kMaxStringLength);
property->values->set_string_values({v});
} else if (entry.second.is_dict()) {
if (recursionLevel + 1 >= kMaxDepth) {
switch (entry.second.type()) {
case base::Value::Type::BOOLEAN:
property->values->set_bool_values({entry.second.GetBool()});
break;
case base::Value::Type::INTEGER:
property->values->set_long_values({entry.second.GetInt()});
break;
case base::Value::Type::DOUBLE:
property->values->set_double_values({entry.second.GetDouble()});
break;
case base::Value::Type::STRING: {
base::StringPiece v = entry.second.GetString();
if (!(InitializeStringValue(property->name, property->values.get()) &&
ParseStringValue(property->name, v, property->values.get()))) {
continue;
}
break;
}
case base::Value::Type::DICTIONARY: {
if (recursion_level + 1 >= kMaxDepth) {
continue;
}
property->values->set_entity_values(std::vector<mojom::EntityPtr>());
......@@ -190,11 +285,21 @@ void ExtractEntity(base::DictionaryValue* val,
if (!entry.second.GetAsDictionary(&dict_value)) {
continue;
}
ExtractEntity(dict_value, *(property->values->get_entity_values().at(0)),
recursionLevel + 1);
} else if (entry.second.is_list()) {
ExtractEntity(dict_value,
*(property->values->get_entity_values().at(0)),
recursion_level + 1);
break;
}
case base::Value::Type::LIST: {
base::Value::ListView list_view = entry.second.GetList();
if (!ParseRepeatedValue(list_view, *(property->values), recursionLevel)) {
if (!ParseRepeatedValue(list_view, property->name,
property->values.get(), recursion_level)) {
continue;
}
break;
}
default: {
// Unsupported value type. Skip this property.
continue;
}
}
......@@ -225,7 +330,14 @@ mojom::EntityPtr Extractor::Extract(const std::string& content) {
return nullptr;
}
return ExtractTopLevelEntity(dict_value);
mojom::EntityPtr entity = ExtractTopLevelEntity(dict_value);
bool is_valid = false;
if (!entity.is_null()) {
is_valid = ValidateEntity(entity.get());
}
return is_valid ? std::move(entity) : nullptr;
}
} // namespace schema_org
......@@ -37,6 +37,14 @@ class SchemaOrgExtractorTest : public testing::Test {
PropertyPtr CreateLongProperty(const std::string& name, const int64_t& value);
PropertyPtr CreateDoubleProperty(const std::string& name, double value);
PropertyPtr CreateDateTimeProperty(const std::string& name,
const base::Time& value);
PropertyPtr CreateTimeProperty(const std::string& name,
const base::TimeDelta& value);
PropertyPtr CreateEntityProperty(const std::string& name, EntityPtr value);
};
......@@ -69,6 +77,36 @@ PropertyPtr SchemaOrgExtractorTest::CreateLongProperty(const std::string& name,
return property;
}
// Builds a property called |name| holding the single double |value|.
PropertyPtr SchemaOrgExtractorTest::CreateDoubleProperty(
    const std::string& name,
    double value) {
  auto result = Property::New();
  result->values = Values::New();
  result->values->set_double_values({value});
  result->name = name;
  return result;
}
// Builds a property called |name| holding the single date-time |value|.
PropertyPtr SchemaOrgExtractorTest::CreateDateTimeProperty(
    const std::string& name,
    const base::Time& value) {
  auto result = Property::New();
  result->values = Values::New();
  result->values->set_date_time_values({value});
  result->name = name;
  return result;
}
// Builds a property called |name| holding the single time-of-day |value|.
PropertyPtr SchemaOrgExtractorTest::CreateTimeProperty(
    const std::string& name,
    const base::TimeDelta& value) {
  auto result = Property::New();
  result->values = Values::New();
  result->values->set_time_values({value});
  result->name = name;
  return result;
}
PropertyPtr SchemaOrgExtractorTest::CreateEntityProperty(
const std::string& name,
EntityPtr value) {
......@@ -96,7 +134,7 @@ TEST_F(SchemaOrgExtractorTest, Basic) {
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, booleanValue) {
TEST_F(SchemaOrgExtractorTest, BooleanValue) {
EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"requiresSubscription\": true }");
ASSERT_FALSE(extracted.is_null());
......@@ -109,7 +147,7 @@ TEST_F(SchemaOrgExtractorTest, booleanValue) {
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, longValue) {
TEST_F(SchemaOrgExtractorTest, LongValue) {
EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"position\": 111 }");
ASSERT_FALSE(extracted.is_null());
......@@ -121,14 +159,75 @@ TEST_F(SchemaOrgExtractorTest, longValue) {
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, doubleValue) {
TEST_F(SchemaOrgExtractorTest, DoubleValue) {
EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"copyrightYear\": 1999.5 }");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
expected->properties.push_back(CreateDoubleProperty("copyrightYear", 1999.5));
EXPECT_EQ(expected, extracted);
}
// A JSON string on copyrightYear is expected to be extracted as a double.
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDouble) {
  EntityPtr want = Entity::New();
  want->type = "VideoObject";
  want->properties.push_back(CreateDoubleProperty("copyrightYear", 1999.5));

  EntityPtr got =
      Extract(R"({"@type": "VideoObject","copyrightYear": "1999.5"})");
  ASSERT_FALSE(got.is_null());

  EXPECT_EQ(want, got);
}
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingTime) {
EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"width\": 111.5 }");
Extract("{\"@type\": \"VideoObject\",\"startTime\": \"05:30:00\"}");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
expected->properties.push_back(CreateTimeProperty(
"startTime", base::TimeDelta::FromMinutes(60 * 5 + 30)));
EXPECT_EQ(expected, extracted);
}
// startTime can be a DateTime or a Time. If it parses as DateTime successfully,
// we should use that type.
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDateTimeOrTime) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\",\"startTime\": "
"\"2012-12-12T00:00:00 GMT\"}");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
expected->properties.push_back(CreateStringProperty("width", "111.5"));
expected->properties.push_back(CreateDateTimeProperty(
"startTime", base::Time::FromDeltaSinceWindowsEpoch(
base::TimeDelta::FromMilliseconds(12999744000000))));
EXPECT_EQ(expected, extracted);
}
// A date-time string on dateCreated is expected to be extracted as a
// base::Time.
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDateTime) {
  const base::Time created = base::Time::FromDeltaSinceWindowsEpoch(
      base::TimeDelta::FromMilliseconds(12999744000000));
  EntityPtr want = Entity::New();
  want->type = "VideoObject";
  want->properties.push_back(CreateDateTimeProperty("dateCreated", created));

  EntityPtr got = Extract(
      R"({"@type": "VideoObject","dateCreated": "2012-12-12T00:00:00 GMT"})");
  ASSERT_FALSE(got.is_null());

  EXPECT_EQ(want, got);
}
......@@ -265,27 +364,43 @@ TEST_F(SchemaOrgExtractorTest, TruncateTooManyValuesInField) {
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, truncateTooManyFields) {
std::stringstream tooManyFields;
for (int i = 0; i < 26; ++i) {
tooManyFields << "\"" << i << "\": \"a\"";
if (i != 25) {
tooManyFields << ",";
}
}
EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\"," + tooManyFields.str() + "}");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
TEST_F(SchemaOrgExtractorTest, TruncateTooManyProperties) {
// Create an entity with more than the supported number of properties. All the
// properties must be valid to be included. 26 properties below, should
// truncate to 25.
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\","
"\"name\": \"a video!\","
"\"transcript\":\"a short movie\","
"\"videoFrameSize\":\"1200x800\","
"\"videoQuality\":\"high\","
"\"bitrate\":\"24mbps\","
"\"contentSize\":\"8MB\","
"\"encodingFormat\":\"H264\","
"\"accessMode\":\"visual\","
"\"accessibilitySummary\":\"short description\","
"\"alternativeHeadline\":\"OR other title\","
"\"award\":\"best picture\","
"\"educationalUse\":\"assignment\","
"\"headline\":\"headline\","
"\"interactivityType\":\"active\","
"\"keywords\":\"video\","
"\"learningResourceType\":\"presentation\","
"\"material\":\"film\","
"\"mentions\":\"other work\","
"\"schemaVersion\":\"http://schema.org/version/2.0/\","
"\"text\":\"a short work\","
"\"typicalAgeRange\":\"5-\","
"\"version\":\"5\","
"\"alternateName\":\"other title\","
"\"description\":\"a short description\","
"\"disambiguatingDescription\":\"clarifying point\","
"\"identifier\":\"ID12345\""
"}");
for (int i = 0; i < 25; ++i) {
expected->properties.push_back(
CreateStringProperty(base::NumberToString(i), "a"));
}
ASSERT_FALSE(extracted.is_null());
EXPECT_EQ(expected->properties.size(), extracted->properties.size());
EXPECT_EQ(25u, extracted->properties.size());
}
TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithEmptyArray) {
......@@ -323,12 +438,12 @@ TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithNestedArray) {
TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": \"a video!\","
"\"1\": {"
" \"2\": {"
" \"3\": {"
" \"4\": {"
" \"5\": {"
" \"6\": 7"
"\"actor\": {"
" \"address\": {"
" \"addressCountry\": {"
" \"containedInPlace\": {"
" \"containedInPlace\": {"
" \"name\": \"matroska\""
" }"
" }"
" }"
......@@ -349,10 +464,14 @@ TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) {
EntityPtr entity4 = Entity::New();
entity4->type = "Thing";
entity3->properties.push_back(CreateEntityProperty("4", std::move(entity4)));
entity2->properties.push_back(CreateEntityProperty("3", std::move(entity3)));
entity1->properties.push_back(CreateEntityProperty("2", std::move(entity2)));
expected->properties.push_back(CreateEntityProperty("1", std::move(entity1)));
entity3->properties.push_back(
CreateEntityProperty("containedInPlace", std::move(entity4)));
entity2->properties.push_back(
CreateEntityProperty("addressCountry", std::move(entity3)));
entity1->properties.push_back(
CreateEntityProperty("address", std::move(entity2)));
expected->properties.push_back(
CreateEntityProperty("actor", std::move(entity1)));
expected->properties.push_back(CreateStringProperty("name", "a video!"));
EXPECT_EQ(expected, extracted);
......@@ -361,11 +480,11 @@ TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) {
TEST_F(SchemaOrgExtractorTest, MaxNestingDepthWithTerminalProperty) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": \"a video!\","
"\"1\": {"
" \"2\": {"
" \"3\": {"
" \"4\": {"
" \"5\": 6"
"\"actor\": {"
" \"address\": {"
" \"addressCountry\": {"
" \"containedInPlace\": {"
" \"name\": \"matroska\""
" }"
" }"
" }"
......@@ -385,12 +504,16 @@ TEST_F(SchemaOrgExtractorTest, MaxNestingDepthWithTerminalProperty) {
EntityPtr entity4 = Entity::New();
entity4->type = "Thing";
entity4->properties.push_back(CreateLongProperty("5", 6));
entity3->properties.push_back(CreateEntityProperty("4", std::move(entity4)));
entity2->properties.push_back(CreateEntityProperty("3", std::move(entity3)));
entity1->properties.push_back(CreateEntityProperty("2", std::move(entity2)));
entity4->properties.push_back(CreateStringProperty("name", "matroska"));
entity3->properties.push_back(
CreateEntityProperty("containedInPlace", std::move(entity4)));
entity2->properties.push_back(
CreateEntityProperty("addressCountry", std::move(entity3)));
entity1->properties.push_back(
CreateEntityProperty("address", std::move(entity2)));
expected->properties.push_back(CreateEntityProperty("1", std::move(entity1)));
expected->properties.push_back(
CreateEntityProperty("actor", std::move(entity1)));
expected->properties.push_back(CreateStringProperty("name", "a video!"));
EXPECT_EQ(expected, extracted);
......
......@@ -14,6 +14,11 @@ TEST(GenerateSchemaOrgTest, EntityName) {
EXPECT_STREQ(entity::kAboutPage, "AboutPage");
}
// A generated entity name is accepted; an arbitrary string is rejected.
TEST(GenerateSchemaOrgTest, IsValidEntityName) {
  const std::string known_name = entity::kAboutPage;
  const std::string unknown_name = "a made up name";

  EXPECT_TRUE(entity::IsValidEntityName(known_name));
  EXPECT_FALSE(entity::IsValidEntityName(unknown_name));
}
TEST(GenerateSchemaOrgTest, PropertyName) {
EXPECT_STREQ(property::kAcceptedAnswer, "acceptedAnswer");
}
......
......@@ -6,6 +6,9 @@
// Do not edit.
#include "components/schema_org/{{ header_file }}.h"
#include "base/containers/flat_set.h"
#include "base/no_destructor.h"
#include "base/strings/string_piece.h"
namespace schema_org {
namespace entity {
......@@ -14,5 +17,15 @@ namespace entity {
const char k{{entity[0]|upper}}{{entity[1:]}}[] = "{{entity}}";
{% endfor %}
// Returns true if |entity_name| exactly matches one of the entity name
// constants generated above.
bool IsValidEntityName(const std::string& entity_name) {
// The lookup set is a function-local static wrapped in base::NoDestructor.
// It holds one StringPiece per generated entity name constant.
static const base::NoDestructor<base::flat_set<base::StringPiece>>
kValidEntityNames(base::flat_set<base::StringPiece>({
{%for entity in entities %}
k{{entity[0]|upper}}{{entity[1:]}},
{% endfor %}
}));
return kValidEntityNames->find(entity_name) != kValidEntityNames->end();
}
} // entity
} // schema_org
......@@ -17,6 +17,8 @@ namespace entity {
extern const char k{{entity[0]|upper}}{{entity[1:]}}[];
{% endfor %}
// Returns true if |entity_name| is one of the entity names declared above.
bool IsValidEntityName(const std::string& entity_name);
} // namespace entity
} // namespace schema_org
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/schema_org/validator.h"
#include <vector>
#include "components/schema_org/common/metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h"
namespace schema_org {
using mojom::Entity;
using mojom::EntityPtr;
// Checks |entity| against the Schema.org configuration and prunes it in place.
//
// Returns false, without modifying anything, when the entity's own type is not
// a known entity name. Otherwise returns true after removing every property
// whose values do not fit the property's configuration:
//  * string / double / time / date-time values are removed when the property
//    does not accept text / number / time / date or date-time respectively;
//  * entity values are removed when the property accepts no entity types, or
//    when none of the nested entities validates. Nested entities that fail
//    validation are dropped individually.
// Properties holding any other kind of value (e.g. bool or long) are kept
// as-is.
bool ValidateEntity(Entity* entity) {
  if (!entity::IsValidEntityName(entity->type))
    return false;

  auto& properties = entity->properties;
  auto prop_it = properties.begin();
  while (prop_it != properties.end()) {
    const property::PropertyConfiguration config =
        property::GetPropertyConfiguration((*prop_it)->name);
    auto* values = (*prop_it)->values.get();

    bool drop_property = false;
    if (values->is_string_values()) {
      drop_property = !config.text;
    } else if (values->is_double_values()) {
      drop_property = !config.number;
    } else if (values->is_time_values()) {
      drop_property = !config.time;
    } else if (values->is_date_time_values()) {
      drop_property = !config.date_time && !config.date;
    } else if (values->is_entity_values()) {
      if (config.thing_types.empty()) {
        // Property is not supposed to have an entity type.
        drop_property = true;
      } else {
        // Recursively validate the nested entities, discarding invalid ones.
        auto& nested_entities = values->get_entity_values();
        auto nested_it = nested_entities.begin();
        while (nested_it != nested_entities.end()) {
          if (ValidateEntity(nested_it->get())) {
            ++nested_it;
          } else {
            nested_it = nested_entities.erase(nested_it);
          }
        }
        // Everything still in the list passed validation, so the property is
        // only worth keeping if something is left.
        drop_property = nested_entities.empty();
      }
    }

    if (drop_property) {
      prop_it = properties.erase(prop_it);
    } else {
      ++prop_it;
    }
  }

  return true;
}
} // namespace schema_org
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
#define COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
#include "components/schema_org/common/metadata.mojom-forward.h"
namespace schema_org {
// Validates and cleans up the Schema.org entity in-place. Invalid properties
// will be removed from the entity. Returns true if the entity was valid.
bool ValidateEntity(mojom::Entity* entity);
} // namespace schema_org
#endif // COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <memory>
#include <utility>
#include <vector>
#include "components/schema_org/common/metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h"
#include "components/schema_org/validator.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace schema_org {
using mojom::Entity;
using mojom::EntityPtr;
using mojom::Property;
using mojom::PropertyPtr;
using mojom::Values;
// Test fixture for the validator tests; it adds no state or helpers of its
// own.
class SchemaOrgValidatorTest : public testing::Test {};
// An entity whose type is not a known entity name fails validation.
TEST_F(SchemaOrgValidatorTest, InvalidEntityType) {
  auto unknown = Entity::New();
  unknown->type = "random entity type";

  EXPECT_FALSE(ValidateEntity(unknown.get()));
}
// A string value on accessMode is expected to survive validation.
TEST_F(SchemaOrgValidatorTest, ValidStringPropertyValue) {
  auto access_mode = Property::New();
  access_mode->name = property::kAccessMode;
  access_mode->values = Values::New();
  access_mode->values->set_string_values({"foo"});

  auto page = Entity::New();
  page->type = entity::kAboutPage;
  page->properties.push_back(std::move(access_mode));

  EXPECT_TRUE(ValidateEntity(page.get()));
  EXPECT_EQ(1u, page->properties.size());
}
// A string value on about is expected to be stripped, while the entity itself
// still validates.
TEST_F(SchemaOrgValidatorTest, InvalidStringPropertyValue) {
  auto about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->set_string_values({"foo"});

  auto page = Entity::New();
  page->type = entity::kAboutPage;
  page->properties.push_back(std::move(about));

  EXPECT_TRUE(ValidateEntity(page.get()));
  EXPECT_TRUE(page->properties.empty());
}
// A double value on additionalNumberOfGuests is expected to survive
// validation.
TEST_F(SchemaOrgValidatorTest, ValidNumberPropertyValue) {
  auto guests = Property::New();
  guests->name = property::kAdditionalNumberOfGuests;
  guests->values = Values::New();
  guests->values->set_double_values({1.0});

  auto residence = Entity::New();
  residence->type = entity::kSingleFamilyResidence;
  residence->properties.push_back(std::move(guests));

  EXPECT_TRUE(ValidateEntity(residence.get()));
  EXPECT_EQ(1u, residence->properties.size());
}
// A double value on about is expected to be stripped, while the entity itself
// still validates.
TEST_F(SchemaOrgValidatorTest, InvalidNumberPropertyValue) {
  auto about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->set_double_values({1.0});

  auto page = Entity::New();
  page->type = entity::kAboutPage;
  page->properties.push_back(std::move(about));

  EXPECT_TRUE(ValidateEntity(page.get()));
  EXPECT_TRUE(page->properties.empty());
}
// A date-time value on checkinTime is expected to survive validation.
TEST_F(SchemaOrgValidatorTest, ValidDateTimePropertyValue) {
  const base::Time checkin = base::Time::FromDeltaSinceWindowsEpoch(
      base::TimeDelta::FromMilliseconds(12999772800000));

  auto checkin_time = Property::New();
  checkin_time->name = property::kCheckinTime;
  checkin_time->values = Values::New();
  checkin_time->values->set_date_time_values({checkin});

  auto lodging = Entity::New();
  lodging->type = entity::kLodgingBusiness;
  lodging->properties.push_back(std::move(checkin_time));

  EXPECT_TRUE(ValidateEntity(lodging.get()));
  EXPECT_EQ(1u, lodging->properties.size());
}
// A date-time value on about is expected to be stripped, while the entity
// itself still validates.
TEST_F(SchemaOrgValidatorTest, InvalidDateTimePropertyValue) {
  const base::Time when = base::Time::FromDeltaSinceWindowsEpoch(
      base::TimeDelta::FromMilliseconds(12999772800000));

  auto about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->set_date_time_values({when});

  auto page = Entity::New();
  page->type = entity::kAboutPage;
  page->properties.push_back(std::move(about));

  EXPECT_TRUE(ValidateEntity(page.get()));
  EXPECT_TRUE(page->properties.empty());
}
// A time value on checkinTime is expected to survive validation.
TEST_F(SchemaOrgValidatorTest, ValidTimePropertyValue) {
  const base::TimeDelta checkin =
      base::TimeDelta::FromMilliseconds(12999772800000);

  auto checkin_time = Property::New();
  checkin_time->name = property::kCheckinTime;
  checkin_time->values = Values::New();
  checkin_time->values->set_time_values({checkin});

  auto lodging = Entity::New();
  lodging->type = entity::kLodgingBusiness;
  lodging->properties.push_back(std::move(checkin_time));

  EXPECT_TRUE(ValidateEntity(lodging.get()));
  EXPECT_EQ(1u, lodging->properties.size());
}
// A time value on about is expected to be stripped, while the entity itself
// still validates.
TEST_F(SchemaOrgValidatorTest, InvalidTimePropertyValue) {
  const base::TimeDelta when =
      base::TimeDelta::FromMilliseconds(12999772800000);

  auto about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->set_time_values({when});

  auto page = Entity::New();
  page->type = entity::kAboutPage;
  page->properties.push_back(std::move(about));

  EXPECT_TRUE(ValidateEntity(page.get()));
  EXPECT_TRUE(page->properties.empty());
}
// A nested PostalAddress on address is expected to survive validation.
TEST_F(SchemaOrgValidatorTest, ValidEntityPropertyValue) {
  auto postal_address = Entity::New();
  postal_address->type = entity::kPostalAddress;
  std::vector<EntityPtr> nested;
  nested.push_back(std::move(postal_address));

  auto address = Property::New();
  address->name = property::kAddress;
  address->values = Values::New();
  address->values->set_entity_values(std::move(nested));

  auto restaurant = Entity::New();
  restaurant->type = entity::kRestaurant;
  restaurant->properties.push_back(std::move(address));

  EXPECT_TRUE(ValidateEntity(restaurant.get()));
  EXPECT_EQ(1u, restaurant->properties.size());
}
// A nested entity on accessMode is expected to be stripped, while the outer
// entity still validates.
TEST_F(SchemaOrgValidatorTest, InvalidEntityPropertyValue) {
  auto postal_address = Entity::New();
  postal_address->type = entity::kPostalAddress;
  std::vector<EntityPtr> nested;
  nested.push_back(std::move(postal_address));

  auto access_mode = Property::New();
  access_mode->name = property::kAccessMode;
  access_mode->values = Values::New();
  access_mode->values->set_entity_values(std::move(nested));

  auto page = Entity::New();
  page->type = entity::kAboutPage;
  page->properties.push_back(std::move(access_mode));

  EXPECT_TRUE(ValidateEntity(page.get()));
  EXPECT_TRUE(page->properties.empty());
}
// Two valid nested entities on address are both expected to be kept.
TEST_F(SchemaOrgValidatorTest, ValidRepeatedEntityPropertyValue) {
  auto first_address = Entity::New();
  first_address->type = entity::kPostalAddress;
  auto second_address = Entity::New();
  second_address->type = entity::kPostalAddress;

  std::vector<EntityPtr> nested;
  nested.push_back(std::move(first_address));
  nested.push_back(std::move(second_address));

  auto address = Property::New();
  address->name = property::kAddress;
  address->values = Values::New();
  address->values->set_entity_values(std::move(nested));

  auto restaurant = Entity::New();
  restaurant->type = entity::kRestaurant;
  restaurant->properties.push_back(std::move(address));

  EXPECT_TRUE(ValidateEntity(restaurant.get()));
  EXPECT_EQ(1u, restaurant->properties.size());
  EXPECT_EQ(2u,
            restaurant->properties[0]->values->get_entity_values().size());
}
// When a repeated entity property mixes valid and invalid nested entities, the
// validator should keep the outer property and drop only the invalid nested
// entity.
TEST_F(SchemaOrgValidatorTest, MixedValidityRepeatedEntityPropertyValue) {
  auto good_address = Entity::New();
  good_address->type = entity::kPostalAddress;
  auto bad_address = Entity::New();
  bad_address->type = "bad address";

  std::vector<EntityPtr> nested;
  nested.push_back(std::move(good_address));
  nested.push_back(std::move(bad_address));

  auto address = Property::New();
  address->name = property::kAddress;
  address->values = Values::New();
  address->values->set_entity_values(std::move(nested));

  auto restaurant = Entity::New();
  restaurant->type = entity::kRestaurant;
  restaurant->properties.push_back(std::move(address));

  EXPECT_TRUE(ValidateEntity(restaurant.get()));
  EXPECT_EQ(1u, restaurant->properties.size());
  EXPECT_EQ(1u,
            restaurant->properties[0]->values->get_entity_values().size());
}
// When every nested entity of a repeated property is invalid, the whole
// property is expected to be removed.
TEST_F(SchemaOrgValidatorTest, InvalidRepeatedEntityPropertyValue) {
  auto first_bad = Entity::New();
  first_bad->type = "this is not a real type";
  auto second_bad = Entity::New();
  second_bad->type = "bad address type";

  std::vector<EntityPtr> nested;
  nested.push_back(std::move(first_bad));
  nested.push_back(std::move(second_bad));

  auto address = Property::New();
  address->name = property::kAddress;
  address->values = Values::New();
  address->values->set_entity_values(std::move(nested));

  auto restaurant = Entity::New();
  restaurant->type = entity::kRestaurant;
  restaurant->properties.push_back(std::move(address));

  EXPECT_TRUE(ValidateEntity(restaurant.get()));
  EXPECT_TRUE(restaurant->properties.empty());
}
} // namespace schema_org
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment