Commit d828e2eb, authored by Sam Bowen, committed by Commit Bot

Reland: "Add validator for schema org entities and types in doc metadata mojo."

"
* Add a validator that removes properties that are not the right type.
* Add double, time, and timedelta types in the document metadata mojo
  structure.
* Use the new mojo types in document metadata extractor.
"

This is a reland of https://crrev.com/c/2062907 which was reverted in
https://crrev.com/c/2095670.

The substantial changes are two-fold:
(1) add build dep on base in schema_org_properties. This should fix the
    try bot issue which was somehow skipped.
(2) Rebase on top of https://crrev.com/c/2073337 which was submitted and
    contains some changes to the mojo structure.

Change-Id: I7f47d74e8833f76d4077028eac0dcb43790a1d26
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2097059
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: Becca Hughes <beccahughes@chromium.org>
Commit-Queue: Sam Bowen <sgbowen@google.com>
Cr-Commit-Position: refs/heads/master@{#748945}
parent 138dae27
...@@ -7,11 +7,13 @@ source_set("unit_tests") { ...@@ -7,11 +7,13 @@ source_set("unit_tests") {
sources = [ sources = [
"extractor_unittest.cc", "extractor_unittest.cc",
"generate_schema_org_code_unittest.cc", "generate_schema_org_code_unittest.cc",
"validator_unittest.cc",
] ]
deps = [ deps = [
":extractor", ":extractor",
":generate_schema_org_code", ":generate_schema_org_code",
":schema_org",
":schema_org_properties", ":schema_org_properties",
"//base", "//base",
"//components/schema_org/common:improved_mojom", "//components/schema_org/common:improved_mojom",
...@@ -68,6 +70,21 @@ static_library("extractor") { ...@@ -68,6 +70,21 @@ static_library("extractor") {
] ]
deps = [ deps = [
"//components/schema_org:generate_schema_org_code",
"//components/schema_org:schema_org",
"//components/schema_org:schema_org_properties",
"//components/schema_org/common:improved_mojom",
]
}
static_library("schema_org") {
sources = [
"validator.cc",
"validator.h",
]
deps = [
"//base",
"//components/schema_org:generate_schema_org_code", "//components/schema_org:generate_schema_org_code",
"//components/schema_org:schema_org_properties", "//components/schema_org:schema_org_properties",
"//components/schema_org/common:improved_mojom", "//components/schema_org/common:improved_mojom",
......
...@@ -15,5 +15,8 @@ mojom("improved_mojom") { ...@@ -15,5 +15,8 @@ mojom("improved_mojom") {
generate_java = true generate_java = true
sources = [ "improved_metadata.mojom" ] sources = [ "improved_metadata.mojom" ]
public_deps = [ "//url/mojom:url_mojom_gurl" ] public_deps = [
"//mojo/public/mojom/base",
"//url/mojom:url_mojom_gurl",
]
} }
...@@ -10,12 +10,17 @@ ...@@ -10,12 +10,17 @@
module schema_org.improved.mojom; module schema_org.improved.mojom;
import "mojo/public/mojom/base/time.mojom";
// A property can have arrays of different types simultaneously. Non-array // A property can have arrays of different types simultaneously. Non-array
// values are converted to arrays of one element. // values are converted to arrays of one element.
struct Values { struct Values {
array<bool> bool_values; array<bool> bool_values;
array<int64> long_values; array<int64> long_values;
array<string> string_values; array<string> string_values;
array<double> double_values;
array<mojo_base.mojom.Time> date_time_values;
array<mojo_base.mojom.TimeDelta> time_values;
array<Entity> entity_values; array<Entity> entity_values;
}; };
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#include "base/values.h" #include "base/values.h"
#include "components/schema_org/common/improved_metadata.mojom.h" #include "components/schema_org/common/improved_metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h" #include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/validator.h"
namespace schema_org { namespace schema_org {
...@@ -54,7 +56,56 @@ bool IsSupportedType(const std::string& type) { ...@@ -54,7 +56,56 @@ bool IsSupportedType(const std::string& type) {
void ExtractEntity(const base::DictionaryValue&, Entity*, int recursion_level); void ExtractEntity(const base::DictionaryValue&, Entity*, int recursion_level);
// Parses the string |value| into |values| for the property named
// |property_type|. The string is first truncated to kMaxStringLength. The
// property's configuration decides how the string is interpreted; the
// candidates are tried in this order and the first success wins:
//   1. text          -> appended unchanged to string_values.
//   2. number        -> appended to double_values if it parses as a double.
//   3. date/dateTime -> appended to date_time_values if
//                       base::Time::FromString() accepts it.
//   4. time          -> appended to time_values as the offset from the start
//                       of the day.
// Returns true if a value was appended to |values|, and false if the string
// could not be interpreted as any type the property supports.
bool ParseStringValue(const std::string& property_type,
                      base::StringPiece value,
                      Values* values) {
  value = value.substr(0, kMaxStringLength);

  schema_org::property::PropertyConfiguration prop_config =
      schema_org::property::GetPropertyConfiguration(property_type);
  if (prop_config.text) {
    values->string_values.push_back(value.as_string());
    return true;
  }
  if (prop_config.number) {
    double d = 0;
    if (base::StringToDouble(value, &d)) {
      values->double_values.push_back(d);
      return true;
    }
  }
  if (prop_config.date_time || prop_config.date) {
    base::Time time;
    // A StringPiece's data() is not guaranteed to be NUL-terminated, and after
    // the truncation above it would still expose the untruncated tail of the
    // underlying buffer. Copy into a std::string so that exactly the truncated
    // text is handed over as a C string.
    if (base::Time::FromString(value.as_string().c_str(), &time)) {
      values->date_time_values.push_back(time);
      return true;
    }
  }
  if (prop_config.time) {
    // Parse the time of day by attaching it to a fixed date, then subtract the
    // start of that same date (parsed the same way) to get a duration.
    base::Time time_of_day;
    base::Time start_of_day;
    bool parsed_time = base::Time::FromString(
        ("1970-01-01T" + value.as_string()).c_str(), &time_of_day);
    bool parsed_day_start =
        base::Time::FromString("1970-01-01T00:00:00", &start_of_day);
    // The string failed to parse as a DateTime, but did parse as a Time. Use
    // this value instead.
    if (parsed_time && parsed_day_start) {
      values->time_values.push_back(time_of_day - start_of_day);
      return true;
    }
  }
  return false;
}
bool ParseRepeatedValue(const base::Value::ConstListView& arr, bool ParseRepeatedValue(const base::Value::ConstListView& arr,
const std::string& property_type,
Values* values, Values* values,
int recursion_level) { int recursion_level) {
DCHECK(values); DCHECK(values);
...@@ -63,34 +114,27 @@ bool ParseRepeatedValue(const base::Value::ConstListView& arr, ...@@ -63,34 +114,27 @@ bool ParseRepeatedValue(const base::Value::ConstListView& arr,
} }
for (size_t j = 0; j < std::min(arr.size(), kMaxRepeatedSize); ++j) { for (size_t j = 0; j < std::min(arr.size(), kMaxRepeatedSize); ++j) {
auto& listItem = arr[j]; auto& list_item = arr[j];
switch (listItem.type()) { switch (list_item.type()) {
case base::Value::Type::BOOLEAN: { case base::Value::Type::BOOLEAN: {
bool v; values->bool_values.push_back(list_item.GetBool());
listItem.GetAsBoolean(&v);
values->bool_values.push_back(v);
} break; } break;
case base::Value::Type::INTEGER: { case base::Value::Type::INTEGER: {
int v = listItem.GetInt(); values->long_values.push_back(list_item.GetInt());
values->long_values.push_back(v);
} break; } break;
case base::Value::Type::DOUBLE: { case base::Value::Type::DOUBLE: {
// App Indexing doesn't support double type, so just encode its decimal values->double_values.push_back(list_item.GetDouble());
// value as a string instead.
double v = listItem.GetDouble();
std::string s = base::NumberToString(v);
s = s.substr(0, kMaxStringLength);
values->string_values.push_back(s);
} break; } break;
case base::Value::Type::STRING: { case base::Value::Type::STRING: {
std::string v = listItem.GetString(); base::StringPiece v = list_item.GetString();
v = v.substr(0, kMaxStringLength); if (!ParseStringValue(property_type, v, values)) {
values->string_values.push_back(v); return false;
}
} break; } break;
case base::Value::Type::DICTIONARY: { case base::Value::Type::DICTIONARY: {
const base::DictionaryValue* dict_value = nullptr; const base::DictionaryValue* dict_value = nullptr;
if (listItem.GetAsDictionary(&dict_value)) { if (list_item.GetAsDictionary(&dict_value)) {
auto entity = Entity::New(); auto entity = Entity::New();
ExtractEntity(*dict_value, entity.get(), recursion_level + 1); ExtractEntity(*dict_value, entity.get(), recursion_level + 1);
values->entity_values.push_back(std::move(entity)); values->entity_values.push_back(std::move(entity));
...@@ -130,40 +174,48 @@ void ExtractEntity(const base::DictionaryValue& val, ...@@ -130,40 +174,48 @@ void ExtractEntity(const base::DictionaryValue& val,
} }
property->values = Values::New(); property->values = Values::New();
if (entry.second.is_bool()) { switch (entry.second.type()) {
bool v; case base::Value::Type::BOOLEAN:
val.GetBoolean(entry.first, &v); property->values->bool_values.push_back(entry.second.GetBool());
property->values->bool_values.push_back(v); break;
} else if (entry.second.is_int()) { case base::Value::Type::INTEGER:
int v; property->values->long_values.push_back(entry.second.GetInt());
val.GetInteger(entry.first, &v); break;
property->values->long_values.push_back(v); case base::Value::Type::DOUBLE:
} else if (entry.second.is_double()) { property->values->double_values.push_back(entry.second.GetDouble());
double v; break;
val.GetDouble(entry.first, &v); case base::Value::Type::STRING: {
std::string s = base::NumberToString(v); base::StringPiece v = entry.second.GetString();
s = s.substr(0, kMaxStringLength); if (!(ParseStringValue(property->name, v, property->values.get()))) {
property->values->string_values.push_back(s); continue;
} else if (entry.second.is_string()) { }
std::string v; break;
val.GetString(entry.first, &v); }
v = v.substr(0, kMaxStringLength); case base::Value::Type::DICTIONARY: {
property->values->string_values.push_back(v);
} else if (entry.second.is_dict()) {
if (recursion_level + 1 >= kMaxDepth) { if (recursion_level + 1 >= kMaxDepth) {
continue; continue;
} }
const base::DictionaryValue* dict_value = nullptr; const base::DictionaryValue* dict_value = nullptr;
if (!entry.second.GetAsDictionary(&dict_value)) { if (!entry.second.GetAsDictionary(&dict_value)) {
continue; continue;
} }
auto nested_entity = Entity::New(); auto nested_entity = Entity::New();
ExtractEntity(*dict_value, nested_entity.get(), recursion_level + 1); ExtractEntity(*dict_value, nested_entity.get(), recursion_level + 1);
property->values->entity_values.push_back(std::move(nested_entity)); property->values->entity_values.push_back(std::move(nested_entity));
} else if (entry.second.is_list()) { break;
const auto& list_view = entry.second.GetList(); }
if (!ParseRepeatedValue(list_view, property->values.get(), case base::Value::Type::LIST: {
recursion_level)) { const base::Value::ConstListView list_view = entry.second.GetList();
if (!ParseRepeatedValue(list_view, property->name,
property->values.get(), recursion_level)) {
continue;
}
break;
}
default: {
// Unsupported value type. Skip this property.
continue; continue;
} }
} }
......
...@@ -37,6 +37,14 @@ class SchemaOrgExtractorTest : public testing::Test { ...@@ -37,6 +37,14 @@ class SchemaOrgExtractorTest : public testing::Test {
PropertyPtr CreateLongProperty(const std::string& name, const int64_t& value); PropertyPtr CreateLongProperty(const std::string& name, const int64_t& value);
PropertyPtr CreateDoubleProperty(const std::string& name, double value);
PropertyPtr CreateDateTimeProperty(const std::string& name,
const base::Time& value);
PropertyPtr CreateTimeProperty(const std::string& name,
const base::TimeDelta& value);
PropertyPtr CreateEntityProperty(const std::string& name, EntityPtr value); PropertyPtr CreateEntityProperty(const std::string& name, EntityPtr value);
}; };
...@@ -69,6 +77,36 @@ PropertyPtr SchemaOrgExtractorTest::CreateLongProperty(const std::string& name, ...@@ -69,6 +77,36 @@ PropertyPtr SchemaOrgExtractorTest::CreateLongProperty(const std::string& name,
return property; return property;
} }
// Builds a property called |name| whose only value is the double |value|.
PropertyPtr SchemaOrgExtractorTest::CreateDoubleProperty(
    const std::string& name,
    double value) {
  auto values = Values::New();
  values->double_values.push_back(value);

  PropertyPtr result = Property::New();
  result->name = name;
  result->values = std::move(values);
  return result;
}
// Builds a property called |name| whose only value is the date-time |value|.
PropertyPtr SchemaOrgExtractorTest::CreateDateTimeProperty(
    const std::string& name,
    const base::Time& value) {
  auto values = Values::New();
  values->date_time_values.push_back(value);

  PropertyPtr result = Property::New();
  result->name = name;
  result->values = std::move(values);
  return result;
}
// Builds a property called |name| whose only value is the time |value|,
// stored as a base::TimeDelta in time_values.
PropertyPtr SchemaOrgExtractorTest::CreateTimeProperty(
    const std::string& name,
    const base::TimeDelta& value) {
  auto values = Values::New();
  values->time_values.push_back(value);

  PropertyPtr result = Property::New();
  result->name = name;
  result->values = std::move(values);
  return result;
}
PropertyPtr SchemaOrgExtractorTest::CreateEntityProperty( PropertyPtr SchemaOrgExtractorTest::CreateEntityProperty(
const std::string& name, const std::string& name,
EntityPtr value) { EntityPtr value) {
...@@ -95,7 +133,7 @@ TEST_F(SchemaOrgExtractorTest, Basic) { ...@@ -95,7 +133,7 @@ TEST_F(SchemaOrgExtractorTest, Basic) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
TEST_F(SchemaOrgExtractorTest, booleanValue) { TEST_F(SchemaOrgExtractorTest, BooleanValue) {
EntityPtr extracted = EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"requiresSubscription\": true }"); Extract("{\"@type\": \"VideoObject\", \"requiresSubscription\": true }");
ASSERT_FALSE(extracted.is_null()); ASSERT_FALSE(extracted.is_null());
...@@ -108,7 +146,7 @@ TEST_F(SchemaOrgExtractorTest, booleanValue) { ...@@ -108,7 +146,7 @@ TEST_F(SchemaOrgExtractorTest, booleanValue) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
TEST_F(SchemaOrgExtractorTest, longValue) { TEST_F(SchemaOrgExtractorTest, LongValue) {
EntityPtr extracted = EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"position\": 111 }"); Extract("{\"@type\": \"VideoObject\", \"position\": 111 }");
ASSERT_FALSE(extracted.is_null()); ASSERT_FALSE(extracted.is_null());
...@@ -120,14 +158,75 @@ TEST_F(SchemaOrgExtractorTest, longValue) { ...@@ -120,14 +158,75 @@ TEST_F(SchemaOrgExtractorTest, longValue) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
TEST_F(SchemaOrgExtractorTest, doubleValue) { TEST_F(SchemaOrgExtractorTest, DoubleValue) {
EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"copyrightYear\": 1999.5 }");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
expected->properties.push_back(CreateDoubleProperty("copyrightYear", 1999.5));
EXPECT_EQ(expected, extracted);
}
// A JSON string holding a decimal number for copyrightYear is expected to be
// extracted as a double value rather than as a string.
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDouble) {
  EntityPtr expected = Entity::New();
  expected->type = "VideoObject";
  expected->properties.push_back(CreateDoubleProperty("copyrightYear", 1999.5));

  EntityPtr extracted =
      Extract("{\"@type\": \"VideoObject\",\"copyrightYear\": \"1999.5\"}");
  ASSERT_FALSE(extracted.is_null());

  EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingTime) {
EntityPtr extracted = EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"width\": 111.5 }"); Extract("{\"@type\": \"VideoObject\",\"startTime\": \"05:30:00\"}");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
expected->properties.push_back(CreateTimeProperty(
"startTime", base::TimeDelta::FromMinutes(60 * 5 + 30)));
EXPECT_EQ(expected, extracted);
}
// startTime can be a DateTime or a Time. If it parses as DateTime successfully,
// we should use that type.
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDateTimeOrTime) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\",\"startTime\": "
"\"2012-12-12T00:00:00 GMT\"}");
ASSERT_FALSE(extracted.is_null()); ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New(); EntityPtr expected = Entity::New();
expected->type = "VideoObject"; expected->type = "VideoObject";
expected->properties.push_back(CreateStringProperty("width", "111.5")); expected->properties.push_back(CreateDateTimeProperty(
"startTime", base::Time::FromDeltaSinceWindowsEpoch(
base::TimeDelta::FromMilliseconds(12999744000000))));
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDateTime) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\",\"dateCreated\": "
"\"2012-12-12T00:00:00 GMT\"}");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
expected->properties.push_back(CreateDateTimeProperty(
"dateCreated", base::Time::FromDeltaSinceWindowsEpoch(
base::TimeDelta::FromMilliseconds(12999744000000))));
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
...@@ -279,27 +378,43 @@ TEST_F(SchemaOrgExtractorTest, TruncateTooManyValuesInField) { ...@@ -279,27 +378,43 @@ TEST_F(SchemaOrgExtractorTest, TruncateTooManyValuesInField) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
TEST_F(SchemaOrgExtractorTest, truncateTooManyFields) { TEST_F(SchemaOrgExtractorTest, TruncateTooManyProperties) {
std::stringstream tooManyFields; // Create an entity with more than the supported number of properties. All the
for (int i = 0; i < 26; ++i) { // properties must be valid to be included. 26 properties below, should
tooManyFields << "\"" << i << "\": \"a\""; // truncate to 25.
if (i != 25) { EntityPtr extracted = Extract(
tooManyFields << ","; "{\"@type\": \"VideoObject\","
} "\"name\": \"a video!\","
} "\"transcript\":\"a short movie\","
EntityPtr extracted = "\"videoFrameSize\":\"1200x800\","
Extract("{\"@type\": \"VideoObject\"," + tooManyFields.str() + "}"); "\"videoQuality\":\"high\","
ASSERT_FALSE(extracted.is_null()); "\"bitrate\":\"24mbps\","
"\"contentSize\":\"8MB\","
EntityPtr expected = Entity::New(); "\"encodingFormat\":\"H264\","
expected->type = "VideoObject"; "\"accessMode\":\"visual\","
"\"accessibilitySummary\":\"short description\","
"\"alternativeHeadline\":\"OR other title\","
"\"award\":\"best picture\","
"\"educationalUse\":\"assignment\","
"\"headline\":\"headline\","
"\"interactivityType\":\"active\","
"\"keywords\":\"video\","
"\"learningResourceType\":\"presentation\","
"\"material\":\"film\","
"\"mentions\":\"other work\","
"\"schemaVersion\":\"http://schema.org/version/2.0/\","
"\"text\":\"a short work\","
"\"typicalAgeRange\":\"5-\","
"\"version\":\"5\","
"\"alternateName\":\"other title\","
"\"description\":\"a short description\","
"\"disambiguatingDescription\":\"clarifying point\","
"\"identifier\":\"ID12345\""
"}");
for (int i = 0; i < 25; ++i) { ASSERT_FALSE(extracted.is_null());
expected->properties.push_back(
CreateStringProperty(base::NumberToString(i), "a"));
}
EXPECT_EQ(expected->properties.size(), extracted->properties.size()); EXPECT_EQ(25u, extracted->properties.size());
} }
TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithEmptyArray) { TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithEmptyArray) {
...@@ -326,12 +441,12 @@ TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithNestedArray) { ...@@ -326,12 +441,12 @@ TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithNestedArray) {
TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) { TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) {
EntityPtr extracted = Extract( EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": \"a video!\"," "{\"@type\": \"VideoObject\", \"name\": \"a video!\","
"\"1\": {" "\"actor\": {"
" \"2\": {" " \"address\": {"
" \"3\": {" " \"addressCountry\": {"
" \"4\": {" " \"containedInPlace\": {"
" \"5\": {" " \"containedInPlace\": {"
" \"6\": 7" " \"name\": \"matroska\""
" }" " }"
" }" " }"
" }" " }"
...@@ -352,10 +467,14 @@ TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) { ...@@ -352,10 +467,14 @@ TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) {
EntityPtr entity4 = Entity::New(); EntityPtr entity4 = Entity::New();
entity4->type = "Thing"; entity4->type = "Thing";
entity3->properties.push_back(CreateEntityProperty("4", std::move(entity4))); entity3->properties.push_back(
entity2->properties.push_back(CreateEntityProperty("3", std::move(entity3))); CreateEntityProperty("containedInPlace", std::move(entity4)));
entity1->properties.push_back(CreateEntityProperty("2", std::move(entity2))); entity2->properties.push_back(
expected->properties.push_back(CreateEntityProperty("1", std::move(entity1))); CreateEntityProperty("addressCountry", std::move(entity3)));
entity1->properties.push_back(
CreateEntityProperty("address", std::move(entity2)));
expected->properties.push_back(
CreateEntityProperty("actor", std::move(entity1)));
expected->properties.push_back(CreateStringProperty("name", "a video!")); expected->properties.push_back(CreateStringProperty("name", "a video!"));
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
...@@ -364,11 +483,11 @@ TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) { ...@@ -364,11 +483,11 @@ TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) {
TEST_F(SchemaOrgExtractorTest, MaxNestingDepthWithTerminalProperty) { TEST_F(SchemaOrgExtractorTest, MaxNestingDepthWithTerminalProperty) {
EntityPtr extracted = Extract( EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": \"a video!\"," "{\"@type\": \"VideoObject\", \"name\": \"a video!\","
"\"1\": {" "\"actor\": {"
" \"2\": {" " \"address\": {"
" \"3\": {" " \"addressCountry\": {"
" \"4\": {" " \"containedInPlace\": {"
" \"5\": 6" " \"name\": \"matroska\""
" }" " }"
" }" " }"
" }" " }"
...@@ -388,12 +507,16 @@ TEST_F(SchemaOrgExtractorTest, MaxNestingDepthWithTerminalProperty) { ...@@ -388,12 +507,16 @@ TEST_F(SchemaOrgExtractorTest, MaxNestingDepthWithTerminalProperty) {
EntityPtr entity4 = Entity::New(); EntityPtr entity4 = Entity::New();
entity4->type = "Thing"; entity4->type = "Thing";
entity4->properties.push_back(CreateLongProperty("5", 6)); entity4->properties.push_back(CreateStringProperty("name", "matroska"));
entity3->properties.push_back(CreateEntityProperty("4", std::move(entity4))); entity3->properties.push_back(
entity2->properties.push_back(CreateEntityProperty("3", std::move(entity3))); CreateEntityProperty("containedInPlace", std::move(entity4)));
entity1->properties.push_back(CreateEntityProperty("2", std::move(entity2))); entity2->properties.push_back(
CreateEntityProperty("addressCountry", std::move(entity3)));
entity1->properties.push_back(
CreateEntityProperty("address", std::move(entity2)));
expected->properties.push_back(CreateEntityProperty("1", std::move(entity1))); expected->properties.push_back(
CreateEntityProperty("actor", std::move(entity1)));
expected->properties.push_back(CreateStringProperty("name", "a video!")); expected->properties.push_back(CreateStringProperty("name", "a video!"));
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
......
...@@ -14,6 +14,11 @@ TEST(GenerateSchemaOrgTest, EntityName) { ...@@ -14,6 +14,11 @@ TEST(GenerateSchemaOrgTest, EntityName) {
EXPECT_STREQ(entity::kAboutPage, "AboutPage"); EXPECT_STREQ(entity::kAboutPage, "AboutPage");
} }
// IsValidEntityName() should accept a generated entity name constant and
// reject a string that is not one of the generated entity names.
TEST(GenerateSchemaOrgTest, IsValidEntityName) {
EXPECT_TRUE(entity::IsValidEntityName(entity::kAboutPage));
EXPECT_FALSE(entity::IsValidEntityName("a made up name"));
}
TEST(GenerateSchemaOrgTest, PropertyName) { TEST(GenerateSchemaOrgTest, PropertyName) {
EXPECT_STREQ(property::kAcceptedAnswer, "acceptedAnswer"); EXPECT_STREQ(property::kAcceptedAnswer, "acceptedAnswer");
} }
......
...@@ -6,6 +6,9 @@ ...@@ -6,6 +6,9 @@
// Do not edit. // Do not edit.
#include "components/schema_org/{{ header_file }}.h" #include "components/schema_org/{{ header_file }}.h"
#include "base/containers/flat_set.h"
#include "base/no_destructor.h"
#include "base/strings/string_piece.h"
namespace schema_org { namespace schema_org {
namespace entity { namespace entity {
...@@ -14,5 +17,15 @@ namespace entity { ...@@ -14,5 +17,15 @@ namespace entity {
const char k{{entity[0]|upper}}{{entity[1:]}}[] = "{{entity}}"; const char k{{entity[0]|upper}}{{entity[1:]}}[] = "{{entity}}";
{% endfor %} {% endfor %}
// Returns true if |entity_name| is one of the entity name constants generated
// in this file. The lookup set is a function-local static wrapped in
// base::NoDestructor and is populated by the template loop below with every
// generated k<Entity> constant.
bool IsValidEntityName(const std::string& entity_name) {
static const base::NoDestructor<base::flat_set<base::StringPiece>>
kValidEntityNames(base::flat_set<base::StringPiece>({
{%for entity in entities %}
k{{entity[0]|upper}}{{entity[1:]}},
{% endfor %}
}));
return kValidEntityNames->find(entity_name) != kValidEntityNames->end();
}
} // entity } // entity
} // schema_org } // schema_org
...@@ -17,6 +17,8 @@ namespace entity { ...@@ -17,6 +17,8 @@ namespace entity {
extern const char k{{entity[0]|upper}}{{entity[1:]}}[]; extern const char k{{entity[0]|upper}}{{entity[1:]}}[];
{% endfor %} {% endfor %}
// Returns true if |entity_name| matches one of the generated entity name
// constants in this namespace.
bool IsValidEntityName(const std::string& entity_name);
} // namespace entity } // namespace entity
} // namespace schema_org } // namespace schema_org
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/schema_org/validator.h"
#include <vector>
#include "components/schema_org/common/improved_metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h"
namespace schema_org {
using improved::mojom::Entity;
using improved::mojom::EntityPtr;
// Checks |entity| against the generated Schema.org tables and prunes it in
// place.
//
// Returns false, leaving |entity| untouched, when the entity's type is not a
// valid entity name. Otherwise returns true after dropping every property
// that:
//   * carries string, double, time or date-time values the property's
//     configuration does not allow, or
//   * carries entity values although the configuration lists no entity
//     types, or
//   * carries entity values none of which pass validation themselves.
// Nested entities are validated recursively, and the invalid ones are removed
// from their property.
bool ValidateEntity(Entity* entity) {
  if (!entity::IsValidEntityName(entity->type))
    return false;

  auto& properties = entity->properties;
  for (auto property_it = properties.begin();
       property_it != properties.end();) {
    auto& values = (*property_it)->values;
    property::PropertyConfiguration config =
        property::GetPropertyConfiguration((*property_it)->name);

    // Any non-entity value list that the configuration does not permit
    // disqualifies the whole property.
    const bool has_disallowed_scalar =
        (!values->string_values.empty() && !config.text) ||
        (!values->double_values.empty() && !config.number) ||
        (!values->time_values.empty() && !config.time) ||
        (!values->date_time_values.empty() && !config.date_time &&
         !config.date);

    bool keep_property = !has_disallowed_scalar;
    if (keep_property && !values->entity_values.empty()) {
      if (config.thing_types.empty()) {
        // Property is not supposed to have an entity type.
        keep_property = false;
      } else {
        // Filter out nested entities that fail validation; the property
        // survives only if at least one nested entity remains.
        auto& nested_entities = values->entity_values;
        for (auto nested_it = nested_entities.begin();
             nested_it != nested_entities.end();) {
          if (ValidateEntity((*nested_it).get())) {
            ++nested_it;
          } else {
            nested_it = nested_entities.erase(nested_it);
          }
        }
        keep_property = !nested_entities.empty();
      }
    }

    if (keep_property) {
      ++property_it;
    } else {
      property_it = properties.erase(property_it);
    }
  }
  return true;
}
} // namespace schema_org
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
#define COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
#include "components/schema_org/common/improved_metadata.mojom-forward.h"
namespace schema_org {
// Validates and cleans up the Schema.org entity in-place. Properties whose
// values do not match the types allowed for that property are removed from
// the entity, and nested entities are validated recursively. Returns false if
// the entity's type is not a valid entity name (the entity is then left
// unmodified); returns true otherwise, even if properties were removed.
bool ValidateEntity(improved::mojom::Entity* entity);
} // namespace schema_org
#endif // COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <memory>
#include <utility>
#include <vector>
#include "components/schema_org/common/improved_metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h"
#include "components/schema_org/validator.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace schema_org {
using improved::mojom::Entity;
using improved::mojom::EntityPtr;
using improved::mojom::Property;
using improved::mojom::PropertyPtr;
using improved::mojom::Values;
// Test fixture for the ValidateEntity() tests; adds nothing to testing::Test.
class SchemaOrgValidatorTest : public testing::Test {};
// An entity whose type is not a generated entity name must fail validation.
TEST_F(SchemaOrgValidatorTest, InvalidEntityType) {
  EntityPtr unknown = Entity::New();
  unknown->type = "random entity type";

  EXPECT_FALSE(ValidateEntity(unknown.get()));
}
// A string value on accessMode is expected to be kept by validation.
TEST_F(SchemaOrgValidatorTest, ValidStringPropertyValue) {
  PropertyPtr access_mode = Property::New();
  access_mode->name = property::kAccessMode;
  access_mode->values = Values::New();
  access_mode->values->string_values.push_back("foo");

  EntityPtr about_page = Entity::New();
  about_page->type = entity::kAboutPage;
  about_page->properties.push_back(std::move(access_mode));

  EXPECT_TRUE(ValidateEntity(about_page.get()));
  EXPECT_EQ(1u, about_page->properties.size());
}
// A string value on about is expected to be stripped, while the entity itself
// still validates.
TEST_F(SchemaOrgValidatorTest, InvalidStringPropertyValue) {
  PropertyPtr about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->string_values.push_back("foo");

  EntityPtr about_page = Entity::New();
  about_page->type = entity::kAboutPage;
  about_page->properties.push_back(std::move(about));

  EXPECT_TRUE(ValidateEntity(about_page.get()));
  EXPECT_TRUE(about_page->properties.empty());
}
// A double value on additionalNumberOfGuests is expected to be kept.
TEST_F(SchemaOrgValidatorTest, ValidNumberPropertyValue) {
  PropertyPtr guests = Property::New();
  guests->name = property::kAdditionalNumberOfGuests;
  guests->values = Values::New();
  guests->values->double_values.push_back(1.0);

  EntityPtr residence = Entity::New();
  residence->type = entity::kSingleFamilyResidence;
  residence->properties.push_back(std::move(guests));

  EXPECT_TRUE(ValidateEntity(residence.get()));
  EXPECT_EQ(1u, residence->properties.size());
}
// A double value on about is expected to be stripped, while the entity itself
// still validates.
TEST_F(SchemaOrgValidatorTest, InvalidNumberPropertyValue) {
  PropertyPtr about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->double_values.push_back(1.0);

  EntityPtr about_page = Entity::New();
  about_page->type = entity::kAboutPage;
  about_page->properties.push_back(std::move(about));

  EXPECT_TRUE(ValidateEntity(about_page.get()));
  EXPECT_TRUE(about_page->properties.empty());
}
// A date-time value on checkinTime is expected to be kept.
TEST_F(SchemaOrgValidatorTest, ValidDateTimePropertyValue) {
  const base::Time timestamp = base::Time::FromDeltaSinceWindowsEpoch(
      base::TimeDelta::FromMilliseconds(12999772800000));

  PropertyPtr checkin = Property::New();
  checkin->name = property::kCheckinTime;
  checkin->values = Values::New();
  checkin->values->date_time_values.push_back(timestamp);

  EntityPtr lodging = Entity::New();
  lodging->type = entity::kLodgingBusiness;
  lodging->properties.push_back(std::move(checkin));

  EXPECT_TRUE(ValidateEntity(lodging.get()));
  EXPECT_EQ(1u, lodging->properties.size());
}
// A date-time value on about is expected to be stripped, while the entity
// itself still validates.
TEST_F(SchemaOrgValidatorTest, InvalidDateTimePropertyValue) {
  const base::Time timestamp = base::Time::FromDeltaSinceWindowsEpoch(
      base::TimeDelta::FromMilliseconds(12999772800000));

  PropertyPtr about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->date_time_values.push_back(timestamp);

  EntityPtr about_page = Entity::New();
  about_page->type = entity::kAboutPage;
  about_page->properties.push_back(std::move(about));

  EXPECT_TRUE(ValidateEntity(about_page.get()));
  EXPECT_TRUE(about_page->properties.empty());
}
// A time (TimeDelta) value on checkinTime is expected to be kept.
TEST_F(SchemaOrgValidatorTest, ValidTimePropertyValue) {
  const base::TimeDelta duration =
      base::TimeDelta::FromMilliseconds(12999772800000);

  PropertyPtr checkin = Property::New();
  checkin->name = property::kCheckinTime;
  checkin->values = Values::New();
  checkin->values->time_values.push_back(duration);

  EntityPtr lodging = Entity::New();
  lodging->type = entity::kLodgingBusiness;
  lodging->properties.push_back(std::move(checkin));

  EXPECT_TRUE(ValidateEntity(lodging.get()));
  EXPECT_EQ(1u, lodging->properties.size());
}
// A time (TimeDelta) value on the `about` property of an AboutPage is expected
// to be rejected: the entity validates, but ends up with no properties.
TEST_F(SchemaOrgValidatorTest, InvalidTimePropertyValue) {
  const base::TimeDelta duration =
      base::TimeDelta::FromMilliseconds(12999772800000);

  auto about_page = Entity::New();
  about_page->type = entity::kAboutPage;

  auto about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->time_values.push_back(duration);
  about_page->properties.push_back(std::move(about));

  EXPECT_TRUE(ValidateEntity(about_page.get()));
  EXPECT_TRUE(about_page->properties.empty());
}
// A nested PostalAddress entity on the `address` property of a Restaurant is
// expected to survive validation, so the property is kept.
TEST_F(SchemaOrgValidatorTest, ValidEntityPropertyValue) {
  auto postal_address = Entity::New();
  postal_address->type = entity::kPostalAddress;

  auto address = Property::New();
  address->name = property::kAddress;
  address->values = Values::New();
  address->values->entity_values.push_back(std::move(postal_address));

  auto restaurant = Entity::New();
  restaurant->type = entity::kRestaurant;
  restaurant->properties.push_back(std::move(address));

  EXPECT_TRUE(ValidateEntity(restaurant.get()));
  EXPECT_EQ(1u, restaurant->properties.size());
}
// A nested PostalAddress entity on the `accessMode` property of an AboutPage
// is expected to be rejected: the outer entity validates, but the property is
// removed.
TEST_F(SchemaOrgValidatorTest, InvalidEntityPropertyValue) {
  auto postal_address = Entity::New();
  postal_address->type = entity::kPostalAddress;

  auto access_mode = Property::New();
  access_mode->name = property::kAccessMode;
  access_mode->values = Values::New();
  access_mode->values->entity_values.push_back(std::move(postal_address));

  auto about_page = Entity::New();
  about_page->type = entity::kAboutPage;
  about_page->properties.push_back(std::move(access_mode));

  EXPECT_TRUE(ValidateEntity(about_page.get()));
  EXPECT_TRUE(about_page->properties.empty());
}
// Two nested PostalAddress entities on the `address` property of a Restaurant
// are both expected to survive validation: the property is kept and still
// carries both entity values.
TEST_F(SchemaOrgValidatorTest, ValidRepeatedEntityPropertyValue) {
  EntityPtr entity = Entity::New();
  entity->type = entity::kRestaurant;

  PropertyPtr property = Property::New();
  property->name = property::kAddress;
  property->values = Values::New();

  EntityPtr value1 = Entity::New();
  value1->type = entity::kPostalAddress;
  EntityPtr value2 = Entity::New();
  value2->type = entity::kPostalAddress;
  property->values->entity_values.push_back(std::move(value1));
  property->values->entity_values.push_back(std::move(value2));

  entity->properties.push_back(std::move(property));

  bool validated_entity = ValidateEntity(entity.get());
  EXPECT_TRUE(validated_entity);

  // Fatal assertion: properties[0] is dereferenced below, so the test must
  // stop here rather than index out of bounds if the property was dropped.
  ASSERT_EQ(1u, entity->properties.size());
  EXPECT_EQ(2u, entity->properties[0]->values->entity_values.size());
}
// If one value of a repeated entity property is invalid but the other is not,
// the validator should keep the outer property and remove only the invalid
// nested entity value.
TEST_F(SchemaOrgValidatorTest, MixedValidityRepeatedEntityPropertyValue) {
  EntityPtr entity = Entity::New();
  entity->type = entity::kRestaurant;

  PropertyPtr property = Property::New();
  property->name = property::kAddress;
  property->values = Values::New();

  EntityPtr value1 = Entity::New();
  value1->type = entity::kPostalAddress;
  // Deliberately not a known entity type, so this value should be dropped.
  EntityPtr value2 = Entity::New();
  value2->type = "bad address";
  property->values->entity_values.push_back(std::move(value1));
  property->values->entity_values.push_back(std::move(value2));

  entity->properties.push_back(std::move(property));

  bool validated_entity = ValidateEntity(entity.get());
  EXPECT_TRUE(validated_entity);

  // Fatal assertion: properties[0] is dereferenced below, so the test must
  // stop here rather than index out of bounds if the property was dropped.
  ASSERT_EQ(1u, entity->properties.size());
  EXPECT_EQ(1u, entity->properties[0]->values->entity_values.size());
}
// When every nested entity value of a repeated property has an unrecognised
// type, the whole property is expected to be removed, while the outer entity
// still validates.
TEST_F(SchemaOrgValidatorTest, InvalidRepeatedEntityPropertyValue) {
  auto first_bad = Entity::New();
  first_bad->type = "this is not a real type";
  auto second_bad = Entity::New();
  second_bad->type = "bad address type";

  auto address = Property::New();
  address->name = property::kAddress;
  address->values = Values::New();
  address->values->entity_values.push_back(std::move(first_bad));
  address->values->entity_values.push_back(std::move(second_bad));

  auto restaurant = Entity::New();
  restaurant->type = entity::kRestaurant;
  restaurant->properties.push_back(std::move(address));

  EXPECT_TRUE(ValidateEntity(restaurant.get()));
  EXPECT_TRUE(restaurant->properties.empty());
}
} // namespace schema_org
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment