Commit 28cff5e2 authored by Sam Bowen, committed by Commit Bot

Add validator for schema org entities and types in doc metadata mojo.

* Add a validator that removes properties that are not the right type.
* Add double, time, and timedelta types in the document metadata mojo
  structure.
* Use the new mojo types in document metadata extractor.

Downstream clank will not be able to read fields parsed into the new
type, but will just skip over them. We should consider updating support
there.
http://cs/clank/java/src/com/google/android/apps/chrome/icing/AppIndexingReporterInternal.java?l=133

Bug: 1044250
Change-Id: Icfd7f0db74abf7bd1261b30dc416c94a7a23b4c0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2062907
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: Becca Hughes <beccahughes@chromium.org>
Commit-Queue: Sam Bowen <sgbowen@google.com>
Cr-Commit-Position: refs/heads/master@{#748357}
parent 57d82214
...@@ -7,11 +7,13 @@ source_set("unit_tests") { ...@@ -7,11 +7,13 @@ source_set("unit_tests") {
sources = [ sources = [
"extractor_unittest.cc", "extractor_unittest.cc",
"generate_schema_org_code_unittest.cc", "generate_schema_org_code_unittest.cc",
"validator_unittest.cc",
] ]
deps = [ deps = [
":extractor", ":extractor",
":generate_schema_org_code", ":generate_schema_org_code",
":schema_org",
":schema_org_properties", ":schema_org_properties",
"//base", "//base",
"//components/schema_org/common:mojom", "//components/schema_org/common:mojom",
...@@ -67,6 +69,20 @@ static_library("extractor") { ...@@ -67,6 +69,20 @@ static_library("extractor") {
"extractor.h", "extractor.h",
] ]
deps = [
"//components/schema_org:generate_schema_org_code",
"//components/schema_org:schema_org",
"//components/schema_org:schema_org_properties",
"//components/schema_org/common:mojom",
]
}
static_library("schema_org") {
sources = [
"validator.cc",
"validator.h",
]
deps = [ deps = [
"//components/schema_org:generate_schema_org_code", "//components/schema_org:generate_schema_org_code",
"//components/schema_org:schema_org_properties", "//components/schema_org:schema_org_properties",
......
...@@ -8,5 +8,8 @@ mojom("mojom") { ...@@ -8,5 +8,8 @@ mojom("mojom") {
generate_java = true generate_java = true
sources = [ "metadata.mojom" ] sources = [ "metadata.mojom" ]
public_deps = [ "//url/mojom:url_mojom_gurl" ] public_deps = [
"//mojo/public/mojom/base",
"//url/mojom:url_mojom_gurl",
]
} }
...@@ -4,12 +4,17 @@ ...@@ -4,12 +4,17 @@
module schema_org.mojom; module schema_org.mojom;
import "mojo/public/mojom/base/time.mojom";
// Due to the restriction of AppIndexing, all elements should be of the // Due to the restriction of AppIndexing, all elements should be of the
// same type. Non-array values are converted to arrays of one element. // same type. Non-array values are converted to arrays of one element.
union Values { union Values {
array<bool> bool_values; array<bool> bool_values;
array<int64> long_values; array<int64> long_values;
array<string> string_values; array<string> string_values;
array<double> double_values;
array<mojo_base.mojom.Time> date_time_values;
array<mojo_base.mojom.TimeDelta> time_values;
array<Entity> entity_values; array<Entity> entity_values;
}; };
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#include "base/values.h" #include "base/values.h"
#include "components/schema_org/common/metadata.mojom.h" #include "components/schema_org/common/metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h" #include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/validator.h"
namespace schema_org { namespace schema_org {
...@@ -45,11 +47,111 @@ bool IsSupportedType(const std::string& type) { ...@@ -45,11 +47,111 @@ bool IsSupportedType(const std::string& type) {
return kSupportedTypes.find(type) != kSupportedTypes.end(); return kSupportedTypes.find(type) != kSupportedTypes.end();
} }
void ExtractEntity(base::DictionaryValue*, mojom::Entity&, int recursionLevel); void ExtractEntity(base::DictionaryValue*, mojom::Entity&, int recursion_level);
// Initializes a vector of the appropriate type for the property.
bool InitializeStringValue(const std::string& property_type,
mojom::Values* values) {
schema_org::property::PropertyConfiguration prop_config =
schema_org::property::GetPropertyConfiguration(property_type);
if (prop_config.text) {
values->set_string_values(std::vector<std::string>());
} else if (prop_config.number) {
values->set_double_values(std::vector<double>());
} else if (prop_config.date_time || prop_config.date) {
values->set_date_time_values(std::vector<base::Time>());
} else if (prop_config.time) {
values->set_time_values(std::vector<base::TimeDelta>());
} else {
return false;
}
return true;
}
// Parses a string into a property value and appends it to |values|. The
// string is first truncated to kMaxStringLength. If the property supports
// text, the (truncated) string itself is stored. Otherwise the string is tried
// as a double, then as a date/date-time, then as a time of day, according to
// the kinds the property supports; the first successful parse wins.
//
// |values| is expected to have been set up by InitializeStringValue(). Because
// that function picks the first supported kind up front, the kind that
// actually parses may differ from the active union member. In that case the
// union is switched to the parsed kind, which discards anything previously
// stored under the other kind.
//
// Returns true if a value was appended, false if the string could not be
// interpreted as any kind the property supports.
bool ParseStringValue(const std::string& property_type,
                      base::StringPiece value,
                      mojom::Values* values) {
  value = value.substr(0, kMaxStringLength);

  schema_org::property::PropertyConfiguration prop_config =
      schema_org::property::GetPropertyConfiguration(property_type);
  if (prop_config.text) {
    values->get_string_values().push_back(value.as_string());
    return true;
  }

  if (prop_config.number) {
    double d = 0;
    if (base::StringToDouble(value, &d)) {
      // Accessing a union member other than the active one is invalid, so
      // make sure the double vector is the active member first.
      if (!values->is_double_values()) {
        values->set_double_values(std::vector<double>());
      }
      values->get_double_values().push_back(d);
      return true;
    }
  }

  if (prop_config.date_time || prop_config.date) {
    base::Time time;
    // Copy into a std::string before handing a C string to FromString():
    // |value| is a view that is not null-terminated at its own length, so
    // passing value.data() would parse past the truncation point.
    if (base::Time::FromString(value.as_string().c_str(), &time)) {
      // The active member may be the double vector if the property also
      // supports numbers but this string only parsed as a date.
      if (!values->is_date_time_values()) {
        values->set_date_time_values(std::vector<base::Time>());
      }
      values->get_date_time_values().push_back(time);
      return true;
    }
  }

  if (prop_config.time) {
    // Parse the time of day by attaching it to a fixed date and subtracting
    // the start of that same date.
    base::Time time_of_day;
    base::Time start_of_day;
    bool parsed_time = base::Time::FromString(
        ("1970-01-01T" + value.as_string()).c_str(), &time_of_day);
    bool parsed_day_start =
        base::Time::FromString("1970-01-01T00:00:00", &start_of_day);

    // The string failed to parse as a DateTime, but did parse as a Time. Use
    // this value, initializing the vector first. (We cannot initialize it in
    // advance like the others, because we do not know if parsing will work in
    // advance.)
    if (parsed_time && parsed_day_start) {
      if (!values->is_time_values()) {
        values->set_time_values(std::vector<base::TimeDelta>());
      }
      values->get_time_values().push_back(time_of_day - start_of_day);
      return true;
    }
  }

  return false;
}
// Converts a list of strings for |property_type| into a single typed array in
// |values|. At most kMaxRepeatedSize leading items are considered. Fails
// (returns false) if the property accepts no string-derived kind, if any
// considered item is not a string, or if any considered item cannot be parsed.
bool ParseRepeatedString(const base::Value::ListView& arr,
                         const std::string& property_type,
                         mojom::Values* values) {
  if (!InitializeStringValue(property_type, values)) {
    return false;
  }

  const size_t limit = std::min(arr.size(), kMaxRepeatedSize);
  for (size_t index = 0; index < limit; ++index) {
    const auto& item = arr[index];
    // The type check must come first: GetString() is only called on strings.
    const bool parsed =
        item.type() == base::Value::Type::STRING &&
        ParseStringValue(property_type, item.GetString(), values);
    if (!parsed) {
      return false;
    }
  }
  return true;
}
// Parses a repeated property value and places the result in values. The result
// will be an array of a single type.
bool ParseRepeatedValue(base::Value::ListView& arr, bool ParseRepeatedValue(base::Value::ListView& arr,
mojom::Values& values, const std::string& property_type,
int recursionLevel) { mojom::Values* values,
int recursion_level) {
if (arr.empty()) { if (arr.empty()) {
return false; return false;
} }
...@@ -58,30 +160,29 @@ bool ParseRepeatedValue(base::Value::ListView& arr, ...@@ -58,30 +160,29 @@ bool ParseRepeatedValue(base::Value::ListView& arr,
base::Value::Type type = base::Value::Type::NONE; base::Value::Type type = base::Value::Type::NONE;
for (size_t j = 0; j < std::min(arr.size(), kMaxRepeatedSize); ++j) { for (size_t j = 0; j < std::min(arr.size(), kMaxRepeatedSize); ++j) {
auto& listItem = arr[j]; auto& list_item = arr[j];
if (is_first_item) { if (is_first_item) {
is_first_item = false; is_first_item = false;
type = listItem.type(); type = list_item.type();
switch (type) { switch (type) {
case base::Value::Type::BOOLEAN: case base::Value::Type::BOOLEAN:
values.set_bool_values(std::vector<bool>()); values->set_bool_values(std::vector<bool>());
break; break;
case base::Value::Type::INTEGER: case base::Value::Type::INTEGER:
values.set_long_values(std::vector<int64_t>()); values->set_long_values(std::vector<int64_t>());
break; break;
case base::Value::Type::DOUBLE: case base::Value::Type::DOUBLE:
// App Indexing doesn't support double type, so just encode its // App Indexing doesn't support double type, so just encode its
// decimal value as a string instead. // decimal value as a string instead.
values.set_string_values(std::vector<std::string>()); values->set_string_values(std::vector<std::string>());
break; break;
case base::Value::Type::STRING: case base::Value::Type::STRING:
values.set_string_values(std::vector<std::string>()); return ParseRepeatedString(arr, property_type, values);
break;
case base::Value::Type::DICTIONARY: case base::Value::Type::DICTIONARY:
if (recursionLevel + 1 >= kMaxDepth) { if (recursion_level + 1 >= kMaxDepth) {
return false; return false;
} }
values.set_entity_values(std::vector<mojom::EntityPtr>()); values->set_entity_values(std::vector<mojom::EntityPtr>());
break; break;
case base::Value::Type::LIST: case base::Value::Type::LIST:
// App Indexing doesn't support nested arrays. // App Indexing doesn't support nested arrays.
...@@ -92,41 +193,36 @@ bool ParseRepeatedValue(base::Value::ListView& arr, ...@@ -92,41 +193,36 @@ bool ParseRepeatedValue(base::Value::ListView& arr,
} }
} }
if (listItem.type() != type) { if (list_item.type() != type) {
// App Indexing doesn't support mixed types. If there are mixed // App Indexing doesn't support mixed types. If there are mixed
// types in the parsed object, we will drop the property. // types in the parsed object, we will drop the property.
return false; return false;
} }
switch (listItem.type()) { switch (list_item.type()) {
case base::Value::Type::BOOLEAN: { case base::Value::Type::BOOLEAN: {
bool v; bool v;
listItem.GetAsBoolean(&v); list_item.GetAsBoolean(&v);
values.get_bool_values().push_back(v); values->get_bool_values().push_back(v);
} break; } break;
case base::Value::Type::INTEGER: { case base::Value::Type::INTEGER: {
int v = listItem.GetInt(); int v = list_item.GetInt();
values.get_long_values().push_back(v); values->get_long_values().push_back(v);
} break; } break;
case base::Value::Type::DOUBLE: { case base::Value::Type::DOUBLE: {
// App Indexing doesn't support double type, so just encode its decimal // App Indexing doesn't support double type, so just encode its decimal
// value as a string instead. // value as a string instead.
double v = listItem.GetDouble(); double v = list_item.GetDouble();
std::string s = base::NumberToString(v); std::string s = base::NumberToString(v);
s = s.substr(0, kMaxStringLength); s = s.substr(0, kMaxStringLength);
values.get_string_values().push_back(s); values->get_string_values().push_back(std::move(s));
} break;
case base::Value::Type::STRING: {
std::string v = listItem.GetString();
v = v.substr(0, kMaxStringLength);
values.get_string_values().push_back(v);
} break; } break;
case base::Value::Type::DICTIONARY: { case base::Value::Type::DICTIONARY: {
values.get_entity_values().push_back(mojom::Entity::New()); values->get_entity_values().push_back(mojom::Entity::New());
base::DictionaryValue* dict_value = nullptr; base::DictionaryValue* dict_value = nullptr;
if (listItem.GetAsDictionary(&dict_value)) { if (list_item.GetAsDictionary(&dict_value)) {
ExtractEntity(dict_value, *(values.get_entity_values().at(j)), ExtractEntity(dict_value, *(values->get_entity_values().at(j)),
recursionLevel + 1); recursion_level + 1);
} }
} break; } break;
default: default:
...@@ -138,8 +234,8 @@ bool ParseRepeatedValue(base::Value::ListView& arr, ...@@ -138,8 +234,8 @@ bool ParseRepeatedValue(base::Value::ListView& arr,
void ExtractEntity(base::DictionaryValue* val, void ExtractEntity(base::DictionaryValue* val,
mojom::Entity& entity, mojom::Entity& entity,
int recursionLevel) { int recursion_level) {
if (recursionLevel >= kMaxDepth) { if (recursion_level >= kMaxDepth) {
return; return;
} }
...@@ -160,41 +256,50 @@ void ExtractEntity(base::DictionaryValue* val, ...@@ -160,41 +256,50 @@ void ExtractEntity(base::DictionaryValue* val,
} }
property->values = mojom::Values::New(); property->values = mojom::Values::New();
if (entry.second.is_bool()) { switch (entry.second.type()) {
bool v; case base::Value::Type::BOOLEAN:
val->GetBoolean(entry.first, &v); property->values->set_bool_values({entry.second.GetBool()});
property->values->set_bool_values({v}); break;
} else if (entry.second.is_int()) { case base::Value::Type::INTEGER:
int v; property->values->set_long_values({entry.second.GetInt()});
val->GetInteger(entry.first, &v); break;
property->values->set_long_values({v}); case base::Value::Type::DOUBLE:
} else if (entry.second.is_double()) { property->values->set_double_values({entry.second.GetDouble()});
double v; break;
val->GetDouble(entry.first, &v); case base::Value::Type::STRING: {
std::string s = base::NumberToString(v); base::StringPiece v = entry.second.GetString();
s = s.substr(0, kMaxStringLength); if (!(InitializeStringValue(property->name, property->values.get()) &&
property->values->set_string_values({s}); ParseStringValue(property->name, v, property->values.get()))) {
} else if (entry.second.is_string()) { continue;
std::string v; }
val->GetString(entry.first, &v); break;
v = v.substr(0, kMaxStringLength);
property->values->set_string_values({v});
} else if (entry.second.is_dict()) {
if (recursionLevel + 1 >= kMaxDepth) {
continue;
} }
property->values->set_entity_values(std::vector<mojom::EntityPtr>()); case base::Value::Type::DICTIONARY: {
property->values->get_entity_values().push_back(mojom::Entity::New()); if (recursion_level + 1 >= kMaxDepth) {
continue;
}
property->values->set_entity_values(std::vector<mojom::EntityPtr>());
property->values->get_entity_values().push_back(mojom::Entity::New());
base::DictionaryValue* dict_value = nullptr; base::DictionaryValue* dict_value = nullptr;
if (!entry.second.GetAsDictionary(&dict_value)) { if (!entry.second.GetAsDictionary(&dict_value)) {
continue; continue;
}
ExtractEntity(dict_value,
*(property->values->get_entity_values().at(0)),
recursion_level + 1);
break;
} }
ExtractEntity(dict_value, *(property->values->get_entity_values().at(0)), case base::Value::Type::LIST: {
recursionLevel + 1); base::Value::ListView list_view = entry.second.GetList();
} else if (entry.second.is_list()) { if (!ParseRepeatedValue(list_view, property->name,
base::Value::ListView list_view = entry.second.GetList(); property->values.get(), recursion_level)) {
if (!ParseRepeatedValue(list_view, *(property->values), recursionLevel)) { continue;
}
break;
}
default: {
// Unsupported value type. Skip this property.
continue; continue;
} }
} }
...@@ -225,7 +330,14 @@ mojom::EntityPtr Extractor::Extract(const std::string& content) { ...@@ -225,7 +330,14 @@ mojom::EntityPtr Extractor::Extract(const std::string& content) {
return nullptr; return nullptr;
} }
return ExtractTopLevelEntity(dict_value); mojom::EntityPtr entity = ExtractTopLevelEntity(dict_value);
bool is_valid = false;
if (!entity.is_null()) {
is_valid = ValidateEntity(entity.get());
}
return is_valid ? std::move(entity) : nullptr;
} }
} // namespace schema_org } // namespace schema_org
...@@ -37,6 +37,14 @@ class SchemaOrgExtractorTest : public testing::Test { ...@@ -37,6 +37,14 @@ class SchemaOrgExtractorTest : public testing::Test {
PropertyPtr CreateLongProperty(const std::string& name, const int64_t& value); PropertyPtr CreateLongProperty(const std::string& name, const int64_t& value);
PropertyPtr CreateDoubleProperty(const std::string& name, double value);
PropertyPtr CreateDateTimeProperty(const std::string& name,
const base::Time& value);
PropertyPtr CreateTimeProperty(const std::string& name,
const base::TimeDelta& value);
PropertyPtr CreateEntityProperty(const std::string& name, EntityPtr value); PropertyPtr CreateEntityProperty(const std::string& name, EntityPtr value);
}; };
...@@ -69,6 +77,36 @@ PropertyPtr SchemaOrgExtractorTest::CreateLongProperty(const std::string& name, ...@@ -69,6 +77,36 @@ PropertyPtr SchemaOrgExtractorTest::CreateLongProperty(const std::string& name,
return property; return property;
} }
// Builds a Property called |name| whose values are a one-element double array
// containing |value|.
PropertyPtr SchemaOrgExtractorTest::CreateDoubleProperty(
    const std::string& name,
    double value) {
  auto doubles = Values::New();
  doubles->set_double_values({value});

  PropertyPtr result = Property::New();
  result->name = name;
  result->values = std::move(doubles);
  return result;
}
// Builds a Property called |name| whose values are a one-element date-time
// array containing |value|.
PropertyPtr SchemaOrgExtractorTest::CreateDateTimeProperty(
    const std::string& name,
    const base::Time& value) {
  auto date_times = Values::New();
  date_times->set_date_time_values({value});

  PropertyPtr result = Property::New();
  result->name = name;
  result->values = std::move(date_times);
  return result;
}
// Builds a Property called |name| whose values are a one-element time array
// containing |value|.
PropertyPtr SchemaOrgExtractorTest::CreateTimeProperty(
    const std::string& name,
    const base::TimeDelta& value) {
  auto times = Values::New();
  times->set_time_values({value});

  PropertyPtr result = Property::New();
  result->name = name;
  result->values = std::move(times);
  return result;
}
PropertyPtr SchemaOrgExtractorTest::CreateEntityProperty( PropertyPtr SchemaOrgExtractorTest::CreateEntityProperty(
const std::string& name, const std::string& name,
EntityPtr value) { EntityPtr value) {
...@@ -96,7 +134,7 @@ TEST_F(SchemaOrgExtractorTest, Basic) { ...@@ -96,7 +134,7 @@ TEST_F(SchemaOrgExtractorTest, Basic) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
TEST_F(SchemaOrgExtractorTest, booleanValue) { TEST_F(SchemaOrgExtractorTest, BooleanValue) {
EntityPtr extracted = EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"requiresSubscription\": true }"); Extract("{\"@type\": \"VideoObject\", \"requiresSubscription\": true }");
ASSERT_FALSE(extracted.is_null()); ASSERT_FALSE(extracted.is_null());
...@@ -109,7 +147,7 @@ TEST_F(SchemaOrgExtractorTest, booleanValue) { ...@@ -109,7 +147,7 @@ TEST_F(SchemaOrgExtractorTest, booleanValue) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
TEST_F(SchemaOrgExtractorTest, longValue) { TEST_F(SchemaOrgExtractorTest, LongValue) {
EntityPtr extracted = EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"position\": 111 }"); Extract("{\"@type\": \"VideoObject\", \"position\": 111 }");
ASSERT_FALSE(extracted.is_null()); ASSERT_FALSE(extracted.is_null());
...@@ -121,14 +159,75 @@ TEST_F(SchemaOrgExtractorTest, longValue) { ...@@ -121,14 +159,75 @@ TEST_F(SchemaOrgExtractorTest, longValue) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
TEST_F(SchemaOrgExtractorTest, doubleValue) { TEST_F(SchemaOrgExtractorTest, DoubleValue) {
EntityPtr extracted = EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"width\": 111.5 }"); Extract("{\"@type\": \"VideoObject\", \"copyrightYear\": 1999.5 }");
ASSERT_FALSE(extracted.is_null()); ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New(); EntityPtr expected = Entity::New();
expected->type = "VideoObject"; expected->type = "VideoObject";
expected->properties.push_back(CreateStringProperty("width", "111.5")); expected->properties.push_back(CreateDoubleProperty("copyrightYear", 1999.5));
EXPECT_EQ(expected, extracted);
}
// A JSON string holding a decimal number is extracted as a double value for
// copyrightYear.
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDouble) {
  EntityPtr expected = Entity::New();
  expected->type = "VideoObject";
  expected->properties.push_back(CreateDoubleProperty("copyrightYear", 1999.5));

  EntityPtr extracted =
      Extract(R"({"@type": "VideoObject","copyrightYear": "1999.5"})");
  ASSERT_FALSE(extracted.is_null());

  EXPECT_EQ(expected, extracted);
}
// A JSON string holding only a time of day is extracted as a time value
// (offset from midnight) for startTime.
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingTime) {
  const base::TimeDelta five_thirty = base::TimeDelta::FromMinutes(60 * 5 + 30);
  EntityPtr expected = Entity::New();
  expected->type = "VideoObject";
  expected->properties.push_back(CreateTimeProperty("startTime", five_thirty));

  EntityPtr extracted =
      Extract(R"({"@type": "VideoObject","startTime": "05:30:00"})");
  ASSERT_FALSE(extracted.is_null());

  EXPECT_EQ(expected, extracted);
}
// startTime accepts both DateTime and Time. When the string parses as a
// DateTime, that representation takes precedence over Time.
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDateTimeOrTime) {
  const base::Time midnight_gmt = base::Time::FromDeltaSinceWindowsEpoch(
      base::TimeDelta::FromMilliseconds(12999744000000));
  EntityPtr expected = Entity::New();
  expected->type = "VideoObject";
  expected->properties.push_back(
      CreateDateTimeProperty("startTime", midnight_gmt));

  EntityPtr extracted = Extract(
      R"({"@type": "VideoObject","startTime": "2012-12-12T00:00:00 GMT"})");
  ASSERT_FALSE(extracted.is_null());

  EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDateTime) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\",\"dateCreated\": "
"\"2012-12-12T00:00:00 GMT\"}");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
expected->properties.push_back(CreateDateTimeProperty(
"dateCreated", base::Time::FromDeltaSinceWindowsEpoch(
base::TimeDelta::FromMilliseconds(12999744000000))));
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
...@@ -265,27 +364,43 @@ TEST_F(SchemaOrgExtractorTest, TruncateTooManyValuesInField) { ...@@ -265,27 +364,43 @@ TEST_F(SchemaOrgExtractorTest, TruncateTooManyValuesInField) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
TEST_F(SchemaOrgExtractorTest, truncateTooManyFields) { TEST_F(SchemaOrgExtractorTest, TruncateTooManyProperties) {
std::stringstream tooManyFields; // Create an entity with more than the supported number of properties. All the
for (int i = 0; i < 26; ++i) { // properties must be valid to be included. 26 properties below, should
tooManyFields << "\"" << i << "\": \"a\""; // truncate to 25.
if (i != 25) { EntityPtr extracted = Extract(
tooManyFields << ","; "{\"@type\": \"VideoObject\","
} "\"name\": \"a video!\","
} "\"transcript\":\"a short movie\","
EntityPtr extracted = "\"videoFrameSize\":\"1200x800\","
Extract("{\"@type\": \"VideoObject\"," + tooManyFields.str() + "}"); "\"videoQuality\":\"high\","
ASSERT_FALSE(extracted.is_null()); "\"bitrate\":\"24mbps\","
"\"contentSize\":\"8MB\","
EntityPtr expected = Entity::New(); "\"encodingFormat\":\"H264\","
expected->type = "VideoObject"; "\"accessMode\":\"visual\","
"\"accessibilitySummary\":\"short description\","
"\"alternativeHeadline\":\"OR other title\","
"\"award\":\"best picture\","
"\"educationalUse\":\"assignment\","
"\"headline\":\"headline\","
"\"interactivityType\":\"active\","
"\"keywords\":\"video\","
"\"learningResourceType\":\"presentation\","
"\"material\":\"film\","
"\"mentions\":\"other work\","
"\"schemaVersion\":\"http://schema.org/version/2.0/\","
"\"text\":\"a short work\","
"\"typicalAgeRange\":\"5-\","
"\"version\":\"5\","
"\"alternateName\":\"other title\","
"\"description\":\"a short description\","
"\"disambiguatingDescription\":\"clarifying point\","
"\"identifier\":\"ID12345\""
"}");
for (int i = 0; i < 25; ++i) { ASSERT_FALSE(extracted.is_null());
expected->properties.push_back(
CreateStringProperty(base::NumberToString(i), "a"));
}
EXPECT_EQ(expected->properties.size(), extracted->properties.size()); EXPECT_EQ(25u, extracted->properties.size());
} }
TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithEmptyArray) { TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithEmptyArray) {
...@@ -323,12 +438,12 @@ TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithNestedArray) { ...@@ -323,12 +438,12 @@ TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithNestedArray) {
TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) { TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) {
EntityPtr extracted = Extract( EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": \"a video!\"," "{\"@type\": \"VideoObject\", \"name\": \"a video!\","
"\"1\": {" "\"actor\": {"
" \"2\": {" " \"address\": {"
" \"3\": {" " \"addressCountry\": {"
" \"4\": {" " \"containedInPlace\": {"
" \"5\": {" " \"containedInPlace\": {"
" \"6\": 7" " \"name\": \"matroska\""
" }" " }"
" }" " }"
" }" " }"
...@@ -349,10 +464,14 @@ TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) { ...@@ -349,10 +464,14 @@ TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) {
EntityPtr entity4 = Entity::New(); EntityPtr entity4 = Entity::New();
entity4->type = "Thing"; entity4->type = "Thing";
entity3->properties.push_back(CreateEntityProperty("4", std::move(entity4))); entity3->properties.push_back(
entity2->properties.push_back(CreateEntityProperty("3", std::move(entity3))); CreateEntityProperty("containedInPlace", std::move(entity4)));
entity1->properties.push_back(CreateEntityProperty("2", std::move(entity2))); entity2->properties.push_back(
expected->properties.push_back(CreateEntityProperty("1", std::move(entity1))); CreateEntityProperty("addressCountry", std::move(entity3)));
entity1->properties.push_back(
CreateEntityProperty("address", std::move(entity2)));
expected->properties.push_back(
CreateEntityProperty("actor", std::move(entity1)));
expected->properties.push_back(CreateStringProperty("name", "a video!")); expected->properties.push_back(CreateStringProperty("name", "a video!"));
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
...@@ -361,11 +480,11 @@ TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) { ...@@ -361,11 +480,11 @@ TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) {
TEST_F(SchemaOrgExtractorTest, MaxNestingDepthWithTerminalProperty) { TEST_F(SchemaOrgExtractorTest, MaxNestingDepthWithTerminalProperty) {
EntityPtr extracted = Extract( EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": \"a video!\"," "{\"@type\": \"VideoObject\", \"name\": \"a video!\","
"\"1\": {" "\"actor\": {"
" \"2\": {" " \"address\": {"
" \"3\": {" " \"addressCountry\": {"
" \"4\": {" " \"containedInPlace\": {"
" \"5\": 6" " \"name\": \"matroska\""
" }" " }"
" }" " }"
" }" " }"
...@@ -385,12 +504,16 @@ TEST_F(SchemaOrgExtractorTest, MaxNestingDepthWithTerminalProperty) { ...@@ -385,12 +504,16 @@ TEST_F(SchemaOrgExtractorTest, MaxNestingDepthWithTerminalProperty) {
EntityPtr entity4 = Entity::New(); EntityPtr entity4 = Entity::New();
entity4->type = "Thing"; entity4->type = "Thing";
entity4->properties.push_back(CreateLongProperty("5", 6)); entity4->properties.push_back(CreateStringProperty("name", "matroska"));
entity3->properties.push_back(CreateEntityProperty("4", std::move(entity4))); entity3->properties.push_back(
entity2->properties.push_back(CreateEntityProperty("3", std::move(entity3))); CreateEntityProperty("containedInPlace", std::move(entity4)));
entity1->properties.push_back(CreateEntityProperty("2", std::move(entity2))); entity2->properties.push_back(
CreateEntityProperty("addressCountry", std::move(entity3)));
entity1->properties.push_back(
CreateEntityProperty("address", std::move(entity2)));
expected->properties.push_back(CreateEntityProperty("1", std::move(entity1))); expected->properties.push_back(
CreateEntityProperty("actor", std::move(entity1)));
expected->properties.push_back(CreateStringProperty("name", "a video!")); expected->properties.push_back(CreateStringProperty("name", "a video!"));
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
......
...@@ -14,6 +14,11 @@ TEST(GenerateSchemaOrgTest, EntityName) { ...@@ -14,6 +14,11 @@ TEST(GenerateSchemaOrgTest, EntityName) {
EXPECT_STREQ(entity::kAboutPage, "AboutPage"); EXPECT_STREQ(entity::kAboutPage, "AboutPage");
} }
// A generated entity name constant is accepted; an arbitrary string is not.
TEST(GenerateSchemaOrgTest, IsValidEntityName) {
  const bool known_name_accepted = entity::IsValidEntityName(entity::kAboutPage);
  EXPECT_TRUE(known_name_accepted);

  const bool unknown_name_accepted =
      entity::IsValidEntityName("a made up name");
  EXPECT_FALSE(unknown_name_accepted);
}
TEST(GenerateSchemaOrgTest, PropertyName) { TEST(GenerateSchemaOrgTest, PropertyName) {
EXPECT_STREQ(property::kAcceptedAnswer, "acceptedAnswer"); EXPECT_STREQ(property::kAcceptedAnswer, "acceptedAnswer");
} }
......
...@@ -6,6 +6,9 @@ ...@@ -6,6 +6,9 @@
// Do not edit. // Do not edit.
#include "components/schema_org/{{ header_file }}.h" #include "components/schema_org/{{ header_file }}.h"
#include "base/containers/flat_set.h"
#include "base/no_destructor.h"
#include "base/strings/string_piece.h"
namespace schema_org { namespace schema_org {
namespace entity { namespace entity {
...@@ -14,5 +17,15 @@ namespace entity { ...@@ -14,5 +17,15 @@ namespace entity {
const char k{{entity[0]|upper}}{{entity[1:]}}[] = "{{entity}}"; const char k{{entity[0]|upper}}{{entity[1:]}}[] = "{{entity}}";
{% endfor %} {% endfor %}
// Returns true if |entity_name| is equal to one of the entity name constants
// generated by this template, false otherwise.
bool IsValidEntityName(const std::string& entity_name) {
// Function-local static: the set is built the first time this function runs.
// The template loop below emits one set element per generated entity constant.
static const base::NoDestructor<base::flat_set<base::StringPiece>>
kValidEntityNames(base::flat_set<base::StringPiece>({
{%for entity in entities %}
k{{entity[0]|upper}}{{entity[1:]}},
{% endfor %}
}));
return kValidEntityNames->find(entity_name) != kValidEntityNames->end();
}
} // entity } // entity
} // schema_org } // schema_org
...@@ -17,6 +17,8 @@ namespace entity { ...@@ -17,6 +17,8 @@ namespace entity {
extern const char k{{entity[0]|upper}}{{entity[1:]}}[]; extern const char k{{entity[0]|upper}}{{entity[1:]}}[];
{% endfor %} {% endfor %}
// Returns true if |entity_name| is equal to one of the entity name constants
// declared in this header, false otherwise.
bool IsValidEntityName(const std::string& entity_name);
} // namespace entity } // namespace entity
} // namespace schema_org } // namespace schema_org
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/schema_org/validator.h"
#include <vector>
#include "components/schema_org/common/metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h"
namespace schema_org {
using mojom::Entity;
using mojom::EntityPtr;
// static
bool ValidateEntity(Entity* entity) {
if (!entity::IsValidEntityName(entity->type)) {
return false;
}
// Cycle through properties and remove any that have the wrong type.
auto it = entity->properties.begin();
while (it != entity->properties.end()) {
property::PropertyConfiguration config =
property::GetPropertyConfiguration((*it)->name);
if ((*it)->values->is_string_values() && !config.text) {
it = entity->properties.erase(it);
} else if ((*it)->values->is_double_values() && !config.number) {
it = entity->properties.erase(it);
} else if ((*it)->values->is_time_values() && !config.time) {
it = entity->properties.erase(it);
} else if ((*it)->values->is_date_time_values() && !config.date_time &&
!config.date) {
it = entity->properties.erase(it);
} else if ((*it)->values->is_entity_values()) {
if (config.thing_types.empty()) {
// Property is not supposed to have an entity type.
it = entity->properties.erase(it);
} else {
// Check all the entities nested in this property. Remove any invalid
// ones.
bool has_valid_entities = false;
auto nested_it = (*it)->values->get_entity_values().begin();
while (nested_it != (*it)->values->get_entity_values().end()) {
auto& nested_entity = *nested_it;
if (!ValidateEntity(nested_entity.get())) {
nested_it = (*it)->values->get_entity_values().erase(nested_it);
} else {
has_valid_entities = true;
++nested_it;
}
}
// If there were no valid entity values for this property, remove the
// whole property.
if (!has_valid_entities) {
it = entity->properties.erase(it);
} else {
++it;
}
}
} else {
++it;
}
}
return true;
}
} // namespace schema_org
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
#define COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
#include "components/schema_org/common/metadata.mojom-forward.h"
namespace schema_org {
// Validates and cleans up the Schema.org entity in-place. Returns false,
// without removing anything, if the entity's own type is not a valid entity
// name. Otherwise returns true after removing every property whose values do
// not match the types configured for that property. Entity-valued properties
// have their nested entities validated the same way: invalid nested entities
// are dropped, and a property left with no valid nested entity is removed.
bool ValidateEntity(mojom::Entity* entity);
} // namespace schema_org
#endif // COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <memory>
#include <utility>
#include <vector>
#include "components/schema_org/common/metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h"
#include "components/schema_org/validator.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace schema_org {
using mojom::Entity;
using mojom::EntityPtr;
using mojom::Property;
using mojom::PropertyPtr;
using mojom::Values;
// Fixture for the ValidateEntity() tests in this file; it adds no state or
// setup of its own.
class SchemaOrgValidatorTest : public testing::Test {};
// An entity with an unrecognized type string is expected to fail validation.
TEST_F(SchemaOrgValidatorTest, InvalidEntityType) {
  EntityPtr unknown = Entity::New();
  unknown->type = "random entity type";

  const bool is_valid = ValidateEntity(unknown.get());
  EXPECT_FALSE(is_valid);
}
// A string value on property::kAccessMode is expected to be kept.
TEST_F(SchemaOrgValidatorTest, ValidStringPropertyValue) {
  PropertyPtr access_mode = Property::New();
  access_mode->name = property::kAccessMode;
  access_mode->values = Values::New();
  access_mode->values->set_string_values({"foo"});

  EntityPtr about_page = Entity::New();
  about_page->type = entity::kAboutPage;
  about_page->properties.push_back(std::move(access_mode));

  const bool is_valid = ValidateEntity(about_page.get());
  EXPECT_TRUE(is_valid);
  EXPECT_EQ(1u, about_page->properties.size());
}
// A string value on property::kAbout is expected to be removed, while the
// enclosing entity still validates.
TEST_F(SchemaOrgValidatorTest, InvalidStringPropertyValue) {
  PropertyPtr about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->set_string_values({"foo"});

  EntityPtr about_page = Entity::New();
  about_page->type = entity::kAboutPage;
  about_page->properties.push_back(std::move(about));

  const bool is_valid = ValidateEntity(about_page.get());
  EXPECT_TRUE(is_valid);
  EXPECT_TRUE(about_page->properties.empty());
}
// A numeric value on property::kAdditionalNumberOfGuests is expected to be
// kept.
TEST_F(SchemaOrgValidatorTest, ValidNumberPropertyValue) {
  PropertyPtr guests = Property::New();
  guests->name = property::kAdditionalNumberOfGuests;
  guests->values = Values::New();
  guests->values->set_double_values({1.0});

  EntityPtr residence = Entity::New();
  residence->type = entity::kSingleFamilyResidence;
  residence->properties.push_back(std::move(guests));

  const bool is_valid = ValidateEntity(residence.get());
  EXPECT_TRUE(is_valid);
  EXPECT_EQ(1u, residence->properties.size());
}
// A numeric value on property::kAbout is expected to be removed, while the
// enclosing entity still validates.
TEST_F(SchemaOrgValidatorTest, InvalidNumberPropertyValue) {
  PropertyPtr about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->set_double_values({1.0});

  EntityPtr about_page = Entity::New();
  about_page->type = entity::kAboutPage;
  about_page->properties.push_back(std::move(about));

  const bool is_valid = ValidateEntity(about_page.get());
  EXPECT_TRUE(is_valid);
  EXPECT_TRUE(about_page->properties.empty());
}
// A date-time value on property::kCheckinTime is expected to be kept.
TEST_F(SchemaOrgValidatorTest, ValidDateTimePropertyValue) {
  const base::Time checkin = base::Time::FromDeltaSinceWindowsEpoch(
      base::TimeDelta::FromMilliseconds(12999772800000));

  PropertyPtr checkin_time = Property::New();
  checkin_time->name = property::kCheckinTime;
  checkin_time->values = Values::New();
  checkin_time->values->set_date_time_values({checkin});

  EntityPtr lodging = Entity::New();
  lodging->type = entity::kLodgingBusiness;
  lodging->properties.push_back(std::move(checkin_time));

  const bool is_valid = ValidateEntity(lodging.get());
  EXPECT_TRUE(is_valid);
  EXPECT_EQ(1u, lodging->properties.size());
}
// A date-time value on property::kAbout is expected to be removed, while the
// enclosing entity still validates.
TEST_F(SchemaOrgValidatorTest, InvalidDateTimePropertyValue) {
  const base::Time timestamp = base::Time::FromDeltaSinceWindowsEpoch(
      base::TimeDelta::FromMilliseconds(12999772800000));

  PropertyPtr about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->set_date_time_values({timestamp});

  EntityPtr about_page = Entity::New();
  about_page->type = entity::kAboutPage;
  about_page->properties.push_back(std::move(about));

  const bool is_valid = ValidateEntity(about_page.get());
  EXPECT_TRUE(is_valid);
  EXPECT_TRUE(about_page->properties.empty());
}
// A time value on property::kCheckinTime is expected to be kept.
TEST_F(SchemaOrgValidatorTest, ValidTimePropertyValue) {
  const base::TimeDelta checkin =
      base::TimeDelta::FromMilliseconds(12999772800000);

  PropertyPtr checkin_time = Property::New();
  checkin_time->name = property::kCheckinTime;
  checkin_time->values = Values::New();
  checkin_time->values->set_time_values({checkin});

  EntityPtr lodging = Entity::New();
  lodging->type = entity::kLodgingBusiness;
  lodging->properties.push_back(std::move(checkin_time));

  const bool is_valid = ValidateEntity(lodging.get());
  EXPECT_TRUE(is_valid);
  EXPECT_EQ(1u, lodging->properties.size());
}
// A time value on property::kAbout is expected to be removed, while the
// enclosing entity still validates.
TEST_F(SchemaOrgValidatorTest, InvalidTimePropertyValue) {
  const base::TimeDelta duration =
      base::TimeDelta::FromMilliseconds(12999772800000);

  PropertyPtr about = Property::New();
  about->name = property::kAbout;
  about->values = Values::New();
  about->values->set_time_values({duration});

  EntityPtr about_page = Entity::New();
  about_page->type = entity::kAboutPage;
  about_page->properties.push_back(std::move(about));

  const bool is_valid = ValidateEntity(about_page.get());
  EXPECT_TRUE(is_valid);
  EXPECT_TRUE(about_page->properties.empty());
}
// A nested entity with a valid type on property::kAddress is expected to be
// kept.
TEST_F(SchemaOrgValidatorTest, ValidEntityPropertyValue) {
  EntityPtr postal_address = Entity::New();
  postal_address->type = entity::kPostalAddress;

  std::vector<EntityPtr> addresses;
  addresses.push_back(std::move(postal_address));

  PropertyPtr address = Property::New();
  address->name = property::kAddress;
  address->values = Values::New();
  address->values->set_entity_values(std::move(addresses));

  EntityPtr restaurant = Entity::New();
  restaurant->type = entity::kRestaurant;
  restaurant->properties.push_back(std::move(address));

  const bool is_valid = ValidateEntity(restaurant.get());
  EXPECT_TRUE(is_valid);
  EXPECT_EQ(1u, restaurant->properties.size());
}
// An entity value on property::kAccessMode is expected to be removed, even
// though the nested entity's own type is valid.
TEST_F(SchemaOrgValidatorTest, InvalidEntityPropertyValue) {
  EntityPtr postal_address = Entity::New();
  postal_address->type = entity::kPostalAddress;

  std::vector<EntityPtr> nested;
  nested.push_back(std::move(postal_address));

  PropertyPtr access_mode = Property::New();
  access_mode->name = property::kAccessMode;
  access_mode->values = Values::New();
  access_mode->values->set_entity_values(std::move(nested));

  EntityPtr about_page = Entity::New();
  about_page->type = entity::kAboutPage;
  about_page->properties.push_back(std::move(access_mode));

  const bool is_valid = ValidateEntity(about_page.get());
  EXPECT_TRUE(is_valid);
  EXPECT_TRUE(about_page->properties.empty());
}
// Two nested entities with valid types on property::kAddress are both
// expected to be kept.
TEST_F(SchemaOrgValidatorTest, ValidRepeatedEntityPropertyValue) {
  EntityPtr entity = Entity::New();
  entity->type = entity::kRestaurant;

  PropertyPtr property = Property::New();
  property->name = property::kAddress;
  property->values = Values::New();

  EntityPtr value1 = Entity::New();
  value1->type = entity::kPostalAddress;
  EntityPtr value2 = Entity::New();
  value2->type = entity::kPostalAddress;

  property->values->set_entity_values(std::vector<EntityPtr>());
  property->values->get_entity_values().push_back(std::move(value1));
  property->values->get_entity_values().push_back(std::move(value2));

  entity->properties.push_back(std::move(property));

  bool validated_entity = ValidateEntity(entity.get());
  EXPECT_TRUE(validated_entity);

  // Use a fatal assertion here: the next line indexes properties[0], which
  // would be out of bounds if the property had been removed.
  ASSERT_EQ(1u, entity->properties.size());
  EXPECT_EQ(2u, entity->properties[0]->values->get_entity_values().size());
}
// If one value of a repeated property is invalid but the other is not, the
// validator should keep the outer property and remove only the invalid nested
// entity.
TEST_F(SchemaOrgValidatorTest, MixedValidityRepeatedEntityPropertyValue) {
  EntityPtr entity = Entity::New();
  entity->type = entity::kRestaurant;

  PropertyPtr property = Property::New();
  property->name = property::kAddress;
  property->values = Values::New();

  EntityPtr value1 = Entity::New();
  value1->type = entity::kPostalAddress;
  EntityPtr value2 = Entity::New();
  value2->type = "bad address";

  property->values->set_entity_values(std::vector<EntityPtr>());
  property->values->get_entity_values().push_back(std::move(value1));
  property->values->get_entity_values().push_back(std::move(value2));

  entity->properties.push_back(std::move(property));

  bool validated_entity = ValidateEntity(entity.get());
  EXPECT_TRUE(validated_entity);

  // Fatal assertions guard the indexing that follows; a non-fatal EXPECT_EQ
  // would let the test continue into an out-of-bounds access on failure.
  ASSERT_EQ(1u, entity->properties.size());
  const auto& remaining = entity->properties[0]->values->get_entity_values();
  ASSERT_EQ(1u, remaining.size());

  // The surviving nested entity must be the valid one, not the bad one.
  EXPECT_TRUE(remaining[0]->type == entity::kPostalAddress);
}
// When every nested entity of a property has an unrecognized type, the whole
// property is expected to be removed while the outer entity still validates.
TEST_F(SchemaOrgValidatorTest, InvalidRepeatedEntityPropertyValue) {
  EntityPtr first_bad = Entity::New();
  first_bad->type = "this is not a real type";
  EntityPtr second_bad = Entity::New();
  second_bad->type = "bad address type";

  std::vector<EntityPtr> addresses;
  addresses.push_back(std::move(first_bad));
  addresses.push_back(std::move(second_bad));

  PropertyPtr address = Property::New();
  address->name = property::kAddress;
  address->values = Values::New();
  address->values->set_entity_values(std::move(addresses));

  EntityPtr restaurant = Entity::New();
  restaurant->type = entity::kRestaurant;
  restaurant->properties.push_back(std::move(address));

  const bool is_valid = ValidateEntity(restaurant.get());
  EXPECT_TRUE(is_valid);
  EXPECT_TRUE(restaurant->properties.empty());
}
} // namespace schema_org
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment