Commit 1c31841c authored by Sam Bowen's avatar Sam Bowen Committed by Commit Bot

Allow strings and URLs in place of entities.

* Allow string whenever a property is text, Thing, or Enum type.
* Allow URL whenever a property is Thing or Enum type, /as long as/ the
  URL is a schema.org ID of an appropriate thing or enum.

https://schema.org/docs/gs.html#schemaorg_expected

Change-Id: I72edef99f705c64838f09829de6c34058b10b860
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2144401Reviewed-by: default avatarBecca Hughes <beccahughes@chromium.org>
Commit-Queue: Sam Bowen <sgbowen@google.com>
Cr-Commit-Position: refs/heads/master@{#758367}
parent 83fdc9b8
......@@ -75,14 +75,6 @@ bool ParseStringValue(const std::string& property_type,
schema_org::property::PropertyConfiguration prop_config =
schema_org::property::GetPropertyConfiguration(property_type);
if (prop_config.text) {
values->string_values.push_back(value.as_string());
return true;
}
if (prop_config.url) {
values->url_values.push_back(GURL(value));
return true;
}
if (prop_config.number) {
double d;
bool parsed_double = base::StringToDouble(value, &d);
......@@ -133,11 +125,17 @@ bool ParseStringValue(const std::string& property_type,
return true;
}
}
if (!prop_config.enum_types.empty()) {
if (!prop_config.thing_types.empty() || !prop_config.enum_types.empty() ||
prop_config.url) {
auto url = GURL(value);
if (!url.is_valid())
return false;
values->url_values.push_back(url);
if (url.is_valid()) {
values->url_values.push_back(url);
return true;
}
}
if (!prop_config.thing_types.empty() || !prop_config.enum_types.empty() ||
prop_config.text) {
values->string_values.push_back(value.as_string());
return true;
}
return false;
......
......@@ -312,6 +312,22 @@ TEST_F(SchemaOrgExtractorTest, StringValueRepresentingEnum) {
EXPECT_EQ(expected, extracted);
}
// The extractor should accept a string value for a property that has an
// expected entity type. https://schema.org/docs/gs.html#schemaorg_expected
TEST_F(SchemaOrgExtractorTest, StringValueForThingType) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\",\"author\": \"Google Chrome Developers\"}");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
expected->properties.push_back(
CreateStringProperty("author", "Google Chrome Developers"));
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, UrlValue) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", "
......
......@@ -46,7 +46,10 @@ bool ValidateEntity(Entity* entity) {
property::PropertyConfiguration config =
property::GetPropertyConfiguration((*it)->name);
if (!(*it)->values->string_values.empty() && !config.text) {
bool allows_text = config.text || !config.thing_types.empty() ||
!config.enum_types.empty();
if (!(*it)->values->string_values.empty() && !allows_text) {
it = entity->properties.erase(it);
} else if (!(*it)->values->double_values.empty() && !config.number) {
it = entity->properties.erase(it);
......@@ -110,6 +113,31 @@ bool ValidateEntity(Entity* entity) {
} else {
++it;
}
} else if (!config.thing_types.empty()) {
// Check all the url values in this property. Remove any ones that
// aren't a valid URL to an item in thing_types, or a descendant of an
// item in thing_types.
bool has_valid_entities = false;
auto nested_it = (*it)->values->url_values.begin();
while (nested_it != (*it)->values->url_values.end()) {
auto& url = *nested_it;
auto type_name = ObjectNameFromId(url.spec());
if (!type_name.has_value() ||
!EntityPropertyIsValidType(config, type_name.value())) {
nested_it = (*it)->values->url_values.erase(nested_it);
} else {
has_valid_entities = true;
++nested_it;
}
}
// If there were no valid url values representing entity types for this
// property, remove the whole property.
if (!has_valid_entities) {
it = entity->properties.erase(it);
} else {
++it;
}
} else {
// This property shouldn't have any url values according to the config.
it = entity->properties.erase(it);
......
......@@ -48,12 +48,28 @@ TEST_F(SchemaOrgValidatorTest, ValidStringPropertyValue) {
EXPECT_EQ(1u, entity->properties.size());
}
TEST_F(SchemaOrgValidatorTest, AllowsStringPropertyValueForThingType) {
EntityPtr entity = Entity::New();
entity->type = entity::kCreativeWork;
PropertyPtr property = Property::New();
property->name = property::kAuthor;
property->values = Values::New();
property->values->string_values.push_back("foo");
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_EQ(1u, entity->properties.size());
}
TEST_F(SchemaOrgValidatorTest, InvalidStringPropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kAboutPage;
entity->type = entity::kSingleFamilyResidence;
PropertyPtr property = Property::New();
property->name = property::kAbout;
property->name = property::kAdditionalNumberOfGuests;
property->values = Values::New();
property->values->string_values.push_back("foo");
......@@ -225,6 +241,25 @@ TEST_F(SchemaOrgValidatorTest, ValidEntityPropertyValueDescendedType) {
EXPECT_EQ(1u, entity->properties.size());
}
TEST_F(SchemaOrgValidatorTest, ValidUrlPropertyValueDescendedType) {
EntityPtr entity = Entity::New();
entity->type = entity::kVideoObject;
PropertyPtr property = Property::New();
property->name = property::kThumbnail;
property->values = Values::New();
// Barcode is descended from ImageObject, an acceptable type for the thumbnail
// property. So barcode should also be accepted.
property->values->url_values.push_back(GURL("http://schema.org/Barcode"));
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_EQ(1u, entity->properties.size());
}
TEST_F(SchemaOrgValidatorTest, ValidRepeatedEntityPropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kRestaurant;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment