Commit 8bfcd04e authored by Sam Bowen, committed by Commit Bot

Support enums in generated schema.org code and extractor

Generate code for types inheriting from Enumeration and use this code in
the extractor and the validator. The extractor will accept any strings
that start with "http[s]://schema.org", while the validator will ensure
the strings match one of the enum options for one of the enum types in
the property config.

Previously these were treated as a nested entity type, but we will only
ever find enum as string values.

Bug: 1065511
Change-Id: I53b7d3cf290c94f44ede7776eb2cb287c21e513c
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2128527
Commit-Queue: Sam Bowen <sgbowen@google.com>
Reviewed-by: Becca Hughes <beccahughes@chromium.org>
Cr-Commit-Position: refs/heads/master@{#755189}
parent 94681326
...@@ -27,6 +27,8 @@ action("generate_schema_org_code") { ...@@ -27,6 +27,8 @@ action("generate_schema_org_code") {
sources = [ sources = [
"templates/schema_org_entity_names.cc.tmpl", "templates/schema_org_entity_names.cc.tmpl",
"templates/schema_org_entity_names.h.tmpl", "templates/schema_org_entity_names.h.tmpl",
"templates/schema_org_enums.cc.tmpl",
"templates/schema_org_enums.h.tmpl",
"templates/schema_org_property_configurations.cc.tmpl", "templates/schema_org_property_configurations.cc.tmpl",
"templates/schema_org_property_configurations.h.tmpl", "templates/schema_org_property_configurations.h.tmpl",
"templates/schema_org_property_names.cc.tmpl", "templates/schema_org_property_names.cc.tmpl",
...@@ -47,6 +49,8 @@ action("generate_schema_org_code") { ...@@ -47,6 +49,8 @@ action("generate_schema_org_code") {
"$target_gen_dir/schema_org_property_configurations.cc", "$target_gen_dir/schema_org_property_configurations.cc",
"$target_gen_dir/schema_org_property_names.h", "$target_gen_dir/schema_org_property_names.h",
"$target_gen_dir/schema_org_property_names.cc", "$target_gen_dir/schema_org_property_names.cc",
"$target_gen_dir/schema_org_enums.h",
"$target_gen_dir/schema_org_enums.cc",
] ]
} }
...@@ -54,6 +58,8 @@ static_library("schema_org_properties") { ...@@ -54,6 +58,8 @@ static_library("schema_org_properties") {
sources = [ sources = [
"$target_gen_dir/schema_org_entity_names.cc", "$target_gen_dir/schema_org_entity_names.cc",
"$target_gen_dir/schema_org_entity_names.h", "$target_gen_dir/schema_org_entity_names.h",
"$target_gen_dir/schema_org_enums.cc",
"$target_gen_dir/schema_org_enums.h",
"$target_gen_dir/schema_org_property_configurations.cc", "$target_gen_dir/schema_org_property_configurations.cc",
"$target_gen_dir/schema_org_property_configurations.h", "$target_gen_dir/schema_org_property_configurations.h",
"$target_gen_dir/schema_org_property_names.cc", "$target_gen_dir/schema_org_property_names.cc",
......
...@@ -121,6 +121,13 @@ bool ParseStringValue(const std::string& property_type, ...@@ -121,6 +121,13 @@ bool ParseStringValue(const std::string& property_type,
return true; return true;
} }
} }
if (!prop_config.enum_types.empty()) {
auto url = GURL(value);
if (!url.is_valid())
return false;
values->url_values.push_back(url);
return true;
}
return false; return false;
} }
......
...@@ -257,6 +257,26 @@ TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDateTime) { ...@@ -257,6 +257,26 @@ TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDateTime) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
// An enum-typed property supplied as a schema.org URL string should come out
// of the extractor as a URL value on the nested entity.
TEST_F(SchemaOrgExtractorTest, StringValueRepresentingEnum) {
  EntityPtr extracted = Extract(
      "{\"@type\": \"VideoObject\",\"potentialAction\": {\"@type\": "
      "\"Action\", \"actionStatus\": "
      "\"http://schema.org/ActiveActionStatus\"}}");
  ASSERT_FALSE(extracted.is_null());

  // Build the nested Action first, then attach it to the outer VideoObject.
  EntityPtr nested_action = Entity::New();
  nested_action->type = "Action";
  nested_action->properties.push_back(CreateUrlProperty(
      "actionStatus", GURL("http://schema.org/ActiveActionStatus")));

  EntityPtr expected = Entity::New();
  expected->type = "VideoObject";
  expected->properties.push_back(
      CreateEntityProperty("potentialAction", std::move(nested_action)));

  EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, UrlValue) { TEST_F(SchemaOrgExtractorTest, UrlValue) {
EntityPtr extracted = Extract( EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", " "{\"@type\": \"VideoObject\", "
......
...@@ -41,6 +41,22 @@ def get_schema_obj(obj_id, schema): ...@@ -41,6 +41,22 @@ def get_schema_obj(obj_id, schema):
return matches[0] if len(matches) == 1 else None return matches[0] if len(matches) == 1 else None
def is_enum_type(class_obj):
    """Return whether the class directly inherits from schema.org Enumeration.

    Args:
        class_obj: A schema graph node (dict). Its optional 'rdfs:subClassOf'
            entry is either a single parent reference or a list of them, each
            carrying an '@id'.

    Returns:
        True if any direct parent is Enumeration, False otherwise (including
        when the class declares no parent at all).
    """
    if 'rdfs:subClassOf' not in class_obj:
        # A class without a declared parent cannot be an enum. Return an
        # explicit False rather than falling off the end with None.
        return False
    parent_class = class_obj['rdfs:subClassOf']
    if isinstance(parent_class, list):
        return any(parent['@id'] == schema_org_id('Enumeration')
                   for parent in parent_class)
    return parent_class['@id'] == schema_org_id('Enumeration')
def find_enum_options(obj_id, schema):
    """Collect the names of all graph nodes that are instances of an enum type.

    Args:
        obj_id: The '@id' of the enum class whose options are wanted.
        schema: The parsed schema; its '@graph' entry is the list of nodes.

    Returns:
        A list of option names, in graph order, for every node whose '@type'
        is obj_id or is a list containing obj_id.
    """
    options = []
    for obj in schema['@graph']:
        # '@type' may be a single id or a list of ids; a node typed as several
        # enums is an option of each of them. Nodes without '@type' are
        # skipped instead of raising KeyError.
        node_type = obj.get('@type')
        node_types = node_type if isinstance(node_type, list) else [node_type]
        if obj_id in node_types:
            options.append(object_name_from_id(obj['@id']))
    return options
def get_root_type(the_class, schema): def get_root_type(the_class, schema):
"""Get the base type the class is descended from.""" """Get the base type the class is descended from."""
class_obj = get_schema_obj(the_class['@id'], schema) class_obj = get_schema_obj(the_class['@id'], schema)
...@@ -57,17 +73,25 @@ def get_root_type(the_class, schema): ...@@ -57,17 +73,25 @@ def get_root_type(the_class, schema):
and schema_org_id('DataType') in class_obj['@type']): and schema_org_id('DataType') in class_obj['@type']):
return class_obj return class_obj
if 'rdfs:subClassOf' in class_obj: if 'rdfs:subClassOf' in class_obj:
subclass = class_obj['rdfs:subClassOf'] parent_class = class_obj['rdfs:subClassOf']
# All classes that use multiple inheritance are Thing type. # All classes that use multiple inheritance are Thing type.
if isinstance(subclass, list): if isinstance(parent_class, list):
return get_schema_obj(schema_org_id('Thing'), schema) return get_schema_obj(schema_org_id('Thing'), schema)
return get_root_type(subclass, schema) # Enumeration classes are treated specially. Return the specific type
# of enum this class is.
if parent_class['@id'] == schema_org_id('Enumeration'):
return class_obj
return get_root_type(parent_class, schema)
return class_obj return class_obj
def parse_property(prop, schema): def parse_property(prop, schema):
"""Parse out details about the property, including what type it can be.""" """Parse out details about the property, including what type it can be."""
parsed_prop = {'name': object_name_from_id(prop['@id']), 'thing_types': []} parsed_prop = {
'name': object_name_from_id(prop['@id']),
'thing_types': [],
'enum_types': []
}
if not schema_org_id('rangeIncludes') in prop: if not schema_org_id('rangeIncludes') in prop:
return parsed_prop return parsed_prop
...@@ -94,12 +118,14 @@ def parse_property(prop, schema): ...@@ -94,12 +118,14 @@ def parse_property(prop, schema):
parsed_prop['has_date_time'] = True parsed_prop['has_date_time'] = True
elif root_type['@id'] == schema_org_id('URL'): elif root_type['@id'] == schema_org_id('URL'):
parsed_prop['has_url'] = True parsed_prop['has_url'] = True
elif is_enum_type(root_type):
parsed_prop['enum_types'].append(possible_type['@id'])
return parsed_prop return parsed_prop
def get_template_vars(schema_file_path): def get_template_vars(schema_file_path):
"""Read the needed template variables from the schema file.""" """Read the needed template variables from the schema file."""
template_vars = {'entities': [], 'properties': []} template_vars = {'entities': [], 'properties': [], 'enums': []}
with open(schema_file_path) as schema_file: with open(schema_file_path) as schema_file:
schema = json.loads(schema_file.read()) schema = json.loads(schema_file.read())
...@@ -107,6 +133,15 @@ def get_template_vars(schema_file_path): ...@@ -107,6 +133,15 @@ def get_template_vars(schema_file_path):
for thing in schema['@graph']: for thing in schema['@graph']:
if thing['@type'] == 'rdfs:Class': if thing['@type'] == 'rdfs:Class':
template_vars['entities'].append(object_name_from_id(thing['@id'])) template_vars['entities'].append(object_name_from_id(thing['@id']))
if is_enum_type(thing):
template_vars['enums'].append({
'name':
object_name_from_id(thing['@id']),
'id':
thing['@id'],
'options':
find_enum_options(thing['@id'], schema)
})
elif thing['@type'] == 'rdf:Property': elif thing['@type'] == 'rdf:Property':
template_vars['properties'].append(parse_property(thing, schema)) template_vars['properties'].append(parse_property(thing, schema))
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
// found in the LICENSE file. // found in the LICENSE file.
#include "components/schema_org/schema_org_entity_names.h" #include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_enums.h"
#include "components/schema_org/schema_org_property_configurations.h" #include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h" #include "components/schema_org/schema_org_property_names.h"
#include "testing/gmock/include/gmock/gmock.h" #include "testing/gmock/include/gmock/gmock.h"
...@@ -50,11 +51,15 @@ TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsUrl) { ...@@ -50,11 +51,15 @@ TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsUrl) {
} }
TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsThingType) { TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsThingType) {
EXPECT_THAT(property::GetPropertyConfiguration(property::kBrand).thing_types,
testing::UnorderedElementsAre("http://schema.org/Brand",
"http://schema.org/Organization"));
}
TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsEnumType) {
EXPECT_THAT( EXPECT_THAT(
property::GetPropertyConfiguration(property::kAcceptedPaymentMethod) property::GetPropertyConfiguration(property::kActionStatus).enum_types,
.thing_types, testing::UnorderedElementsAre("http://schema.org/ActionStatusType"));
testing::UnorderedElementsAre("http://schema.org/LoanOrCredit",
"http://schema.org/PaymentMethod"));
} }
TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsMultipleTypes) { TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsMultipleTypes) {
...@@ -64,4 +69,20 @@ TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsMultipleTypes) { ...@@ -64,4 +69,20 @@ TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsMultipleTypes) {
testing::UnorderedElementsAre("http://schema.org/PropertyValue")); testing::UnorderedElementsAre("http://schema.org/PropertyValue"));
} }
// A URL naming a real ActionStatusType option must resolve to the numeric
// value of the matching generated enumerator.
TEST(GenerateSchemaOrgCodeTest, CheckValidEnumStringReturnsCorrectOption) {
  const GURL option_url("http://schema.org/ActiveActionStatus");
  auto result = enums::CheckValidEnumString(
      "http://schema.org/ActionStatusType", option_url);
  ASSERT_TRUE(result.has_value());

  const int expected_value =
      static_cast<int>(enums::ActionStatusType::kActiveActionStatus);
  EXPECT_EQ(expected_value, result.value());
}
// A schema.org URL that is not one of the enum's options yields no value.
TEST(GenerateSchemaOrgCodeTest, CheckValidEnumStringReturnsAbsent) {
  const GURL unknown_option("http://schema.org/FakeActionStatus");
  auto result = enums::CheckValidEnumString(
      "http://schema.org/ActionStatusType", unknown_option);
  EXPECT_FALSE(result.has_value());
}
} // namespace schema_org } // namespace schema_org
...@@ -44,8 +44,10 @@ class GenerateSchemaOrgCodeTest(unittest.TestCase): ...@@ -44,8 +44,10 @@ class GenerateSchemaOrgCodeTest(unittest.TestCase):
'entities': ['MediaObject'], 'entities': ['MediaObject'],
'properties': [{ 'properties': [{
'name': 'propertyName', 'name': 'propertyName',
'thing_types': [] 'thing_types': [],
}] 'enum_types': []
}],
'enums': [],
}) })
def test_get_root_type_thing(self): def test_get_root_type_thing(self):
...@@ -75,6 +77,26 @@ class GenerateSchemaOrgCodeTest(unittest.TestCase): ...@@ -75,6 +77,26 @@ class GenerateSchemaOrgCodeTest(unittest.TestCase):
self.assertEqual( self.assertEqual(
generate_schema_org_code.get_root_type(integer, schema), number) generate_schema_org_code.get_root_type(integer, schema), number)
def test_get_root_type_enum(self):
    """A direct subclass of Enumeration is reported as its own root type."""
    # Build the inheritance chain Thing <- Intangible <- Enumeration <-
    # ActionStatusType, each node pointing at its parent.
    thing = {'@id': schema_org_id('Thing')}
    intangible = dict([('@id', schema_org_id('Intangible')),
                       ('rdfs:subClassOf', thing)])
    enumeration = dict([('@id', schema_org_id('Enumeration')),
                        ('rdfs:subClassOf', intangible)])
    action_status_type = dict([('@id', schema_org_id('ActionStatusType')),
                               ('rdfs:subClassOf', enumeration)])
    schema = {
        '@graph': [thing, intangible, enumeration, action_status_type]
    }

    root_type = generate_schema_org_code.get_root_type(
        action_status_type, schema)
    self.assertEqual(root_type, action_status_type)
def test_parse_property_identifier(self): def test_parse_property_identifier(self):
thing = {'@id': schema_org_id('Thing')} thing = {'@id': schema_org_id('Thing')}
intangible = { intangible = {
...@@ -109,7 +131,8 @@ class GenerateSchemaOrgCodeTest(unittest.TestCase): ...@@ -109,7 +131,8 @@ class GenerateSchemaOrgCodeTest(unittest.TestCase):
generate_schema_org_code.parse_property(identifier, schema), { generate_schema_org_code.parse_property(identifier, schema), {
'name': 'Identifier', 'name': 'Identifier',
'has_number': True, 'has_number': True,
'thing_types': [property_value['@id']] 'thing_types': [property_value['@id']],
'enum_types': []
}) })
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Generated by running //components/schema_org/generate_schema_org_code.py.
// Do not edit.
#include "components/schema_org/{{ header_file }}.h"
namespace schema_org {
namespace enums {
// Checks whether |value| names one of the options of the enum type |name|.
//
// |name| is compared against the id of each generated enum. |value| must be
// an http or https URL whose host is "schema.org" and whose path is not
// empty; the path with its first character removed is compared against the
// option names of the matching enum.
//
// Returns the matching option's 1-based position in its enum's option list
// (the same number the generated header assigns to the enumerator), or
// base::nullopt when the URL does not have the required form, |name| matches
// no generated enum, or the path matches none of that enum's options.
base::Optional<int> CheckValidEnumString(const std::string& name,
const GURL& value) {
// Reject anything that is not an http(s)://schema.org/<something> URL.
if (!value.SchemeIsHTTPOrHTTPS() ||
value.host() != "schema.org" ||
value.path().empty()) {
return base::nullopt;
}
// Drop the first character of the path (presumably the leading '/') so that
// the remainder can be compared directly with the bare option names.
auto path = value.path().substr(1);
{% for enum in enums %}
if (name == "{{enum.id}}") {
{% for option in enum.options %}
if (path == "{{option}}") {
return {{ loop.index }};
}
{% endfor %}
}
{% endfor %}
// Either the enum type is unknown or the path is not one of its options.
return base::nullopt;
}
} // namespace enums
} // namespace schema_org
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Generated by running //components/schema_org/generate_schema_org_code.py.
// Do not edit.
#ifndef SCHEMA_ORG_ENUMS_{{ header_guard | upper }}
#define SCHEMA_ORG_ENUMS_{{ header_guard | upper }}
#include <string>
#include "base/optional.h"
#include "url/gurl.h"
namespace schema_org {
namespace enums {
// One enum class is generated for each entry of the template's enum list.
// Every option becomes an enumerator named k<Option> whose value is its
// 1-based position in that enum's option list.
{% for enum in enums %}
enum class {{enum.name}} {
{% for option in enum.options %}
k{{option}} = {{ loop.index }},
{% endfor %}
};
{% endfor %}
// Looks up |value| among the options of the enum type whose id is |name|.
// Returns the numeric value of the matching enumerator, or base::nullopt if
// |value| is not an http(s) schema.org URL naming one of that enum's options.
base::Optional<int> CheckValidEnumString(const std::string& name, const GURL& value);
} // namespace enums
} // namespace schema_org
#endif // SCHEMA_ORG_ENUMS_{{ header_guard | upper }}
...@@ -25,6 +25,11 @@ PropertyConfiguration GetPropertyConfiguration(const std::string& name) { ...@@ -25,6 +25,11 @@ PropertyConfiguration GetPropertyConfiguration(const std::string& name) {
{% for thing_type in property.thing_types %} {% for thing_type in property.thing_types %}
"{{thing_type}}", "{{thing_type}}",
{% endfor %} {% endfor %}
},
/* .enum_types = */ {
{% for enum_type in property.enum_types %}
"{{enum_type}}",
{% endfor %}
} }
}; };
} }
......
...@@ -24,6 +24,7 @@ struct PropertyConfiguration { ...@@ -24,6 +24,7 @@ struct PropertyConfiguration {
bool boolean; bool boolean;
std::set<std::string> thing_types; std::set<std::string> thing_types;
std::set<std::string> enum_types;
}; };
PropertyConfiguration GetPropertyConfiguration(const std::string& name); PropertyConfiguration GetPropertyConfiguration(const std::string& name);
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "components/schema_org/common/improved_metadata.mojom.h" #include "components/schema_org/common/improved_metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h" #include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_enums.h"
#include "components/schema_org/schema_org_property_configurations.h" #include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h" #include "components/schema_org/schema_org_property_names.h"
...@@ -64,6 +65,37 @@ bool ValidateEntity(Entity* entity) { ...@@ -64,6 +65,37 @@ bool ValidateEntity(Entity* entity) {
++it; ++it;
} }
} }
} else if (!(*it)->values->url_values.empty()) {
if (config.url) {
++it;
} else if (!config.enum_types.empty()) {
// Check all the url values in this property. Remove any ones that
// aren't a valid enum option for the enum type. Although stored as a
// set, all properties should only have one valid enum type.
auto enum_type = *config.enum_types.begin();
bool has_valid_enums = false;
auto nested_it = (*it)->values->url_values.begin();
while (nested_it != (*it)->values->url_values.end()) {
auto& url = *nested_it;
if (!enums::CheckValidEnumString(enum_type, url).has_value()) {
nested_it = (*it)->values->url_values.erase(nested_it);
} else {
has_valid_enums = true;
++nested_it;
}
}
// If there were no valid url values representing enum options for
// this property, remove the whole property.
if (!has_valid_enums) {
it = entity->properties.erase(it);
} else {
++it;
}
} else {
// This property shouldn't have any url values according to the config.
it = entity->properties.erase(it);
}
} else { } else {
++it; ++it;
} }
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include "components/schema_org/common/improved_metadata.mojom.h" #include "components/schema_org/common/improved_metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h" #include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_enums.h"
#include "components/schema_org/schema_org_property_configurations.h" #include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h" #include "components/schema_org/schema_org_property_names.h"
#include "components/schema_org/validator.h" #include "components/schema_org/validator.h"
...@@ -277,4 +278,38 @@ TEST_F(SchemaOrgValidatorTest, InvalidRepeatedEntityPropertyValue) { ...@@ -277,4 +278,38 @@ TEST_F(SchemaOrgValidatorTest, InvalidRepeatedEntityPropertyValue) {
EXPECT_TRUE(entity->properties.empty()); EXPECT_TRUE(entity->properties.empty());
} }
// A URL value that names a real option of the property's enum type survives
// validation.
TEST_F(SchemaOrgValidatorTest, ValidEnumPropertyValue) {
  PropertyPtr action_status = Property::New();
  action_status->name = property::kActionStatus;
  action_status->values = Values::New();
  action_status->values->url_values.push_back(
      GURL("http://schema.org/ActiveActionStatus"));

  EntityPtr action = Entity::New();
  action->type = entity::kAction;
  action->properties.push_back(std::move(action_status));

  const bool is_valid = ValidateEntity(action.get());
  EXPECT_TRUE(is_valid);
  EXPECT_EQ(1u, action->properties.size());
}
// A URL value that is not an option of the property's enum type causes the
// whole property to be removed, while the entity itself still validates.
TEST_F(SchemaOrgValidatorTest, InvalidEnumPropertyValue) {
  PropertyPtr action_status = Property::New();
  action_status->name = property::kActionStatus;
  action_status->values = Values::New();
  action_status->values->url_values.push_back(
      GURL("http://schema.org/FakeActionStatus"));

  EntityPtr action = Entity::New();
  action->type = entity::kAction;
  action->properties.push_back(std::move(action_status));

  const bool is_valid = ValidateEntity(action.get());
  EXPECT_TRUE(is_valid);
  EXPECT_TRUE(action->properties.empty());
}
} // namespace schema_org } // namespace schema_org
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment