Commit 298daf75 authored by Sam Bowen's avatar Sam Bowen Committed by Commit Bot

Use Chrome URL type in schema org extractor.

Add Chrome URL as a property value type and in the extractor, parse a
string as a URL if the given property can be a URL.

Bug: 1044251
Change-Id: I041ad0bb61c9dc4ba3a2093711c28b88e9adbdff
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2076704
Commit-Queue: Sam Bowen <sgbowen@google.com>
Reviewed-by: default avatarDaniel Cheng <dcheng@chromium.org>
Reviewed-by: default avatarBecca Hughes <beccahughes@chromium.org>
Cr-Commit-Position: refs/heads/master@{#751430}
parent 64441e46
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
module schema_org.improved.mojom; module schema_org.improved.mojom;
import "mojo/public/mojom/base/time.mojom"; import "mojo/public/mojom/base/time.mojom";
import "url/mojom/url.mojom";
// A property can have arrays of different types simultaneously. Non-array // A property can have arrays of different types simultaneously. Non-array
// values are converted to arrays of one element. // values are converted to arrays of one element.
...@@ -21,6 +22,7 @@ struct Values { ...@@ -21,6 +22,7 @@ struct Values {
array<double> double_values; array<double> double_values;
array<mojo_base.mojom.Time> date_time_values; array<mojo_base.mojom.Time> date_time_values;
array<mojo_base.mojom.TimeDelta> time_values; array<mojo_base.mojom.TimeDelta> time_values;
array<url.mojom.Url> url_values;
array<Entity> entity_values; array<Entity> entity_values;
}; };
......
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#include "components/schema_org/common/improved_metadata.mojom.h" #include "components/schema_org/common/improved_metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h" #include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h" #include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/validator.h"
namespace schema_org { namespace schema_org {
...@@ -73,6 +72,10 @@ bool ParseStringValue(const std::string& property_type, ...@@ -73,6 +72,10 @@ bool ParseStringValue(const std::string& property_type,
values->string_values.push_back(value.as_string()); values->string_values.push_back(value.as_string());
return true; return true;
} }
if (prop_config.url) {
values->url_values.push_back(GURL(value));
return true;
}
if (prop_config.number) { if (prop_config.number) {
double d; double d;
bool parsed_double = base::StringToDouble(value, &d); bool parsed_double = base::StringToDouble(value, &d);
...@@ -123,7 +126,6 @@ bool ParseRepeatedValue(const base::Value::ConstListView& arr, ...@@ -123,7 +126,6 @@ bool ParseRepeatedValue(const base::Value::ConstListView& arr,
for (size_t j = 0; j < std::min(arr.size(), kMaxRepeatedSize); ++j) { for (size_t j = 0; j < std::min(arr.size(), kMaxRepeatedSize); ++j) {
auto& list_item = arr[j]; auto& list_item = arr[j];
switch (list_item.type()) { switch (list_item.type()) {
case base::Value::Type::BOOLEAN: { case base::Value::Type::BOOLEAN: {
values->bool_values.push_back(list_item.GetBool()); values->bool_values.push_back(list_item.GetBool());
......
...@@ -45,6 +45,8 @@ class SchemaOrgExtractorTest : public testing::Test { ...@@ -45,6 +45,8 @@ class SchemaOrgExtractorTest : public testing::Test {
PropertyPtr CreateTimeProperty(const std::string& name, PropertyPtr CreateTimeProperty(const std::string& name,
const base::TimeDelta& value); const base::TimeDelta& value);
PropertyPtr CreateUrlProperty(const std::string& name, const GURL& url);
PropertyPtr CreateEntityProperty(const std::string& name, EntityPtr value); PropertyPtr CreateEntityProperty(const std::string& name, EntityPtr value);
}; };
...@@ -107,6 +109,15 @@ PropertyPtr SchemaOrgExtractorTest::CreateTimeProperty( ...@@ -107,6 +109,15 @@ PropertyPtr SchemaOrgExtractorTest::CreateTimeProperty(
return property; return property;
} }
PropertyPtr SchemaOrgExtractorTest::CreateUrlProperty(const std::string& name,
const GURL& value) {
PropertyPtr property = Property::New();
property->name = name;
property->values = Values::New();
property->values->url_values.push_back(value);
return property;
}
PropertyPtr SchemaOrgExtractorTest::CreateEntityProperty( PropertyPtr SchemaOrgExtractorTest::CreateEntityProperty(
const std::string& name, const std::string& name,
EntityPtr value) { EntityPtr value) {
...@@ -231,6 +242,20 @@ TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDateTime) { ...@@ -231,6 +242,20 @@ TEST_F(SchemaOrgExtractorTest, StringValueRepresentingDateTime) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
TEST_F(SchemaOrgExtractorTest, UrlValue) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", "
"\"contentUrl\":\"https://www.google.com\"}");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
expected->properties.push_back(
CreateUrlProperty("contentUrl", GURL("https://www.google.com")));
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, NestedEntities) { TEST_F(SchemaOrgExtractorTest, NestedEntities) {
EntityPtr extracted = Extract( EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"actor\": { \"@type\": \"Person\", " "{\"@type\": \"VideoObject\", \"actor\": { \"@type\": \"Person\", "
......
...@@ -49,6 +49,10 @@ def get_root_type(the_class, schema): ...@@ -49,6 +49,10 @@ def get_root_type(the_class, schema):
if class_obj['@id'] == schema_org_id('Thing'): if class_obj['@id'] == schema_org_id('Thing'):
return class_obj return class_obj
# Consider URLs to be a base type as we will use have a struct field for
# them specifically.
if class_obj['@id'] == schema_org_id('URL'):
return class_obj
if (class_obj.has_key('@type') if (class_obj.has_key('@type')
and schema_org_id('DataType') in class_obj['@type']): and schema_org_id('DataType') in class_obj['@type']):
return class_obj return class_obj
...@@ -88,6 +92,8 @@ def parse_property(prop, schema): ...@@ -88,6 +92,8 @@ def parse_property(prop, schema):
parsed_prop['has_number'] = True parsed_prop['has_number'] = True
elif root_type['@id'] == schema_org_id('DateTime'): elif root_type['@id'] == schema_org_id('DateTime'):
parsed_prop['has_date_time'] = True parsed_prop['has_date_time'] = True
elif root_type['@id'] == schema_org_id('URL'):
parsed_prop['has_url'] = True
return parsed_prop return parsed_prop
......
...@@ -45,6 +45,10 @@ TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsNumber) { ...@@ -45,6 +45,10 @@ TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsNumber) {
property::GetPropertyConfiguration(property::kDownvoteCount).number); property::GetPropertyConfiguration(property::kDownvoteCount).number);
} }
TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsUrl) {
EXPECT_TRUE(property::GetPropertyConfiguration(property::kContentUrl).url);
}
TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsThingType) { TEST(GenerateSchemaOrgCodeTest, GetPropertyConfigurationSetsThingType) {
EXPECT_THAT( EXPECT_THAT(
property::GetPropertyConfiguration(property::kAcceptedPaymentMethod) property::GetPropertyConfiguration(property::kAcceptedPaymentMethod)
......
...@@ -65,15 +65,15 @@ class GenerateSchemaOrgCodeTest(unittest.TestCase): ...@@ -65,15 +65,15 @@ class GenerateSchemaOrgCodeTest(unittest.TestCase):
thing) thing)
def test_get_root_type_datatype(self): def test_get_root_type_datatype(self):
text = { number = {
'@id': schema_org_id('Text'), '@id': schema_org_id('Number'),
'@type': [schema_org_id('DataType'), 'rdfs:Class'] '@type': [schema_org_id('DataType'), 'rdfs:Class']
} }
url = {'@id': schema_org_id('URL'), 'rdfs:subClassOf': text} integer = {'@id': schema_org_id('Integer'), 'rdfs:subClassOf': number}
schema = {'@graph': [url, text]} schema = {'@graph': [integer, number]}
self.assertEqual( self.assertEqual(
generate_schema_org_code.get_root_type(url, schema), text) generate_schema_org_code.get_root_type(integer, schema), number)
def test_parse_property_identifier(self): def test_parse_property_identifier(self):
thing = {'@id': schema_org_id('Thing')} thing = {'@id': schema_org_id('Thing')}
...@@ -89,26 +89,26 @@ class GenerateSchemaOrgCodeTest(unittest.TestCase): ...@@ -89,26 +89,26 @@ class GenerateSchemaOrgCodeTest(unittest.TestCase):
'@id': schema_org_id('PropertyValue'), '@id': schema_org_id('PropertyValue'),
'rdfs:subClassOf': structured_value 'rdfs:subClassOf': structured_value
} }
text = { number = {
'@id': schema_org_id('Text'), '@id': schema_org_id('Number'),
'@type': [schema_org_id('DataType'), 'rdfs:Class'] '@type': [schema_org_id('DataType'), 'rdfs:Class']
} }
url = {'@id': schema_org_id('URL'), 'rdfs:subClassOf': text} integer = {'@id': schema_org_id('Integer'), 'rdfs:subClassOf': number}
identifier = { identifier = {
'@id': schema_org_id('Identifier'), '@id': schema_org_id('Identifier'),
schema_org_id('rangeIncludes'): [property_value, url, text] schema_org_id('rangeIncludes'): [property_value, integer, number]
} }
schema = { schema = {
'@graph': [ '@graph': [
thing, intangible, structured_value, property_value, text, url, thing, intangible, structured_value, property_value, number,
identifier integer, identifier
] ]
} }
self.assertEqual( self.assertEqual(
generate_schema_org_code.parse_property(identifier, schema), { generate_schema_org_code.parse_property(identifier, schema), {
'name': 'Identifier', 'name': 'Identifier',
'has_text': True, 'has_number': True,
'thing_types': [property_value['@id']] 'thing_types': [property_value['@id']]
}) })
......
...@@ -14,12 +14,13 @@ PropertyConfiguration GetPropertyConfiguration(const std::string& name) { ...@@ -14,12 +14,13 @@ PropertyConfiguration GetPropertyConfiguration(const std::string& name) {
{% for property in properties %} {% for property in properties %}
if (name == "{{property.name}}") { if (name == "{{property.name}}") {
return { return {
.text = {{'true' if property.has_text else 'false'}}, /* .text = */ {{'true' if property.has_text else 'false'}},
.date = {{'true' if property.has_date else 'false'}}, /* .date = */ {{'true' if property.has_date else 'false'}},
.time = {{'true' if property.has_time else 'false'}}, /* .time = */ {{'true' if property.has_time else 'false'}},
.date_time = {{'true' if property.has_date_time else 'false'}}, /* .date_time = */ {{'true' if property.has_date_time else 'false'}},
.number = {{'true' if property.has_number else 'false'}}, /* .number = */ {{'true' if property.has_number else 'false'}},
.thing_types = { /* .url = */ {{'true' if property.has_url else 'false'}},
/* .thing_types = */ {
{% for thing_type in property.thing_types %} {% for thing_type in property.thing_types %}
"{{thing_type}}", "{{thing_type}}",
{% endfor %} {% endfor %}
......
...@@ -20,6 +20,7 @@ struct PropertyConfiguration { ...@@ -20,6 +20,7 @@ struct PropertyConfiguration {
bool time; bool time;
bool date_time; bool date_time;
bool number; bool number;
bool url;
std::set<std::string> thing_types; std::set<std::string> thing_types;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment