Commit 28cff5e2 authored by Sam Bowen's avatar Sam Bowen Committed by Commit Bot

Add validator for schema org entities and types in doc metadata mojo.

* Add a validator that removes properties that are not the right type.
* Add double, time, and timedelta types in the document metadata mojo
  structure.
* Use the new mojo types in document metadata extractor.

Downstream clank will not be able to read fields parsed into the new
type, but will just skip over them. We should consider updating support
there.
http://cs/clank/java/src/com/google/android/apps/chrome/icing/AppIndexingReporterInternal.java?l=133

Bug: 1044250
Change-Id: Icfd7f0db74abf7bd1261b30dc416c94a7a23b4c0
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2062907Reviewed-by: default avatarDaniel Cheng <dcheng@chromium.org>
Reviewed-by: default avatarBecca Hughes <beccahughes@chromium.org>
Commit-Queue: Sam Bowen <sgbowen@google.com>
Cr-Commit-Position: refs/heads/master@{#748357}
parent 57d82214
......@@ -7,11 +7,13 @@ source_set("unit_tests") {
sources = [
"extractor_unittest.cc",
"generate_schema_org_code_unittest.cc",
"validator_unittest.cc",
]
deps = [
":extractor",
":generate_schema_org_code",
":schema_org",
":schema_org_properties",
"//base",
"//components/schema_org/common:mojom",
......@@ -67,6 +69,20 @@ static_library("extractor") {
"extractor.h",
]
deps = [
"//components/schema_org:generate_schema_org_code",
"//components/schema_org:schema_org",
"//components/schema_org:schema_org_properties",
"//components/schema_org/common:mojom",
]
}
static_library("schema_org") {
sources = [
"validator.cc",
"validator.h",
]
deps = [
"//components/schema_org:generate_schema_org_code",
"//components/schema_org:schema_org_properties",
......
......@@ -8,5 +8,8 @@ mojom("mojom") {
generate_java = true
sources = [ "metadata.mojom" ]
public_deps = [ "//url/mojom:url_mojom_gurl" ]
public_deps = [
"//mojo/public/mojom/base",
"//url/mojom:url_mojom_gurl",
]
}
......@@ -4,12 +4,17 @@
module schema_org.mojom;
import "mojo/public/mojom/base/time.mojom";
// Due to the restriction of AppIndexing, all elements should be of the
// same type. Non-array values are converted to arrays of one element.
union Values {
array<bool> bool_values;
array<int64> long_values;
array<string> string_values;
array<double> double_values;
array<mojo_base.mojom.Time> date_time_values;
array<mojo_base.mojom.TimeDelta> time_values;
array<Entity> entity_values;
};
......
This diff is collapsed.
This diff is collapsed.
......@@ -14,6 +14,11 @@ TEST(GenerateSchemaOrgTest, EntityName) {
EXPECT_STREQ(entity::kAboutPage, "AboutPage");
}
TEST(GenerateSchemaOrgTest, IsValidEntityName) {
EXPECT_TRUE(entity::IsValidEntityName(entity::kAboutPage));
EXPECT_FALSE(entity::IsValidEntityName("a made up name"));
}
TEST(GenerateSchemaOrgTest, PropertyName) {
EXPECT_STREQ(property::kAcceptedAnswer, "acceptedAnswer");
}
......
......@@ -6,6 +6,9 @@
// Do not edit.
#include "components/schema_org/{{ header_file }}.h"
#include "base/containers/flat_set.h"
#include "base/no_destructor.h"
#include "base/strings/string_piece.h"
namespace schema_org {
namespace entity {
......@@ -14,5 +17,15 @@ namespace entity {
const char k{{entity[0]|upper}}{{entity[1:]}}[] = "{{entity}}";
{% endfor %}
bool IsValidEntityName(const std::string& entity_name) {
static const base::NoDestructor<base::flat_set<base::StringPiece>>
kValidEntityNames(base::flat_set<base::StringPiece>({
{%for entity in entities %}
k{{entity[0]|upper}}{{entity[1:]}},
{% endfor %}
}));
return kValidEntityNames->find(entity_name) != kValidEntityNames->end();
}
} // entity
} // schema_org
......@@ -17,6 +17,8 @@ namespace entity {
extern const char k{{entity[0]|upper}}{{entity[1:]}}[];
{% endfor %}
bool IsValidEntityName(const std::string& entity_name);
} // namespace entity
} // namespace schema_org
......
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/schema_org/validator.h"
#include <vector>
#include "components/schema_org/common/metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h"
namespace schema_org {
using mojom::Entity;
using mojom::EntityPtr;
// static
bool ValidateEntity(Entity* entity) {
if (!entity::IsValidEntityName(entity->type)) {
return false;
}
// Cycle through properties and remove any that have the wrong type.
auto it = entity->properties.begin();
while (it != entity->properties.end()) {
property::PropertyConfiguration config =
property::GetPropertyConfiguration((*it)->name);
if ((*it)->values->is_string_values() && !config.text) {
it = entity->properties.erase(it);
} else if ((*it)->values->is_double_values() && !config.number) {
it = entity->properties.erase(it);
} else if ((*it)->values->is_time_values() && !config.time) {
it = entity->properties.erase(it);
} else if ((*it)->values->is_date_time_values() && !config.date_time &&
!config.date) {
it = entity->properties.erase(it);
} else if ((*it)->values->is_entity_values()) {
if (config.thing_types.empty()) {
// Property is not supposed to have an entity type.
it = entity->properties.erase(it);
} else {
// Check all the entities nested in this property. Remove any invalid
// ones.
bool has_valid_entities = false;
auto nested_it = (*it)->values->get_entity_values().begin();
while (nested_it != (*it)->values->get_entity_values().end()) {
auto& nested_entity = *nested_it;
if (!ValidateEntity(nested_entity.get())) {
nested_it = (*it)->values->get_entity_values().erase(nested_it);
} else {
has_valid_entities = true;
++nested_it;
}
}
// If there were no valid entity values for this property, remove the
// whole property.
if (!has_valid_entities) {
it = entity->properties.erase(it);
} else {
++it;
}
}
} else {
++it;
}
}
return true;
}
} // namespace schema_org
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
#define COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
#include "components/schema_org/common/metadata.mojom-forward.h"
namespace schema_org {
// Validates and cleans up the Schema.org entity in-place. Invalid properties
// will be removed from the entity. Returns true if the entity was valid.
bool ValidateEntity(mojom::Entity* entity);
} // namespace schema_org
#endif // COMPONENTS_SCHEMA_ORG_VALIDATOR_H_
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <memory>
#include <utility>
#include <vector>
#include "components/schema_org/common/metadata.mojom.h"
#include "components/schema_org/schema_org_entity_names.h"
#include "components/schema_org/schema_org_property_configurations.h"
#include "components/schema_org/schema_org_property_names.h"
#include "components/schema_org/validator.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace schema_org {
using mojom::Entity;
using mojom::EntityPtr;
using mojom::Property;
using mojom::PropertyPtr;
using mojom::Values;
class SchemaOrgValidatorTest : public testing::Test {};
TEST_F(SchemaOrgValidatorTest, InvalidEntityType) {
EntityPtr entity = Entity::New();
entity->type = "random entity type";
bool validated_entity = ValidateEntity(entity.get());
EXPECT_FALSE(validated_entity);
}
TEST_F(SchemaOrgValidatorTest, ValidStringPropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kAboutPage;
PropertyPtr property = Property::New();
property->name = property::kAccessMode;
property->values = Values::New();
property->values->set_string_values({"foo"});
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_EQ(1u, entity->properties.size());
}
TEST_F(SchemaOrgValidatorTest, InvalidStringPropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kAboutPage;
PropertyPtr property = Property::New();
property->name = property::kAbout;
property->values = Values::New();
property->values->set_string_values({"foo"});
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_TRUE(entity->properties.empty());
}
TEST_F(SchemaOrgValidatorTest, ValidNumberPropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kSingleFamilyResidence;
PropertyPtr property = Property::New();
property->name = property::kAdditionalNumberOfGuests;
property->values = Values::New();
property->values->set_double_values({1.0});
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_EQ(1u, entity->properties.size());
}
TEST_F(SchemaOrgValidatorTest, InvalidNumberPropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kAboutPage;
PropertyPtr property = Property::New();
property->name = property::kAbout;
property->values = Values::New();
property->values->set_double_values({1.0});
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_TRUE(entity->properties.empty());
}
TEST_F(SchemaOrgValidatorTest, ValidDateTimePropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kLodgingBusiness;
PropertyPtr property = Property::New();
property->name = property::kCheckinTime;
property->values = Values::New();
property->values->set_date_time_values(
{base::Time::FromDeltaSinceWindowsEpoch(
base::TimeDelta::FromMilliseconds(12999772800000))});
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_EQ(1u, entity->properties.size());
}
TEST_F(SchemaOrgValidatorTest, InvalidDateTimePropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kAboutPage;
PropertyPtr property = Property::New();
property->name = property::kAbout;
property->values = Values::New();
property->values->set_date_time_values(
{base::Time::FromDeltaSinceWindowsEpoch(
base::TimeDelta::FromMilliseconds(12999772800000))});
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_TRUE(entity->properties.empty());
}
TEST_F(SchemaOrgValidatorTest, ValidTimePropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kLodgingBusiness;
PropertyPtr property = Property::New();
property->name = property::kCheckinTime;
property->values = Values::New();
property->values->set_time_values(
{base::TimeDelta::FromMilliseconds(12999772800000)});
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_EQ(1u, entity->properties.size());
}
TEST_F(SchemaOrgValidatorTest, InvalidTimePropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kAboutPage;
PropertyPtr property = Property::New();
property->name = property::kAbout;
property->values = Values::New();
property->values->set_time_values(
{base::TimeDelta::FromMilliseconds(12999772800000)});
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_TRUE(entity->properties.empty());
}
TEST_F(SchemaOrgValidatorTest, ValidEntityPropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kRestaurant;
PropertyPtr property = Property::New();
property->name = property::kAddress;
property->values = Values::New();
EntityPtr value = Entity::New();
value->type = entity::kPostalAddress;
property->values->set_entity_values(std::vector<EntityPtr>());
property->values->get_entity_values().push_back(std::move(value));
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_EQ(1u, entity->properties.size());
}
TEST_F(SchemaOrgValidatorTest, InvalidEntityPropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kAboutPage;
PropertyPtr property = Property::New();
property->name = property::kAccessMode;
property->values = Values::New();
EntityPtr value = Entity::New();
value->type = entity::kPostalAddress;
property->values->set_entity_values(std::vector<EntityPtr>());
property->values->get_entity_values().push_back(std::move(value));
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_TRUE(entity->properties.empty());
}
TEST_F(SchemaOrgValidatorTest, ValidRepeatedEntityPropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kRestaurant;
PropertyPtr property = Property::New();
property->name = property::kAddress;
property->values = Values::New();
EntityPtr value1 = Entity::New();
value1->type = entity::kPostalAddress;
EntityPtr value2 = Entity::New();
value2->type = entity::kPostalAddress;
property->values->set_entity_values(std::vector<EntityPtr>());
property->values->get_entity_values().push_back(std::move(value1));
property->values->get_entity_values().push_back(std::move(value2));
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_EQ(1u, entity->properties.size());
EXPECT_EQ(2u, entity->properties[0]->values->get_entity_values().size());
}
// If one value of a repeated property is invalid but the other is not,
// validator should keep the outer property and remove only the invalid nested
// property.
TEST_F(SchemaOrgValidatorTest, MixedValidityRepeatedEntityPropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kRestaurant;
PropertyPtr property = Property::New();
property->name = property::kAddress;
property->values = Values::New();
EntityPtr value1 = Entity::New();
value1->type = entity::kPostalAddress;
EntityPtr value2 = Entity::New();
value2->type = "bad address";
property->values->set_entity_values(std::vector<EntityPtr>());
property->values->get_entity_values().push_back(std::move(value1));
property->values->get_entity_values().push_back(std::move(value2));
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_EQ(1u, entity->properties.size());
EXPECT_EQ(1u, entity->properties[0]->values->get_entity_values().size());
}
TEST_F(SchemaOrgValidatorTest, InvalidRepeatedEntityPropertyValue) {
EntityPtr entity = Entity::New();
entity->type = entity::kRestaurant;
PropertyPtr property = Property::New();
property->name = property::kAddress;
property->values = Values::New();
EntityPtr value1 = Entity::New();
value1->type = "this is not a real type";
EntityPtr value2 = Entity::New();
value2->type = "bad address type";
property->values->set_entity_values(std::vector<EntityPtr>());
property->values->get_entity_values().push_back(std::move(value1));
property->values->get_entity_values().push_back(std::move(value2));
entity->properties.push_back(std::move(property));
bool validated_entity = ValidateEntity(entity.get());
EXPECT_TRUE(validated_entity);
EXPECT_TRUE(entity->properties.empty());
}
} // namespace schema_org
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment