Commit a2180d81 authored by Sam Bowen's avatar Sam Bowen Committed by Commit Bot

Support nested arrays in schema org extractor.

Bug: 1044253
Change-Id: I652f04d2e7ced7a5289a4d6d0dcef2e577e7a570
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2076084
Commit-Queue: Sam Bowen <sgbowen@google.com>
Reviewed-by: default avatarDaniel Cheng <dcheng@chromium.org>
Reviewed-by: default avatarBecca Hughes <beccahughes@chromium.org>
Cr-Commit-Position: refs/heads/master@{#750286}
parent d71b5e2c
...@@ -28,7 +28,10 @@ namespace { ...@@ -28,7 +28,10 @@ namespace {
// the max nesting depth has a property corresponding to an entity, that // the max nesting depth has a property corresponding to an entity, that
// property will be dropped. Note that we will still parse json-ld blocks deeper // property will be dropped. Note that we will still parse json-ld blocks deeper
// than this, but it won't be passed to App Indexing. // than this, but it won't be passed to App Indexing.
constexpr int kMaxDepth = 5; constexpr int kMaxDictionaryDepth = 5;
// Maximum amount of nesting of arrays to support, where 0 is a completely flat
// array.
constexpr int kMaxNestedArrayDepth = 1;
// Some strings are very long, and we don't currently use those, so limit string // Some strings are very long, and we don't currently use those, so limit string
// length to something reasonable to avoid undue pressure on Icing. Note that // length to something reasonable to avoid undue pressure on Icing. Note that
// App Indexing supports strings up to length 20k. // App Indexing supports strings up to length 20k.
...@@ -107,12 +110,17 @@ bool ParseStringValue(const std::string& property_type, ...@@ -107,12 +110,17 @@ bool ParseStringValue(const std::string& property_type,
bool ParseRepeatedValue(const base::Value::ConstListView& arr, bool ParseRepeatedValue(const base::Value::ConstListView& arr,
const std::string& property_type, const std::string& property_type,
Values* values, Values* values,
int recursion_level) { int recursion_level,
int nested_array_level) {
DCHECK(values); DCHECK(values);
if (arr.empty()) { if (arr.empty()) {
return false; return false;
} }
if (nested_array_level > kMaxNestedArrayDepth) {
return false;
}
for (size_t j = 0; j < std::min(arr.size(), kMaxRepeatedSize); ++j) { for (size_t j = 0; j < std::min(arr.size(), kMaxRepeatedSize); ++j) {
auto& list_item = arr[j]; auto& list_item = arr[j];
...@@ -140,9 +148,13 @@ bool ParseRepeatedValue(const base::Value::ConstListView& arr, ...@@ -140,9 +148,13 @@ bool ParseRepeatedValue(const base::Value::ConstListView& arr,
values->entity_values.push_back(std::move(entity)); values->entity_values.push_back(std::move(entity));
} }
} break; } break;
case base::Value::Type::LIST: case base::Value::Type::LIST: {
// App Indexing doesn't support nested arrays. const base::Value::ConstListView list_view = list_item.GetList();
return false; if (!ParseRepeatedValue(list_view, property_type, values,
recursion_level, nested_array_level + 1)) {
continue;
}
} break;
default: default:
break; break;
} }
...@@ -153,7 +165,7 @@ bool ParseRepeatedValue(const base::Value::ConstListView& arr, ...@@ -153,7 +165,7 @@ bool ParseRepeatedValue(const base::Value::ConstListView& arr,
void ExtractEntity(const base::DictionaryValue& val, void ExtractEntity(const base::DictionaryValue& val,
Entity* entity, Entity* entity,
int recursion_level) { int recursion_level) {
if (recursion_level >= kMaxDepth) { if (recursion_level >= kMaxDictionaryDepth) {
return; return;
} }
...@@ -192,7 +204,7 @@ void ExtractEntity(const base::DictionaryValue& val, ...@@ -192,7 +204,7 @@ void ExtractEntity(const base::DictionaryValue& val,
break; break;
} }
case base::Value::Type::DICTIONARY: { case base::Value::Type::DICTIONARY: {
if (recursion_level + 1 >= kMaxDepth) { if (recursion_level + 1 >= kMaxDictionaryDepth) {
continue; continue;
} }
...@@ -209,7 +221,7 @@ void ExtractEntity(const base::DictionaryValue& val, ...@@ -209,7 +221,7 @@ void ExtractEntity(const base::DictionaryValue& val,
case base::Value::Type::LIST: { case base::Value::Type::LIST: {
const base::Value::ConstListView list_view = entry.second.GetList(); const base::Value::ConstListView list_view = entry.second.GetList();
if (!ParseRepeatedValue(list_view, property->name, if (!ParseRepeatedValue(list_view, property->name,
property->values.get(), recursion_level)) { property->values.get(), recursion_level, 0)) {
continue; continue;
} }
break; break;
......
...@@ -269,6 +269,45 @@ TEST_F(SchemaOrgExtractorTest, RepeatedProperty) { ...@@ -269,6 +269,45 @@ TEST_F(SchemaOrgExtractorTest, RepeatedProperty) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
TEST_F(SchemaOrgExtractorTest, RepeatedPropertyNestedArray) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": [[\"Movie Title\", \"The Second "
"One\"], [\"Different List Name\"]] }");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
PropertyPtr name = Property::New();
name->name = "name";
name->values = Values::New();
name->values->string_values = {"Movie Title", "The Second One",
"Different List Name"};
expected->properties.push_back(std::move(name));
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, RepeatedPropertyNestedArrayMaxDepth) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": [[\"Movie Title\", \"The Second "
"One\"], [[\"this one is too deeply nested\"]]] }");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
PropertyPtr name = Property::New();
name->name = "name";
name->values = Values::New();
name->values->string_values = {"Movie Title", "The Second One"};
expected->properties.push_back(std::move(name));
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, MixedRepeatedProperty) { TEST_F(SchemaOrgExtractorTest, MixedRepeatedProperty) {
EntityPtr extracted = EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"version\": [\"6.5a\", 6] }"); Extract("{\"@type\": \"VideoObject\", \"version\": [\"6.5a\", 6] }");
...@@ -427,17 +466,6 @@ TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithEmptyArray) { ...@@ -427,17 +466,6 @@ TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithEmptyArray) {
EXPECT_EQ(expected, extracted); EXPECT_EQ(expected, extracted);
} }
TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithNestedArray) {
EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"name\": [[\"Name\"]] }");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) { TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) {
EntityPtr extracted = Extract( EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": \"a video!\"," "{\"@type\": \"VideoObject\", \"name\": \"a video!\","
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment