Commit a2180d81 authored by Sam Bowen's avatar Sam Bowen Committed by Commit Bot

Support nested arrays in schema org extractor.

Bug: 1044253
Change-Id: I652f04d2e7ced7a5289a4d6d0dcef2e577e7a570
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2076084
Commit-Queue: Sam Bowen <sgbowen@google.com>
Reviewed-by: default avatarDaniel Cheng <dcheng@chromium.org>
Reviewed-by: default avatarBecca Hughes <beccahughes@chromium.org>
Cr-Commit-Position: refs/heads/master@{#750286}
parent d71b5e2c
......@@ -28,7 +28,10 @@ namespace {
// the max nesting depth has a property corresponding to an entity, that
// property will be dropped. Note that we will still parse json-ld blocks deeper
// than this, but it won't be passed to App Indexing.
constexpr int kMaxDepth = 5;
constexpr int kMaxDictionaryDepth = 5;
// Maximum amount of nesting of arrays to support, where 0 is a completely flat
// array.
constexpr int kMaxNestedArrayDepth = 1;
// Some strings are very long, and we don't currently use those, so limit string
// length to something reasonable to avoid undue pressure on Icing. Note that
// App Indexing supports strings up to length 20k.
......@@ -107,12 +110,17 @@ bool ParseStringValue(const std::string& property_type,
bool ParseRepeatedValue(const base::Value::ConstListView& arr,
const std::string& property_type,
Values* values,
int recursion_level) {
int recursion_level,
int nested_array_level) {
DCHECK(values);
if (arr.empty()) {
return false;
}
if (nested_array_level > kMaxNestedArrayDepth) {
return false;
}
for (size_t j = 0; j < std::min(arr.size(), kMaxRepeatedSize); ++j) {
auto& list_item = arr[j];
......@@ -140,9 +148,13 @@ bool ParseRepeatedValue(const base::Value::ConstListView& arr,
values->entity_values.push_back(std::move(entity));
}
} break;
case base::Value::Type::LIST:
// App Indexing doesn't support nested arrays.
return false;
case base::Value::Type::LIST: {
const base::Value::ConstListView list_view = list_item.GetList();
if (!ParseRepeatedValue(list_view, property_type, values,
recursion_level, nested_array_level + 1)) {
continue;
}
} break;
default:
break;
}
......@@ -153,7 +165,7 @@ bool ParseRepeatedValue(const base::Value::ConstListView& arr,
void ExtractEntity(const base::DictionaryValue& val,
Entity* entity,
int recursion_level) {
if (recursion_level >= kMaxDepth) {
if (recursion_level >= kMaxDictionaryDepth) {
return;
}
......@@ -192,7 +204,7 @@ void ExtractEntity(const base::DictionaryValue& val,
break;
}
case base::Value::Type::DICTIONARY: {
if (recursion_level + 1 >= kMaxDepth) {
if (recursion_level + 1 >= kMaxDictionaryDepth) {
continue;
}
......@@ -209,7 +221,7 @@ void ExtractEntity(const base::DictionaryValue& val,
case base::Value::Type::LIST: {
const base::Value::ConstListView list_view = entry.second.GetList();
if (!ParseRepeatedValue(list_view, property->name,
property->values.get(), recursion_level)) {
property->values.get(), recursion_level, 0)) {
continue;
}
break;
......
......@@ -269,6 +269,45 @@ TEST_F(SchemaOrgExtractorTest, RepeatedProperty) {
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, RepeatedPropertyNestedArray) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": [[\"Movie Title\", \"The Second "
"One\"], [\"Different List Name\"]] }");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
PropertyPtr name = Property::New();
name->name = "name";
name->values = Values::New();
name->values->string_values = {"Movie Title", "The Second One",
"Different List Name"};
expected->properties.push_back(std::move(name));
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, RepeatedPropertyNestedArrayMaxDepth) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": [[\"Movie Title\", \"The Second "
"One\"], [[\"this one is too deeply nested\"]]] }");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
PropertyPtr name = Property::New();
name->name = "name";
name->values = Values::New();
name->values->string_values = {"Movie Title", "The Second One"};
expected->properties.push_back(std::move(name));
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, MixedRepeatedProperty) {
EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"version\": [\"6.5a\", 6] }");
......@@ -427,17 +466,6 @@ TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithEmptyArray) {
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, IgnorePropertyWithNestedArray) {
EntityPtr extracted =
Extract("{\"@type\": \"VideoObject\", \"name\": [[\"Name\"]] }");
ASSERT_FALSE(extracted.is_null());
EntityPtr expected = Entity::New();
expected->type = "VideoObject";
EXPECT_EQ(expected, extracted);
}
TEST_F(SchemaOrgExtractorTest, EnforceMaxNestingDepth) {
EntityPtr extracted = Extract(
"{\"@type\": \"VideoObject\", \"name\": \"a video!\","
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment