Commit 6ca590e1, authored by Tom Sepez and committed by Commit Bot

Reduce memory usage in SuffixTree.h

Convert fixed array of pointers to a flat map. Previously, we
were requiring a direct-mapped array of 128 pointers per byte of
input string.

Add a small unit test for SuffixTree.

Bug: 740873
Change-Id: I03836f5f9f893861bec3076fafaffd7c4e3e04eb
Reviewed-on: https://chromium-review.googlesource.com/571193
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Reviewed-by: Mike West <mkwst@chromium.org>
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Cr-Commit-Position: refs/heads/master@{#486859}
parent 71aec55e
......@@ -1970,6 +1970,7 @@ test("blink_platform_unittests") {
"text/ICUErrorTest.cpp",
"text/PlatformLocaleTest.cpp",
"text/SegmentedStringTest.cpp",
"text/SuffixTreeTest.cpp",
"text/TextBreakIteratorTest.cpp",
"text/TextEncodingDetectorTest.cpp",
"text/UnicodeUtilitiesTest.cpp",
......
......@@ -26,6 +26,9 @@
#ifndef SuffixTree_h
#define SuffixTree_h
#include <algorithm>
#include <utility>
#include "platform/wtf/Allocator.h"
#include "platform/wtf/Noncopyable.h"
#include "platform/wtf/Vector.h"
......@@ -63,9 +66,10 @@ class SuffixTree {
Node* current = &root_;
int limit = std::min(depth_, query.length());
for (int i = 0; i < limit; ++i) {
current = current->at(Codebook::CodeWord(query[i]));
if (!current)
auto iter = current->Find(Codebook::CodeWord(query[i]));
if (iter == current->End())
return false;
current = iter->second;
}
return true;
}
......@@ -76,27 +80,39 @@ class SuffixTree {
WTF_MAKE_NONCOPYABLE(Node);
public:
Node(bool is_leaf = false) {
children_.resize(Codebook::kCodeSize);
children_.Fill(0);
is_leaf_ = is_leaf;
}
Node(bool is_leaf = false) : is_leaf_(is_leaf) {}
~Node() {
for (unsigned i = 0; i < children_.size(); ++i) {
Node* child = children_.at(i);
for (const auto& pair : children_) {
Node* child = pair.second;
if (child && !child->is_leaf_)
delete child;
}
}
Node*& at(int code_word) { return children_.at(code_word); }
Node*& At(int key) {
auto it = Find(key);
if (it != children_.end())
return it->second;
children_.emplace_back(key, nullptr);
return children_.back().second;
}
private:
typedef Vector<Node*, Codebook::kCodeSize> ChildrenVector;
typename Vector<std::pair<int, Node*>>::iterator Find(int key) {
return std::find_if(children_.begin(), children_.end(),
[key](const std::pair<int, Node*>& entry) {
return entry.first == key;
});
}
typename Vector<std::pair<int, Node*>>::iterator End() {
return children_.end();
}
ChildrenVector children_;
bool is_leaf_;
private:
// TODO(tsepez): convert to base::flat_map when allowed in blink.
Vector<std::pair<int, Node*>> children_;
const bool is_leaf_;
};
void Build(const String& text) {
......@@ -105,7 +121,7 @@ class SuffixTree {
unsigned limit = std::min(base + depth_, text.length());
for (unsigned offset = 0; base + offset < limit; ++offset) {
DCHECK_NE(current, &leaf_);
Node*& child = current->at(Codebook::CodeWord(text[base + offset]));
Node*& child = current->At(Codebook::CodeWord(text[base + offset]));
if (!child)
child = base + offset + 1 == limit ? &leaf_ : new Node();
current = child;
......
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "platform/text/SuffixTree.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace blink {
// Exercises a tree built from an empty text with a depth of 16.
TEST(SuffixTreeTest, EmptyString) {
  SuffixTree<ASCIICodebook> empty_tree("", 16);

  // An empty query has no characters to match, so it is expected to succeed.
  EXPECT_TRUE(empty_tree.MightContain(""));

  // The text contributed no characters, so a non-empty query is expected to
  // be rejected.
  EXPECT_FALSE(empty_tree.MightContain("potato"));
}
// Exercises a tree built from "banana" with a depth of 16, which is larger
// than the length of the text.
TEST(SuffixTreeTest, NormalString) {
  SuffixTree<ASCIICodebook> banana_tree("banana", 16);

  // Queries that occur in "banana" (the empty query included); each one is
  // expected to be reported as possibly contained.
  static const char* const kExpectedHits[] = {
      "", "a", "na", "ana", "nana", "anana", "banana",
  };
  for (const char* query : kExpectedHits)
    EXPECT_TRUE(banana_tree.MightContain(query)) << "query: " << query;

  // Queries that do not occur in "banana": a wrong character sequence, the
  // text with an extra trailing or leading character, and an unrelated word.
  static const char* const kExpectedMisses[] = {
      "ab", "bananan", "abanana", "potato",
  };
  for (const char* query : kExpectedMisses)
    EXPECT_FALSE(banana_tree.MightContain(query)) << "query: " << query;
}
} // namespace blink
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment