Commit f091addf authored by Dominic Battre's avatar Dominic Battre Committed by Commit Bot

Take AnonymizerTool calls off the UI thread

Executing the AnonymizerTool is an expensive operation and should not happen on
the UI thread. This CL moves all existing calls on the UI thread to spearate
worker TaskRunners.

Bug: 787554
Change-Id: I3c096873f784cc2a9dd23f9dd538e2aebd075cd0
Reviewed-on: https://chromium-review.googlesource.com/787713
Commit-Queue: Dominic Battré <battre@chromium.org>
Reviewed-by: default avatarSimon Que <sque@chromium.org>
Reviewed-by: default avatarBartosz Fabianowski <bartfab@chromium.org>
Reviewed-by: default avatarDevlin <rdevlin.cronin@chromium.org>
Reviewed-by: default avatarRobert Liao <robliao@chromium.org>
Reviewed-by: default avatarAhmed Fakhry <afakhry@chromium.org>
Cr-Commit-Position: refs/heads/master@{#522435}
parent 224aa617
......@@ -288,33 +288,4 @@ TEST_F(SystemLogUploaderTest, DisableLogUpload) {
SystemLogUploader::kDefaultUploadDelayMs));
}
// Test RemovePII function.
TEST_F(SystemLogUploaderTest, TestPII) {
feedback::AnonymizerTool anonymizer;
std::string data =
"aaaaaaaa [SSID=123aaaaaa]aaaaa\n" // SSID.
"aaaaaaaahttp://tets.comaaaaaaa\n" // URL.
"aaaaaemail@example.comaaa\n" // Email address.
"example@@1234\n" // No PII, it is not valid email address.
"255.255.155.255\n" // IP address.
"aaaa123.123.45.4aaa\n" // IP address.
"11:11;11::11\n" // IP address.
"11::11\n" // IP address.
"11:11:abcdef:0:0:0:0:0\n" // No PII.
"aa:aa:aa:aa:aa:aa"; // MAC address (BSSID).
std::string result =
"aaaaaaaa [SSID=1]aaaaa\n"
"aaaaaaaa<URL: 1>\n"
"<email: 1>\n"
"example@@1234\n"
"<IPv4: 1>55\n"
"aaaa<IPv4: 2>aaa\n"
"11:11;<IPv6: 1>\n"
"<IPv6: 1>\n"
"11:11:abcdef:0:0:0:0:0\n"
"aa:aa:aa:00:00:01";
EXPECT_EQ(result, SystemLogUploader::RemoveSensitiveData(&anonymizer, data));
}
} // namespace policy
......@@ -10,6 +10,7 @@
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "content/public/browser/browser_thread.h"
#include "third_party/re2/src/re2/re2.h"
using re2::RE2;
......@@ -229,11 +230,19 @@ bool FindAndConsumeAndGetSkipped(re2::StringPiece* input,
AnonymizerTool::AnonymizerTool()
: custom_patterns_with_context_(arraysize(kCustomPatternsWithContext)),
custom_patterns_without_context_(
arraysize(kCustomPatternsWithoutContext)) {}
arraysize(kCustomPatternsWithoutContext)) {
DETACH_FROM_SEQUENCE(sequence_checker_);
}
AnonymizerTool::~AnonymizerTool() {}
AnonymizerTool::~AnonymizerTool() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
}
std::string AnonymizerTool::Anonymize(const std::string& input) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK(!::content::BrowserThread::CurrentlyOn(::content::BrowserThread::UI))
<< "This is an expensive operation. Do not execute this on the UI "
"thread.";
std::string anonymized = AnonymizeMACAddresses(input);
anonymized = AnonymizeCustomPatterns(std::move(anonymized));
return anonymized;
......@@ -375,4 +384,17 @@ std::string AnonymizerTool::AnonymizeCustomPatternWithoutContext(
return result;
}
AnonymizerToolContainer::AnonymizerToolContainer(
scoped_refptr<base::SequencedTaskRunner> task_runner)
: anonymizer_(new AnonymizerTool), task_runner_(task_runner) {}
AnonymizerToolContainer::~AnonymizerToolContainer() {
task_runner_->DeleteSoon(FROM_HERE, std::move(anonymizer_));
}
AnonymizerTool* AnonymizerToolContainer::Get() {
DCHECK(task_runner_->RunsTasksInCurrentSequence());
return anonymizer_.get();
}
} // namespace feedback
......@@ -11,6 +11,9 @@
#include <vector>
#include "base/macros.h"
#include "base/memory/ref_counted.h"
#include "base/sequence_checker.h"
#include "base/sequenced_task_runner.h"
namespace re2 {
class RE2;
......@@ -34,6 +37,8 @@ class AnonymizerTool {
// Returns an anonymized version of |input|. PII-sensitive data (such as MAC
// addresses) in |input| is replaced with unique identifiers.
// This is an expensive operation. Make sure not to execute this on the UI
// thread.
std::string Anonymize(const std::string& input);
private:
......@@ -70,9 +75,33 @@ class AnonymizerTool {
// pattern. Key is the string representation of the RegEx.
std::map<std::string, std::unique_ptr<re2::RE2>> regexp_cache_;
SEQUENCE_CHECKER(sequence_checker_);
DISALLOW_COPY_AND_ASSIGN(AnonymizerTool);
};
// A container for a AnonymizerTool that is thread-safely ref-countable.
// This is useful for a class that wants to post an async anonymization task
// to a background sequence runner and not deal with its own life-cycle ending
// while the AnonymizerTool is busy on another sequence.
class AnonymizerToolContainer
: public base::RefCountedThreadSafe<AnonymizerToolContainer> {
public:
explicit AnonymizerToolContainer(
scoped_refptr<base::SequencedTaskRunner> task_runner);
// Returns a pointer to the instance of this anonymier. May only be called
// on |task_runner_|.
AnonymizerTool* Get();
private:
friend class base::RefCountedThreadSafe<AnonymizerToolContainer>;
~AnonymizerToolContainer();
std::unique_ptr<AnonymizerTool> anonymizer_;
scoped_refptr<base::SequencedTaskRunner> task_runner_;
};
} // namespace feedback
#endif // COMPONENTS_FEEDBACK_ANONYMIZER_TOOL_H_
......@@ -201,4 +201,30 @@ TEST_F(AnonymizerToolTest, AnonymizeCustomPatternWithoutContext) {
&space));
}
TEST_F(AnonymizerToolTest, AnonymizeChunk) {
std::string data =
"aaaaaaaa [SSID=123aaaaaa]aaaaa\n" // SSID.
"aaaaaaaahttp://tets.comaaaaaaa\n" // URL.
"aaaaaemail@example.comaaa\n" // Email address.
"example@@1234\n" // No PII, it is not valid email address.
"255.255.155.255\n" // IP address.
"aaaa123.123.45.4aaa\n" // IP address.
"11:11;11::11\n" // IP address.
"11::11\n" // IP address.
"11:11:abcdef:0:0:0:0:0\n" // No PII.
"aa:aa:aa:aa:aa:aa"; // MAC address (BSSID).
std::string result =
"aaaaaaaa [SSID=1]aaaaa\n"
"aaaaaaaa<URL: 1>\n"
"<email: 1>\n"
"example@@1234\n"
"<IPv4: 1>55\n"
"aaaa<IPv4: 2>aaa\n"
"11:11;<IPv6: 1>\n"
"<IPv6: 1>\n"
"11:11:abcdef:0:0:0:0:0\n"
"aa:aa:aa:00:00:01";
EXPECT_EQ(result, anonymizer_.Anonymize(data));
}
} // namespace feedback
......@@ -9,6 +9,8 @@
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/memory/ptr_util.h"
#include "base/task_scheduler/post_task.h"
#include "base/task_scheduler/task_traits.h"
#include "content/public/browser/browser_thread.h"
using content::BrowserThread;
......@@ -34,17 +36,35 @@ bool IsKeyWhitelisted(const std::string& key) {
return false;
}
// Runs the Anonymizer tool over the entris of |response|.
void Anonymize(feedback::AnonymizerTool* anonymizer,
SystemLogsResponse* response) {
for (auto& element : *response) {
if (!IsKeyWhitelisted(element.first))
element.second = anonymizer->Anonymize(element.second);
}
}
} // namespace
SystemLogsFetcher::SystemLogsFetcher(bool scrub_data)
: response_(base::MakeUnique<SystemLogsResponse>()),
num_pending_requests_(0),
task_runner_for_anonymizer_(base::CreateSequencedTaskRunnerWithTraits(
{// User visible because this is called when the user is looking at
// the send feedback dialog, watching a spinner.
base::TaskPriority::USER_VISIBLE,
base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN})),
weak_ptr_factory_(this) {
if (scrub_data)
anonymizer_ = base::MakeUnique<feedback::AnonymizerTool>();
}
SystemLogsFetcher::~SystemLogsFetcher() {}
SystemLogsFetcher::~SystemLogsFetcher() {
// Ensure that destruction happens on same sequence where the object is being
// accessed.
task_runner_for_anonymizer_->DeleteSoon(FROM_HERE, std::move(anonymizer_));
}
void SystemLogsFetcher::AddSource(std::unique_ptr<SystemLogsSource> source) {
data_sources_.emplace_back(std::move(source));
......@@ -73,12 +93,20 @@ void SystemLogsFetcher::OnFetched(
VLOG(1) << "Received SystemLogSource: " << source_name;
if (anonymizer_) {
for (auto& element : *response) {
if (!IsKeyWhitelisted(element.first))
element.second = anonymizer_->Anonymize(element.second);
}
// It is safe to pass the unretained anonymizer_ instance here because
// the anonymizer_ is owned by |this| and |this| only deletes itself
// once all responses have been collected and added (see AddResponse()).
SystemLogsResponse* response_ptr = response.get();
task_runner_for_anonymizer_->PostTaskAndReply(
FROM_HERE,
base::BindOnce(Anonymize, base::Unretained(anonymizer_.get()),
base::Unretained(response_ptr)),
base::BindOnce(&SystemLogsFetcher::AddResponse,
weak_ptr_factory_.GetWeakPtr(), source_name,
std::move(response)));
} else {
AddResponse(source_name, std::move(response));
}
AddResponse(source_name, std::move(response));
}
void SystemLogsFetcher::AddResponse(
......
......@@ -15,6 +15,7 @@
#include "base/callback.h"
#include "base/macros.h"
#include "base/memory/weak_ptr.h"
#include "base/sequenced_task_runner.h"
#include "components/feedback/anonymizer_tool.h"
#include "components/feedback/feedback_common.h"
#include "components/feedback/system_logs/system_logs_source.h"
......@@ -72,6 +73,7 @@ class SystemLogsFetcher {
size_t num_pending_requests_; // The number of callbacks it should get.
std::unique_ptr<feedback::AnonymizerTool> anonymizer_;
scoped_refptr<base::SequencedTaskRunner> task_runner_for_anonymizer_;
base::WeakPtrFactory<SystemLogsFetcher> weak_ptr_factory_;
......
......@@ -232,9 +232,9 @@ ExtensionFunction::ResponseAction FeedbackPrivateReadLogSourceFunction::Run() {
#if defined(OS_CHROMEOS)
void FeedbackPrivateReadLogSourceFunction::OnCompleted(
const feedback_private::ReadLogSourceResult& result) {
std::unique_ptr<feedback_private::ReadLogSourceResult> result) {
Respond(
ArgumentList(feedback_private::ReadLogSource::Results::Create(result)));
ArgumentList(feedback_private::ReadLogSource::Results::Create(*result)));
}
#endif // defined(OS_CHROMEOS)
......
......@@ -117,7 +117,8 @@ class FeedbackPrivateReadLogSourceFunction : public UIThreadExtensionFunction {
#if defined(OS_CHROMEOS)
private:
void OnCompleted(const api::feedback_private::ReadLogSourceResult& result);
void OnCompleted(
std::unique_ptr<api::feedback_private::ReadLogSourceResult> result);
#endif // defined(OS_CHROMEOS)
};
......
......@@ -10,6 +10,8 @@
#include "base/bind.h"
#include "base/memory/ptr_util.h"
#include "base/strings/string_split.h"
#include "base/task_runner_util.h"
#include "base/task_scheduler/post_task.h"
#include "base/time/default_tick_clock.h"
#include "extensions/browser/api/api_resource_manager.h"
#include "extensions/browser/api/extensions_api_client.h"
......@@ -52,12 +54,28 @@ void GetLogLinesFromSystemLogsResponse(const SystemLogsResponse& response,
}
}
// Anonymizes the strings in |result|.
void AnonymizeResults(
scoped_refptr<feedback::AnonymizerToolContainer> anonymizer_container,
ReadLogSourceResult* result) {
feedback::AnonymizerTool* anonymizer = anonymizer_container->Get();
for (std::string& line : result->log_lines)
line = anonymizer->Anonymize(line);
}
} // namespace
LogSourceAccessManager::LogSourceAccessManager(content::BrowserContext* context)
: context_(context),
tick_clock_(std::make_unique<base::DefaultTickClock>()),
anonymizer_(std::make_unique<feedback::AnonymizerTool>()),
task_runner_for_anonymizer_(base::CreateSequencedTaskRunnerWithTraits(
// User visible as the feedback_api is used by the Chrome (OS)
// feedback extension while the user may be looking at a spinner.
{base::TaskPriority::USER_VISIBLE,
base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN})),
anonymizer_container_(
base::MakeRefCounted<feedback::AnonymizerToolContainer>(
task_runner_for_anonymizer_)),
weak_factory_(this) {}
LogSourceAccessManager::~LogSourceAccessManager() {}
......@@ -95,7 +113,8 @@ bool LogSourceAccessManager::FetchFromSource(
// perspective, there is no new data. There is no need for the caller to keep
// track of the time since last access.
if (!UpdateSourceAccessTime(resource_id)) {
callback.Run({});
callback.Run(
std::make_unique<api::feedback_private::ReadLogSourceResult>());
return true;
}
......@@ -117,22 +136,28 @@ void LogSourceAccessManager::OnFetchComplete(
bool delete_resource,
const ReadLogSourceCallback& callback,
std::unique_ptr<SystemLogsResponse> response) {
ReadLogSourceResult result;
auto result = std::make_unique<ReadLogSourceResult>();
// Always return invalid resource ID if there is a cleanup.
result.reader_id = delete_resource ? kInvalidResourceId : resource_id;
result->reader_id = delete_resource ? kInvalidResourceId : resource_id;
GetLogLinesFromSystemLogsResponse(*response, &result.log_lines);
GetLogLinesFromSystemLogsResponse(*response, &result->log_lines);
for (std::string& line : result.log_lines)
line = anonymizer_->Anonymize(line);
// Retrieve result pointer before the PostTaskAndReply to fix issues with
// an undefined execution order of arguments in a function call
// (std::move(result) being executed before result.get()).
ReadLogSourceResult* result_ptr = result.get();
task_runner_for_anonymizer_->PostTaskAndReply(
FROM_HERE,
base::BindOnce(AnonymizeResults, anonymizer_container_,
base::Unretained(result_ptr)),
base::BindOnce(callback, std::move(result)));
if (delete_resource) {
// This should also remove the entry from |sources_|.
ApiResourceManager<LogSourceResource>::Get(context_)->Remove(extension_id,
resource_id);
}
callback.Run(result);
}
void LogSourceAccessManager::RemoveHandle(ResourceId id) {
......
......@@ -14,6 +14,7 @@
#include "base/gtest_prod_util.h"
#include "base/macros.h"
#include "base/memory/weak_ptr.h"
#include "base/sequenced_task_runner.h"
#include "base/time/tick_clock.h"
#include "base/time/time.h"
#include "components/feedback/anonymizer_tool.h"
......@@ -29,8 +30,8 @@ namespace extensions {
// - A source may not be accessed too frequently by an extension.
class LogSourceAccessManager {
public:
using ReadLogSourceCallback =
base::Callback<void(const api::feedback_private::ReadLogSourceResult&)>;
using ReadLogSourceCallback = base::Callback<void(
std::unique_ptr<api::feedback_private::ReadLogSourceResult>)>;
explicit LogSourceAccessManager(content::BrowserContext* context);
~LogSourceAccessManager();
......@@ -152,7 +153,8 @@ class LogSourceAccessManager {
std::unique_ptr<base::TickClock> tick_clock_;
// For removing PII from log strings from log sources.
std::unique_ptr<feedback::AnonymizerTool> anonymizer_;
scoped_refptr<base::SequencedTaskRunner> task_runner_for_anonymizer_;
scoped_refptr<feedback::AnonymizerToolContainer> anonymizer_container_;
base::WeakPtrFactory<LogSourceAccessManager> weak_factory_;
......
......@@ -33,7 +33,7 @@ TEST_F(LogSourceAccessManagerTest, MaxNumberOfOpenLogSourcesSameExtension) {
// Create a dummy callback to pass to FetchFromSource().
LogSourceAccessManager::ReadLogSourceCallback callback =
base::Bind([](const ReadLogSourceResult&) {});
base::Bind([](std::unique_ptr<ReadLogSourceResult>) {});
const std::string extension_id = "extension";
......@@ -84,7 +84,7 @@ TEST_F(LogSourceAccessManagerTest,
// Create a dummy callback to pass to FetchFromSource().
LogSourceAccessManager::ReadLogSourceCallback callback =
base::Bind([](const ReadLogSourceResult&) {});
base::Bind([](std::unique_ptr<ReadLogSourceResult>) {});
int count = 0;
......
......@@ -9,6 +9,7 @@
#include <vector>
#include "base/memory/ref_counted.h"
#include "base/run_loop.h"
#include "base/strings/string_util.h"
#include "base/values.h"
#include "components/feedback/system_logs/system_logs_fetcher.h"
......@@ -46,12 +47,16 @@ class ShellSystemLogsFetcherTest : public ExtensionsTest {
void OnSystemLogsResponse(
std::unique_ptr<system_logs::SystemLogsResponse> response) {
response_ = std::move(response);
wait_for_logs_response_run_loop_.Quit();
}
const system_logs::SystemLogsResponse* response() const {
return response_.get();
}
protected:
base::RunLoop wait_for_logs_response_run_loop_;
private:
std::unique_ptr<system_logs::SystemLogsResponse> response_;
};
......@@ -72,7 +77,9 @@ TEST_F(ShellSystemLogsFetcherTest, TestLogSources) {
fetcher->Fetch(base::Bind(&ShellSystemLogsFetcherTest::OnSystemLogsResponse,
base::Unretained(this)));
EXPECT_TRUE(response());
wait_for_logs_response_run_loop_.Run();
ASSERT_TRUE(response());
EXPECT_LT(0u, response()->at("APPSHELL VERSION").size());
EXPECT_LT(0u, response()->at("OS VERSION").size());
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment