Commit 8d2ee9a7 authored by Jeffrey Kardatzke, committed by Commit Bot

Consolidate debugd anonymizer into Chrome

This merges all of the functionality that was in the anonymizer in
debugd into the one that is in Chrome. The plan is to remove the one in
debugd because anything run through that anonymizer is also getting run
through the one in Chrome.

Specifically, this changes:
1. Add anonymization of Android storage paths
2. Add preservation of anonymous MACs
3. Change formatting of anonymized MACs

Bug: 921388
Test: Unit tests pass
Change-Id: I1882ac2cbc10a0911257d04188f225ebca52db82
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1726672
Commit-Queue: Jeffrey Kardatzke <jkardatzke@google.com>
Reviewed-by: Ahmed Fakhry <afakhry@chromium.org>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Cr-Commit-Position: refs/heads/master@{#683285}
parent ffaacd30
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <memory> #include <memory>
#include <utility> #include <utility>
#include "base/files/file_path.h"
#include "base/strings/strcat.h" #include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h" #include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h" #include "base/strings/string_util.h"
...@@ -351,6 +352,14 @@ bool FindAndConsumeAndGetSkipped(re2::StringPiece* input, ...@@ -351,6 +352,14 @@ bool FindAndConsumeAndGetSkipped(re2::StringPiece* input,
base::size(args)); base::size(args));
} }
// MAC addresses that are passed through without anonymization: each has a
// general meaning and is not specific to any one device.
constexpr const char* kNonAnonymizedMacAddresses[] = {
    "00:00:00:00:00:00",  // MAC that results from an ARP failure.
    "ff:ff:ff:ff:ff:ff",  // Broadcast MAC address.
};

// Number of entries in |kNonAnonymizedMacAddresses|.
constexpr size_t kNumNonAnonymizedMacs =
    base::size(kNonAnonymizedMacAddresses);
} // namespace } // namespace
AnonymizerTool::AnonymizerTool(const char* const* first_party_extension_ids) AnonymizerTool::AnonymizerTool(const char* const* first_party_extension_ids)
...@@ -359,6 +368,9 @@ AnonymizerTool::AnonymizerTool(const char* const* first_party_extension_ids) ...@@ -359,6 +368,9 @@ AnonymizerTool::AnonymizerTool(const char* const* first_party_extension_ids)
custom_patterns_without_context_( custom_patterns_without_context_(
base::size(kCustomPatternsWithoutContext)) { base::size(kCustomPatternsWithoutContext)) {
DETACH_FROM_SEQUENCE(sequence_checker_); DETACH_FROM_SEQUENCE(sequence_checker_);
// Identity-map these, so we don't mangle them.
for (const char* mac : kNonAnonymizedMacAddresses)
mac_addresses_[mac] = mac;
} }
AnonymizerTool::~AnonymizerTool() { AnonymizerTool::~AnonymizerTool() {
...@@ -371,6 +383,7 @@ std::string AnonymizerTool::Anonymize(const std::string& input) { ...@@ -371,6 +383,7 @@ std::string AnonymizerTool::Anonymize(const std::string& input) {
<< "This is an expensive operation. Do not execute this on the UI " << "This is an expensive operation. Do not execute this on the UI "
"thread."; "thread.";
std::string anonymized = AnonymizeMACAddresses(input); std::string anonymized = AnonymizeMACAddresses(input);
anonymized = AnonymizeAndroidAppStoragePaths(std::move(anonymized));
anonymized = AnonymizeCustomPatterns(std::move(anonymized)); anonymized = AnonymizeCustomPatterns(std::move(anonymized));
return anonymized; return anonymized;
} }
...@@ -417,10 +430,9 @@ std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) { ...@@ -417,10 +430,9 @@ std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) {
if (replacement_mac.empty()) { if (replacement_mac.empty()) {
// If not found, build up a replacement MAC address by generating a new // If not found, build up a replacement MAC address by generating a new
// NIC part. // NIC part.
int mac_id = mac_addresses_.size(); int mac_id = mac_addresses_.size() - kNumNonAnonymizedMacs;
replacement_mac = base::StringPrintf( replacement_mac = base::StringPrintf("[MAC OUI=%s IFACE=%d]",
"%s:%02x:%02x:%02x", oui_string.c_str(), (mac_id & 0x00ff0000) >> 16, oui_string.c_str(), mac_id);
(mac_id & 0x0000ff00) >> 8, (mac_id & 0x000000ff));
mac_addresses_[mac] = replacement_mac; mac_addresses_[mac] = replacement_mac;
} }
...@@ -432,6 +444,62 @@ std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) { ...@@ -432,6 +444,62 @@ std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) {
return result; return result;
} }
// Redacts the app-private part of Android storage paths found in
// 'android_app_storage' output.
//
// A path is rewritten when it is preceded by a tab and has one of the forms
//   /home/root/<hash>/android-data/data/data/<package_name>/<rest>
//   /home/root/<hash>/android-data/data/user_de/<number>/<package_name>/<rest>
// Everything up to and including <package_name> is copied verbatim. Each path
// component of <rest> (which extends to the end of the line) is reduced to:
//   - its first character if the whole component is ASCII, or '*' otherwise;
//   - followed by '_' if the original component was longer than one byte.
// All text outside such matches is copied through unchanged.
//
// On platforms other than Chrome OS, |input| is returned unmodified.
std::string AnonymizerTool::AnonymizeAndroidAppStoragePaths(
    const std::string& input) {
// This is only used on Chrome OS, and API differences in FilePath on Windows
// prevent the implementation from compiling there, so every other platform
// gets a pass-through.
#if !defined(OS_CHROMEOS)
  return input;
#else
  // Group 1 is the prefix to keep, group 2 (nested inside group 1) is only
  // needed for the alternation, and group 3 is the app-specific remainder.
  RE2* const storage_path_re = GetRegExp(
      "(?m)(\\t/home/root/[\\da-f]+/android-data/data/"
      "(data|user_de/\\d+)/[^/\\n]+)("
      "/[^\\n]+)");

  std::string redacted;
  redacted.reserve(input.size());

  // Walk through the input match by match, appending to |redacted| as we go.
  re2::StringPiece remaining(input);
  re2::StringPiece unmatched;
  re2::StringPiece kept_prefix;
  re2::StringPiece unused_group;
  re2::StringPiece private_suffix;
  while (FindAndConsumeAndGetSkipped(&remaining, *storage_path_re, &unmatched,
                                     &kept_prefix, &unused_group,
                                     &private_suffix)) {
    // Text before the match and the path prefix are safe to emit verbatim.
    unmatched.AppendToString(&redacted);
    kept_prefix.AppendToString(&redacted);

    // Break the app-specific remainder into components and emit a redacted
    // stand-in for each of them.
    std::vector<std::string> parts;
    base::FilePath(private_suffix.as_string()).GetComponents(&parts);
    DCHECK(!parts.empty());
    // Index 0 is the leading slash, so start from the component after it.
    for (size_t i = 1; i < parts.size(); ++i) {
      const std::string& part = parts[i];
      DCHECK(!part.empty());
      redacted += '/';
      if (base::IsStringASCII(part))
        redacted += part[0];
      else
        redacted += '*';
      if (part.length() > 1)
        redacted += '_';
    }
  }

  // Whatever follows the last match is copied through untouched.
  remaining.AppendToString(&redacted);
  return redacted;
#endif  // !defined(OS_CHROMEOS)
}
std::string AnonymizerTool::AnonymizeCustomPatterns(std::string input) { std::string AnonymizerTool::AnonymizeCustomPatterns(std::string input) {
for (size_t i = 0; i < base::size(kCustomPatternsWithContext); i++) { for (size_t i = 0; i < base::size(kCustomPatternsWithContext); i++) {
input = input =
......
...@@ -50,6 +50,7 @@ class AnonymizerTool { ...@@ -50,6 +50,7 @@ class AnonymizerTool {
re2::RE2* GetRegExp(const std::string& pattern); re2::RE2* GetRegExp(const std::string& pattern);
std::string AnonymizeMACAddresses(const std::string& input); std::string AnonymizeMACAddresses(const std::string& input);
std::string AnonymizeAndroidAppStoragePaths(const std::string& input);
std::string AnonymizeCustomPatterns(std::string input); std::string AnonymizeCustomPatterns(std::string input);
std::string AnonymizeCustomPatternWithContext( std::string AnonymizeCustomPatternWithContext(
const std::string& input, const std::string& input,
......
...@@ -19,6 +19,10 @@ class AnonymizerToolTest : public testing::Test { ...@@ -19,6 +19,10 @@ class AnonymizerToolTest : public testing::Test {
return anonymizer_.AnonymizeMACAddresses(input); return anonymizer_.AnonymizeMACAddresses(input);
} }
// Test-fixture wrapper: forwards |input| to
// anonymizer_.AnonymizeAndroidAppStoragePaths() and returns its result.
std::string AnonymizeAndroidAppStoragePaths(const std::string& input) {
return anonymizer_.AnonymizeAndroidAppStoragePaths(input);
}
std::string AnonymizeCustomPatterns(const std::string& input) { std::string AnonymizeCustomPatterns(const std::string& input) {
return anonymizer_.AnonymizeCustomPatterns(input); return anonymizer_.AnonymizeCustomPatterns(input);
} }
...@@ -46,7 +50,8 @@ TEST_F(AnonymizerToolTest, Anonymize) { ...@@ -46,7 +50,8 @@ TEST_F(AnonymizerToolTest, Anonymize) {
EXPECT_EQ("foo\nbar\n", anonymizer_.Anonymize("foo\nbar\n")); EXPECT_EQ("foo\nbar\n", anonymizer_.Anonymize("foo\nbar\n"));
// Make sure MAC address anonymization is invoked. // Make sure MAC address anonymization is invoked.
EXPECT_EQ("02:46:8a:00:00:01", anonymizer_.Anonymize("02:46:8a:ce:13:57")); EXPECT_EQ("[MAC OUI=02:46:8a IFACE=1]",
anonymizer_.Anonymize("02:46:8a:ce:13:57"));
// Make sure custom pattern anonymization is invoked. // Make sure custom pattern anonymization is invoked.
EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'")); EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'"));
...@@ -72,21 +77,24 @@ TEST_F(AnonymizerToolTest, AnonymizeMACAddresses) { ...@@ -72,21 +77,24 @@ TEST_F(AnonymizerToolTest, AnonymizeMACAddresses) {
EXPECT_EQ("", AnonymizeMACAddresses("")); EXPECT_EQ("", AnonymizeMACAddresses(""));
EXPECT_EQ("foo\nbar\n", AnonymizeMACAddresses("foo\nbar\n")); EXPECT_EQ("foo\nbar\n", AnonymizeMACAddresses("foo\nbar\n"));
EXPECT_EQ("11:22:33:44:55", AnonymizeMACAddresses("11:22:33:44:55")); EXPECT_EQ("11:22:33:44:55", AnonymizeMACAddresses("11:22:33:44:55"));
EXPECT_EQ("aa:bb:cc:00:00:01", AnonymizeMACAddresses("aa:bb:cc:dd:ee:ff")); EXPECT_EQ("[MAC OUI=aa:bb:cc IFACE=1]",
AnonymizeMACAddresses("aa:bb:cc:dd:ee:ff"));
EXPECT_EQ("00:00:00:00:00:00", AnonymizeMACAddresses("00:00:00:00:00:00"));
EXPECT_EQ("ff:ff:ff:ff:ff:ff", AnonymizeMACAddresses("ff:ff:ff:ff:ff:ff"));
EXPECT_EQ( EXPECT_EQ(
"BSSID: aa:bb:cc:00:00:01 in the middle\n" "BSSID: [MAC OUI=aa:bb:cc IFACE=1] in the middle\n"
"bb:cc:dd:00:00:02 start of line\n" "[MAC OUI=bb:cc:dd IFACE=2] start of line\n"
"end of line aa:bb:cc:00:00:01\n" "end of line [MAC OUI=aa:bb:cc IFACE=1]\n"
"no match across lines aa:bb:cc:\n" "no match across lines aa:bb:cc:\n"
"dd:ee:ff two on the same line:\n" "dd:ee:ff two on the same line:\n"
"x bb:cc:dd:00:00:02 cc:dd:ee:00:00:03 x\n", "x [MAC OUI=bb:cc:dd IFACE=2] [MAC OUI=cc:dd:ee IFACE=3] x\n",
AnonymizeMACAddresses("BSSID: aa:bb:cc:dd:ee:ff in the middle\n" AnonymizeMACAddresses("BSSID: aa:bb:cc:dd:ee:ff in the middle\n"
"bb:cc:dd:ee:ff:00 start of line\n" "bb:cc:dd:ee:ff:00 start of line\n"
"end of line aa:bb:cc:dd:ee:ff\n" "end of line aa:bb:cc:dd:ee:ff\n"
"no match across lines aa:bb:cc:\n" "no match across lines aa:bb:cc:\n"
"dd:ee:ff two on the same line:\n" "dd:ee:ff two on the same line:\n"
"x bb:cc:dd:ee:ff:00 cc:dd:ee:ff:00:11 x\n")); "x bb:cc:dd:ee:ff:00 cc:dd:ee:ff:00:11 x\n"));
EXPECT_EQ("Remember bb:cc:dd:00:00:02?", EXPECT_EQ("Remember [MAC OUI=bb:cc:dd IFACE=2]?",
AnonymizeMACAddresses("Remember bB:Cc:DD:ee:ff:00?")); AnonymizeMACAddresses("Remember bB:Cc:DD:ee:ff:00?"));
} }
...@@ -347,7 +355,7 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) { ...@@ -347,7 +355,7 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) {
{"::0101:ffff:c0a8:640a", // IP address. {"::0101:ffff:c0a8:640a", // IP address.
"<IPv6: 27>"}, "<IPv6: 27>"},
{"aa:aa:aa:aa:aa:aa", // MAC address (BSSID). {"aa:aa:aa:aa:aa:aa", // MAC address (BSSID).
"aa:aa:aa:00:00:01"}, "[MAC OUI=aa:aa:aa IFACE=1]"},
{"chrome://resources/foo", // Secure chrome resource, whitelisted. {"chrome://resources/foo", // Secure chrome resource, whitelisted.
"chrome://resources/foo"}, "chrome://resources/foo"},
{"chrome://settings/crisper.js", // Whitelisted settings URLs. {"chrome://settings/crisper.js", // Whitelisted settings URLs.
...@@ -359,6 +367,14 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) { ...@@ -359,6 +367,14 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) {
"<URL: 2>"}, "<URL: 2>"},
{"chrome-extension://nkoccljplnhpfnfiajclkommnmllphnl/foobar.js?bar=x", {"chrome-extension://nkoccljplnhpfnfiajclkommnmllphnl/foobar.js?bar=x",
"<URL: 3>"}, // Potentially PII in parameter. "<URL: 3>"}, // Potentially PII in parameter.
#if defined(OS_CHROMEOS) // We only anonymize Android paths on Chrome OS.
// Allowed android storage path.
{"112K\t/home/root/deadbeef1234/android-data/data/system_de",
"112K\t/home/root/deadbeef1234/android-data/data/system_de"},
// Anonymized app-specific storage path.
{"8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de",
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_"},
#endif // defined(OS_CHROMEOS)
}; };
std::string anon_input; std::string anon_input;
std::string anon_output; std::string anon_output;
...@@ -369,4 +385,53 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) { ...@@ -369,4 +385,53 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) {
EXPECT_EQ(anon_output, anonymizer_.Anonymize(anon_input)); EXPECT_EQ(anon_output, anonymizer_.Anonymize(anon_input));
} }
#if defined(OS_CHROMEOS)  // Android paths are only anonymized on Chrome OS.
// Checks that AnonymizeAndroidAppStoragePaths() leaves unrelated text alone and
// redacts only the path components that follow the package name under
// /data/data and /data/user_de/<number>.
TEST_F(AnonymizerToolTest, AnonymizeAndroidAppStoragePaths) {
  // Inputs without any Android storage path come back unchanged.
  EXPECT_EQ("", AnonymizeAndroidAppStoragePaths(""));
  EXPECT_EQ("foo\nbar\n", AnonymizeAndroidAppStoragePaths("foo\nbar\n"));

  constexpr char kRawListing[] =
      "112K\t/home/root/deadbeef1234/android-data/data/system_de\n"
      // Entries below a package directory in /data/data get redacted.
      "8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/a\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/bc\n"
      "24K\t/home/root/deadbeef1234/android-data/data/data/pack.age1\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de/"
      "\xe3\x81\x82\n"
      "8.1K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de/"
      "\xe3\x81\x82\xe3\x81\x83\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/ef\n"
      "24K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2\n"
      // Entries in /data/app are left as they are.
      "8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/a\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/bc\n"
      "24K\t/home/root/deadbeef1234/android-data/data/app/pack.age1\n"
      // Entries below a package directory in /data/user_de get redacted too.
      "8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/a\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/bc\n"
      "24K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1\n"
      "78M\t/home/root/deadbeef1234/android-data/data/data\n";

  constexpr char kExpectedListing[] =
      "112K\t/home/root/deadbeef1234/android-data/data/system_de\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/a\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/b_\n"
      "24K\t/home/root/deadbeef1234/android-data/data/data/pack.age1\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_\n"
      // Directory names containing non-ASCII characters turn into '*_'.
      "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_/*_\n"
      "8.1K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_/*_\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/e_\n"
      "24K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/a\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/bc\n"
      "24K\t/home/root/deadbeef1234/android-data/data/app/pack.age1\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/a\n"
      "8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/b_\n"
      "24K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1\n"
      "78M\t/home/root/deadbeef1234/android-data/data/data\n";

  const std::string actual_listing =
      AnonymizeAndroidAppStoragePaths(kRawListing);
  EXPECT_EQ(kExpectedListing, actual_listing);
}
#endif  // defined(OS_CHROMEOS)
} // namespace feedback } // namespace feedback
...@@ -207,7 +207,7 @@ TEST_F(FeedbackPrivateApiUnittest, Anonymize) { ...@@ -207,7 +207,7 @@ TEST_F(FeedbackPrivateApiUnittest, Anonymize) {
EXPECT_TRUE( EXPECT_TRUE(
RunReadLogSourceFunction(params, &result_reader_id, &result_string)); RunReadLogSourceFunction(params, &result_reader_id, &result_string));
EXPECT_EQ(*params.reader_id, result_reader_id); EXPECT_EQ(*params.reader_id, result_reader_id);
EXPECT_EQ("11:22:33:00:00:01", result_string); EXPECT_EQ("[MAC OUI=11:22:33 IFACE=1]", result_string);
} }
TEST_F(FeedbackPrivateApiUnittest, ReadLogSourceMultipleSources) { TEST_F(FeedbackPrivateApiUnittest, ReadLogSourceMultipleSources) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment