Commit 8d2ee9a7 authored by Jeffrey Kardatzke, committed by Commit Bot

Consolidate debugd anonymizer into Chrome

This merges all of the functionality that was in the anonymizer in
debugd into the one that is in Chrome. The plan is to remove the one in
debugd because anything run through that anonymizer is also getting run
through the one in Chrome.

Specifically, this changes:
1. Add anonymization of Android storage paths
2. Add preservation of anonymous MACs
3. Change formatting of anonymized MACs

Bug: 921388
Test: Unit tests pass
Change-Id: I1882ac2cbc10a0911257d04188f225ebca52db82
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1726672
Commit-Queue: Jeffrey Kardatzke <jkardatzke@google.com>
Reviewed-by: Ahmed Fakhry <afakhry@chromium.org>
Reviewed-by: Mike Frysinger <vapier@chromium.org>
Cr-Commit-Position: refs/heads/master@{#683285}
parent ffaacd30
......@@ -7,6 +7,7 @@
#include <memory>
#include <utility>
#include "base/files/file_path.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
......@@ -351,6 +352,14 @@ bool FindAndConsumeAndGetSkipped(re2::StringPiece* input,
base::size(args));
}
// The following MAC addresses will not be anonymized as they are not specific
// to a device but have general meanings.
// The AnonymizerTool constructor maps each entry to itself in mac_addresses_,
// so the MAC replacement lookup returns the address unchanged.
const char* const kNonAnonymizedMacAddresses[] = {
"00:00:00:00:00:00", // ARP failure result MAC.
"ff:ff:ff:ff:ff:ff", // Broadcast MAC.
};
// Number of entries in the table above. AnonymizeMACAddresses subtracts this
// from mac_addresses_.size() when computing the IFACE id of a new replacement,
// so the pre-seeded identity entries do not consume ids.
constexpr size_t kNumNonAnonymizedMacs = base::size(kNonAnonymizedMacAddresses);
} // namespace
AnonymizerTool::AnonymizerTool(const char* const* first_party_extension_ids)
......@@ -359,6 +368,9 @@ AnonymizerTool::AnonymizerTool(const char* const* first_party_extension_ids)
custom_patterns_without_context_(
base::size(kCustomPatternsWithoutContext)) {
DETACH_FROM_SEQUENCE(sequence_checker_);
// Identity-map these, so we don't mangle them.
for (const char* mac : kNonAnonymizedMacAddresses)
mac_addresses_[mac] = mac;
}
AnonymizerTool::~AnonymizerTool() {
......@@ -371,6 +383,7 @@ std::string AnonymizerTool::Anonymize(const std::string& input) {
<< "This is an expensive operation. Do not execute this on the UI "
"thread.";
std::string anonymized = AnonymizeMACAddresses(input);
anonymized = AnonymizeAndroidAppStoragePaths(std::move(anonymized));
anonymized = AnonymizeCustomPatterns(std::move(anonymized));
return anonymized;
}
......@@ -417,10 +430,9 @@ std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) {
if (replacement_mac.empty()) {
// If not found, build up a replacement MAC address by generating a new
// NIC part.
int mac_id = mac_addresses_.size();
replacement_mac = base::StringPrintf(
"%s:%02x:%02x:%02x", oui_string.c_str(), (mac_id & 0x00ff0000) >> 16,
(mac_id & 0x0000ff00) >> 8, (mac_id & 0x000000ff));
int mac_id = mac_addresses_.size() - kNumNonAnonymizedMacs;
replacement_mac = base::StringPrintf("[MAC OUI=%s IFACE=%d]",
oui_string.c_str(), mac_id);
mac_addresses_[mac] = replacement_mac;
}
......@@ -432,6 +444,62 @@ std::string AnonymizerTool::AnonymizeMACAddresses(const std::string& input) {
return result;
}
std::string AnonymizerTool::AnonymizeAndroidAppStoragePaths(
    const std::string& input) {
  // This redaction is only needed on Chrome OS, and the FilePath API differs
  // on Windows in a way that keeps this code from compiling there. The body is
  // therefore built for Chrome OS only; other platforms return |input| as-is.
#if defined(OS_CHROMEOS)
  std::string output;
  output.reserve(input.size());

  // Targets 'android_app_storage' output. A line is rewritten when, after a
  // tab, its path starts with either
  //   /home/root/<hash>/android-data/data/data/<package_name>/ or
  //   /home/root/<hash>/android-data/data/user_de/<number>/<package_name>/
  // and has at least one more character after that prefix. Capture group 1 is
  // the prefix up to and including <package_name> (kept verbatim), group 2 is
  // the "data" / "user_de/<number>" alternative (unused), and group 3 is the
  // remainder of the line, which gets redacted.
  RE2* storage_path_re = GetRegExp(
      "(?m)(\\t/home/root/[\\da-f]+/android-data/data/"
      "(data|user_de/\\d+)/[^/\\n]+)("
      "/[^\\n]+)");

  // Walk through every match, copying untouched text and redacted paths into
  // |output| as we go.
  re2::StringPiece remaining(input);
  re2::StringPiece before_match, kept_prefix, unused_group, private_suffix;
  while (FindAndConsumeAndGetSkipped(&remaining, *storage_path_re,
                                     &before_match, &kept_prefix,
                                     &unused_group, &private_suffix)) {
    // Text ahead of the match and the path prefix are safe to emit unchanged.
    before_match.AppendToString(&output);
    kept_prefix.AppendToString(&output);

    // Split the app-specific remainder into path components and redact each:
    //  - a component containing any non-ASCII character becomes '*';
    //  - any other component is reduced to its first character;
    //  - either way, '_' is appended when the original was longer than one
    //    byte, hinting that something was dropped.
    std::vector<std::string> parts;
    base::FilePath(private_suffix.as_string()).GetComponents(&parts);
    DCHECK(!parts.empty());

    // Index 0 is the leading slash, so redaction starts at index 1.
    for (size_t i = 1; i < parts.size(); ++i) {
      const std::string& part = parts[i];
      DCHECK(!part.empty());
      output += '/';
      if (base::IsStringASCII(part))
        output += part[0];
      else
        output += '*';
      if (part.length() > 1)
        output += '_';
    }
  }

  // Whatever follows the final match is copied through untouched.
  remaining.AppendToString(&output);
  return output;
#else
  return input;
#endif  // defined(OS_CHROMEOS)
}
std::string AnonymizerTool::AnonymizeCustomPatterns(std::string input) {
for (size_t i = 0; i < base::size(kCustomPatternsWithContext); i++) {
input =
......
......@@ -50,6 +50,7 @@ class AnonymizerTool {
re2::RE2* GetRegExp(const std::string& pattern);
std::string AnonymizeMACAddresses(const std::string& input);
std::string AnonymizeAndroidAppStoragePaths(const std::string& input);
std::string AnonymizeCustomPatterns(std::string input);
std::string AnonymizeCustomPatternWithContext(
const std::string& input,
......
......@@ -19,6 +19,10 @@ class AnonymizerToolTest : public testing::Test {
return anonymizer_.AnonymizeMACAddresses(input);
}
// Test-fixture shim: forwards |input| to
// anonymizer_.AnonymizeAndroidAppStoragePaths() and returns its result, so
// test bodies can call the Android storage path pass in isolation.
std::string AnonymizeAndroidAppStoragePaths(const std::string& input) {
return anonymizer_.AnonymizeAndroidAppStoragePaths(input);
}
// Test-fixture shim: forwards |input| to anonymizer_.AnonymizeCustomPatterns()
// and returns its result, so test bodies can call the custom-pattern pass in
// isolation.
std::string AnonymizeCustomPatterns(const std::string& input) {
return anonymizer_.AnonymizeCustomPatterns(input);
}
......@@ -46,7 +50,8 @@ TEST_F(AnonymizerToolTest, Anonymize) {
EXPECT_EQ("foo\nbar\n", anonymizer_.Anonymize("foo\nbar\n"));
// Make sure MAC address anonymization is invoked.
EXPECT_EQ("02:46:8a:00:00:01", anonymizer_.Anonymize("02:46:8a:ce:13:57"));
EXPECT_EQ("[MAC OUI=02:46:8a IFACE=1]",
anonymizer_.Anonymize("02:46:8a:ce:13:57"));
// Make sure custom pattern anonymization is invoked.
EXPECT_EQ("Cell ID: '1'", AnonymizeCustomPatterns("Cell ID: 'A1B2'"));
......@@ -72,21 +77,24 @@ TEST_F(AnonymizerToolTest, AnonymizeMACAddresses) {
EXPECT_EQ("", AnonymizeMACAddresses(""));
EXPECT_EQ("foo\nbar\n", AnonymizeMACAddresses("foo\nbar\n"));
EXPECT_EQ("11:22:33:44:55", AnonymizeMACAddresses("11:22:33:44:55"));
EXPECT_EQ("aa:bb:cc:00:00:01", AnonymizeMACAddresses("aa:bb:cc:dd:ee:ff"));
EXPECT_EQ("[MAC OUI=aa:bb:cc IFACE=1]",
AnonymizeMACAddresses("aa:bb:cc:dd:ee:ff"));
EXPECT_EQ("00:00:00:00:00:00", AnonymizeMACAddresses("00:00:00:00:00:00"));
EXPECT_EQ("ff:ff:ff:ff:ff:ff", AnonymizeMACAddresses("ff:ff:ff:ff:ff:ff"));
EXPECT_EQ(
"BSSID: aa:bb:cc:00:00:01 in the middle\n"
"bb:cc:dd:00:00:02 start of line\n"
"end of line aa:bb:cc:00:00:01\n"
"BSSID: [MAC OUI=aa:bb:cc IFACE=1] in the middle\n"
"[MAC OUI=bb:cc:dd IFACE=2] start of line\n"
"end of line [MAC OUI=aa:bb:cc IFACE=1]\n"
"no match across lines aa:bb:cc:\n"
"dd:ee:ff two on the same line:\n"
"x bb:cc:dd:00:00:02 cc:dd:ee:00:00:03 x\n",
"x [MAC OUI=bb:cc:dd IFACE=2] [MAC OUI=cc:dd:ee IFACE=3] x\n",
AnonymizeMACAddresses("BSSID: aa:bb:cc:dd:ee:ff in the middle\n"
"bb:cc:dd:ee:ff:00 start of line\n"
"end of line aa:bb:cc:dd:ee:ff\n"
"no match across lines aa:bb:cc:\n"
"dd:ee:ff two on the same line:\n"
"x bb:cc:dd:ee:ff:00 cc:dd:ee:ff:00:11 x\n"));
EXPECT_EQ("Remember bb:cc:dd:00:00:02?",
EXPECT_EQ("Remember [MAC OUI=bb:cc:dd IFACE=2]?",
AnonymizeMACAddresses("Remember bB:Cc:DD:ee:ff:00?"));
}
......@@ -347,7 +355,7 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) {
{"::0101:ffff:c0a8:640a", // IP address.
"<IPv6: 27>"},
{"aa:aa:aa:aa:aa:aa", // MAC address (BSSID).
"aa:aa:aa:00:00:01"},
"[MAC OUI=aa:aa:aa IFACE=1]"},
{"chrome://resources/foo", // Secure chrome resource, whitelisted.
"chrome://resources/foo"},
{"chrome://settings/crisper.js", // Whitelisted settings URLs.
......@@ -359,6 +367,14 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) {
"<URL: 2>"},
{"chrome-extension://nkoccljplnhpfnfiajclkommnmllphnl/foobar.js?bar=x",
"<URL: 3>"}, // Potentially PII in parameter.
#if defined(OS_CHROMEOS) // We only anonymize Android paths on Chrome OS.
// Allowed android storage path.
{"112K\t/home/root/deadbeef1234/android-data/data/system_de",
"112K\t/home/root/deadbeef1234/android-data/data/system_de"},
// Anonymized app-specific storage path.
{"8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de",
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_"},
#endif // defined(OS_CHROMEOS)
};
std::string anon_input;
std::string anon_output;
......@@ -369,4 +385,53 @@ TEST_F(AnonymizerToolTest, AnonymizeChunk) {
EXPECT_EQ(anon_output, anonymizer_.Anonymize(anon_input));
}
#if defined(OS_CHROMEOS) // We only anonymize Android paths on Chrome OS.
// Verifies AnonymizeAndroidAppStoragePaths(): path components that follow
// <package_name> under .../android-data/data/data/ and
// .../android-data/data/user_de/<number>/ are redacted, while every other line
// is passed through unchanged.
TEST_F(AnonymizerToolTest, AnonymizeAndroidAppStoragePaths) {
// Input with no matching storage path must come back untouched.
EXPECT_EQ("", AnonymizeAndroidAppStoragePaths(""));
EXPECT_EQ("foo\nbar\n", AnonymizeAndroidAppStoragePaths("foo\nbar\n"));
// Fixture input: one "<size>\t<path>" entry per line.
constexpr char kDuOutput[] =
"112K\t/home/root/deadbeef1234/android-data/data/system_de\n"
// /data/data will be modified by the anonymizer.
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/a\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/bc\n"
// A path ending at the package directory has nothing after <package_name>,
// so it is expected to stay as-is.
"24K\t/home/root/deadbeef1234/android-data/data/data/pack.age1\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de\n"
// Non-ASCII (multi-byte) directory names, one and two characters long.
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de/"
"\xe3\x81\x82\n"
"8.1K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/de/"
"\xe3\x81\x82\xe3\x81\x83\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/ef\n"
"24K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2\n"
// /data/app won't.
"8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/a\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/bc\n"
"24K\t/home/root/deadbeef1234/android-data/data/app/pack.age1\n"
// /data/user_de will.
"8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/a\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/bc\n"
"24K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1\n"
"78M\t/home/root/deadbeef1234/android-data/data/data\n";
// Expected output, line-for-line with kDuOutput: single-character components
// are kept, longer ASCII components become "<first char>_".
constexpr char kDuOutputRedacted[] =
"112K\t/home/root/deadbeef1234/android-data/data/system_de\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/a\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pack.age1/b_\n"
"24K\t/home/root/deadbeef1234/android-data/data/data/pack.age1\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_\n"
// The non-ASCII directory names will become '*_'.
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_/*_\n"
"8.1K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/d_/*_\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2/e_\n"
"24K\t/home/root/deadbeef1234/android-data/data/data/pa.ckage2\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/a\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/app/pack.age1/bc\n"
"24K\t/home/root/deadbeef1234/android-data/data/app/pack.age1\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/a\n"
"8.0K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1/b_\n"
"24K\t/home/root/deadbeef1234/android-data/data/user_de/0/pack.age1\n"
"78M\t/home/root/deadbeef1234/android-data/data/data\n";
EXPECT_EQ(kDuOutputRedacted, AnonymizeAndroidAppStoragePaths(kDuOutput));
}
#endif // defined(OS_CHROMEOS)
} // namespace feedback
......@@ -207,7 +207,7 @@ TEST_F(FeedbackPrivateApiUnittest, Anonymize) {
EXPECT_TRUE(
RunReadLogSourceFunction(params, &result_reader_id, &result_string));
EXPECT_EQ(*params.reader_id, result_reader_id);
EXPECT_EQ("11:22:33:00:00:01", result_string);
EXPECT_EQ("[MAC OUI=11:22:33 IFACE=1]", result_string);
}
TEST_F(FeedbackPrivateApiUnittest, ReadLogSourceMultipleSources) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment