Commit 83a0342c authored by Benoit Lize's avatar Benoit Lize Committed by Commit Bot

tools/cygprofile: Collect multiple profiles.

This adds collections of several "phases" for each process, evenly
spaced. Additional processing will allow to construct a list of startup
only / commonly called functions.

Bug: 758566
Change-Id: I644fc80af42451d62a7d5d71dd0506680de80cae
Reviewed-on: https://chromium-review.googlesource.com/840021
Commit-Queue: Benoit L <lizeb@chromium.org>
Reviewed-by: default avatarMatthew Cary <mattcary@chromium.org>
Cr-Commit-Position: refs/heads/master@{#526004}
parent 772561ba
......@@ -8,10 +8,7 @@
#include <thread>
#include "base/android/library_loader/anchor_functions.h"
#include "base/files/file.h"
#include "base/format_macros.h"
#include "base/logging.h"
#include "base/strings/stringprintf.h"
#include "build/build_config.h"
#include "tools/cygprofile/lightweight_cygprofile.h"
......@@ -34,19 +31,19 @@ class DelayedDumper {
PLOG(FATAL) << "clock_gettime.";
uint64_t start_ns_since_epoch =
static_cast<uint64_t>(ts.tv_sec) * 1000 * 1000 * 1000 + ts.tv_nsec;
int pid = getpid();
std::thread([start_ns_since_epoch]() {
sleep(kDelayInSeconds);
auto path = base::StringPrintf(
"/data/local/tmp/chrome/cyglog/"
"cygprofile-instrumented-code-hitmap-%d-%" PRIu64 ".txt",
getpid(), start_ns_since_epoch);
StopAndDumpToFile(base::FilePath(path));
std::thread([pid, start_ns_since_epoch]() {
sleep(kInitialDelayInSeconds);
while (!SwitchToNextPhaseOrDump(pid, start_ns_since_epoch))
sleep(kDelayInSeconds);
})
.detach();
}
static constexpr int kDelayInSeconds = 30;
static constexpr int kInitialDelayInSeconds =
kPhases == 1 ? kDelayInSeconds : 5;
};
// Static initializer on purpose. Will disable instrumentation after
......
......@@ -32,6 +32,16 @@ constexpr size_t kBitfieldSize = 1 << 22;
constexpr size_t kMaxTextSizeInBytes = kBitfieldSize * (4 * 32);
constexpr size_t kMaxElements = 1 << 20;
// Data required to log reached offsets.
struct LogData {
std::atomic<uint32_t> offsets[kBitfieldSize];
std::atomic<size_t> ordered_offsets[kMaxElements];
std::atomic<size_t> index;
};
LogData g_data[kPhases];
std::atomic<int> g_data_index;
// |RecordAddress()| adds an element to a concurrent bitset and to a concurrent
// append-only list of offsets.
//
......@@ -52,28 +62,13 @@ constexpr size_t kMaxElements = 1 << 20;
// - Capacity of the set is limited by |kMaxElements|.
// - Some insertions at the end of collection may be lost.
// |g_offsets| and |g_ordered_offsets| are allocated in .bss, as
// std::atomic<uint32_t> and std::atomic<size_t> are guarangeed to behave like
// their respective template type parameters with respect to size and
// initialization.
// Having these arrays in .bss simplifies initialization.
std::atomic<uint32_t> g_offsets[kBitfieldSize];
// Non-null iff collection is enabled. Also use to as the pointer to the offset
// array to save a global load in the instrumentation.
std::atomic<std::atomic<uint32_t>*> g_enabled_and_offsets = {g_offsets};
// Ordered list of recorded offsets.
std::atomic<size_t> g_ordered_offsets[kMaxElements];
// Next free slot.
std::atomic<size_t> g_ordered_offsets_index;
// Records that |address| has been reached, if recording is enabled.
// To avoid any risk of infinite recursion, this *must* *never* call any
// instrumented function.
template <bool for_testing>
void RecordAddress(size_t address) {
auto* offsets = g_enabled_and_offsets.load(std::memory_order_relaxed);
if (!offsets)
int index = g_data_index.load(std::memory_order_relaxed);
if (index == kPhases)
return;
const size_t start =
......@@ -89,14 +84,15 @@ void RecordAddress(size_t address) {
size_t offset = address - start;
static_assert(sizeof(int) == 4,
"Collection and processing code assumes that sizeof(int) == 4");
size_t index = offset / 4;
size_t offset_index = offset / 4;
auto* offsets = g_data[index].offsets;
// Atomically set the corresponding bit in the array.
std::atomic<uint32_t>* element = offsets + (index / 32);
std::atomic<uint32_t>* element = offsets + (offset_index / 32);
// First, a racy check. This saves a CAS if the bit is already set, and
// allows the cache line to remain shared acoss CPUs in this case.
uint32_t value = element->load(std::memory_order_relaxed);
uint32_t mask = 1 << (index % 32);
uint32_t mask = 1 << (offset_index % 32);
if (value & mask)
return;
......@@ -108,48 +104,32 @@ void RecordAddress(size_t address) {
// elements list.
// Use relaxed ordering, as the value is not published, or used for
// synchronization.
size_t ordered_offsets_index =
g_ordered_offsets_index.fetch_add(1, std::memory_order_relaxed);
if (UNLIKELY(ordered_offsets_index >= kMaxElements)) {
auto* ordered_offsets = g_data[index].ordered_offsets;
auto& ordered_offsets_index = g_data[index].index;
size_t insertion_index =
ordered_offsets_index.fetch_add(1, std::memory_order_relaxed);
if (UNLIKELY(insertion_index >= kMaxElements)) {
Disable();
LOG(FATAL) << "Too many reached offsets";
}
g_ordered_offsets[ordered_offsets_index].store(offset,
std::memory_order_relaxed);
ordered_offsets[insertion_index].store(offset, std::memory_order_relaxed);
}
} // namespace
void Disable() {
g_enabled_and_offsets.store(nullptr, std::memory_order_relaxed);
std::atomic_thread_fence(std::memory_order_seq_cst);
}
void SanityChecks() {
CHECK_LT(base::android::kEndOfText - base::android::kStartOfText,
kMaxTextSizeInBytes);
base::android::CheckOrderingSanity();
}
// Stops recording, and outputs the data to |path|.
void StopAndDumpToFile(const base::FilePath& path) {
Disable();
auto file = base::File(base::FilePath(path), base::File::FLAG_CREATE_ALWAYS |
base::File::FLAG_WRITE);
void DumpToFile(const base::FilePath& path, const LogData& data) {
auto file =
base::File(path, base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
if (!file.IsValid()) {
PLOG(ERROR) << "Could not open " << path;
return;
}
std::atomic_thread_fence(std::memory_order_seq_cst);
// |g_ordered_offset_index| is the index of the next insertion.
size_t count = g_ordered_offsets_index.load(std::memory_order_relaxed) - 1;
size_t count = data.index - 1;
for (size_t i = 0; i < count; i++) {
// |g_ordered_offsets| is initialized to 0, so a 0 in the middle of it
// indicates a case where the index was incremented, but the write is not
// visible in this thread yet. Safe to skip, also because the function at
// the start of text is never called.
auto offset = g_ordered_offsets[i].load(std::memory_order_relaxed);
auto offset = data.ordered_offsets[i].load(std::memory_order_relaxed);
if (!offset)
continue;
auto offset_str = base::StringPrintf("%" PRIuS "\n", offset);
......@@ -158,14 +138,51 @@ void StopAndDumpToFile(const base::FilePath& path) {
}
}
// Stops recording, and outputs the data to |path|.
void StopAndDumpToFile(int pid, uint64_t start_ns_since_epoch) {
Disable();
for (int phase = 0; phase < kPhases; phase++) {
auto path = base::StringPrintf(
"/data/local/tmp/chrome/cyglog/"
"cygprofile-instrumented-code-hitmap-%d-%" PRIu64 ".txt_%d",
pid, start_ns_since_epoch, phase);
DumpToFile(base::FilePath(path), g_data[phase]);
}
}
} // namespace
void Disable() {
g_data_index.store(kPhases, std::memory_order_relaxed);
std::atomic_thread_fence(std::memory_order_seq_cst);
}
void SanityChecks() {
CHECK_LT(base::android::kEndOfText - base::android::kStartOfText,
kMaxTextSizeInBytes);
base::android::CheckOrderingSanity();
}
bool SwitchToNextPhaseOrDump(int pid, uint64_t start_ns_since_epoch) {
int before = g_data_index.fetch_add(1, std::memory_order_relaxed);
if (before + 1 == kPhases) {
StopAndDumpToFile(pid, start_ns_since_epoch);
return true;
}
return false;
}
void ResetForTesting() {
Disable();
g_ordered_offsets_index = 0;
memset(reinterpret_cast<uint32_t*>(g_offsets), 0,
sizeof(uint32_t) * kBitfieldSize);
memset(reinterpret_cast<size_t*>(g_ordered_offsets), 0,
sizeof(size_t) * kMaxElements);
g_enabled_and_offsets.store(g_offsets);
g_data_index = 0;
for (int i = 0; i < kPhases; i++) {
memset(reinterpret_cast<uint32_t*>(g_data[i].offsets), 0,
sizeof(uint32_t) * kBitfieldSize);
memset(reinterpret_cast<uint32_t*>(g_data[i].ordered_offsets), 0,
sizeof(uint32_t) * kMaxElements);
g_data[i].index.store(0);
}
}
void RecordAddressForTesting(size_t address) {
......@@ -174,10 +191,9 @@ void RecordAddressForTesting(size_t address) {
std::vector<size_t> GetOrderedOffsetsForTesting() {
std::vector<size_t> result;
size_t max_index = g_ordered_offsets_index.load(std::memory_order_relaxed);
size_t max_index = g_data[0].index.load(std::memory_order_relaxed);
for (size_t i = 0; i < max_index; ++i) {
auto value = g_ordered_offsets[i].load(std::memory_order_relaxed);
auto value = g_data[0].ordered_offsets[i].load(std::memory_order_relaxed);
if (value)
result.push_back(value);
}
......
......@@ -8,11 +8,8 @@
#include <cstdint>
#include <vector>
namespace base {
class FilePath;
}
namespace cygprofile {
constexpr int kPhases = 1;
constexpr size_t kStartOfTextForTesting = 1000;
constexpr size_t kEndOfTextForTesting = kStartOfTextForTesting + 1000 * 1000;
......@@ -22,8 +19,10 @@ void Disable();
// CHECK()s that the offsets are correctly set up.
void SanityChecks();
// Stops recording, and dump the results to |path|.
void StopAndDumpToFile(const base::FilePath& path);
// Switches to the next recording phase. If called from the last phase, dumps
// the data to disk, and returns |true|. |pid| is the current process pid, and
// |start_ns_since_epoch| the process start timestamp.
bool SwitchToNextPhaseOrDump(int pid, uint64_t start_ns_since_epoch);
// Record an |address|, if recording is enabled. Only for testing.
void RecordAddressForTesting(size_t address);
......
......@@ -234,10 +234,11 @@ def main():
reached_symbols = GetReachedSymbolsFromDumpsAndMaybeWriteOffsets(
sorted_dumps, instrumented_native_lib, args.offsets_output)
logging.info('Reached Symbols = %d', len(reached_symbols))
total_size = sum(s.size for s in reached_symbols)
logging.info('Total reached size = %d', total_size)
matched_in_regular_build = MatchSymbolsInRegularBuild(reached_symbols,
regular_native_lib)
total_size = sum(s.size for s in matched_in_regular_build)
logging.info('Total reached size = %d', total_size)
with open(args.output, 'w') as f:
for s in matched_in_regular_build:
f.write(s.name + '\n')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment