Commit 1ee3c451 authored by Brian Geffon's avatar Brian Geffon Committed by Commit Bot

CrOS: userspace_swap: Introduce fault requeue on userfaultfd.

When handling userfaultfd faults UFFDIO_COPYRANGE and UFFDIO_ZEROPAGE
can both return -EAGAIN when memory mappings are changing. It will
continue to do so until the event which was causing the change is
read from the userfaultfd. This introduces a simple mechanism for
fault handlers to return false when they couldn't handle a fault
due to a -EAGAIN. Those faults will be queued and redelivered later.

Bug: chromium:1067833
Change-Id: I6ef5ce6ded969be95d320c02bad5e33b91de6a1e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2450702
Reviewed-by: Matthew Denton <mpdenton@chromium.org>
Commit-Queue: Brian Geffon <bgeffon@chromium.org>
Cr-Commit-Position: refs/heads/master@{#814418}
parent 732899e3
...@@ -236,7 +236,7 @@ std::unique_ptr<UserfaultFD> UserfaultFD::Create(Features features) { ...@@ -236,7 +236,7 @@ std::unique_ptr<UserfaultFD> UserfaultFD::Create(Features features) {
#endif #endif
} }
void UserfaultFD::DispatchMessage(const uffd_msg& msg) { bool UserfaultFD::DispatchMessage(const uffd_msg& msg) {
#if defined(HAS_USERFAULTFD) #if defined(HAS_USERFAULTFD)
if (msg.event == UFFD_EVENT_UNMAP) { if (msg.event == UFFD_EVENT_UNMAP) {
handler_->Unmapped(msg.arg.remove.start, msg.arg.remove.end); handler_->Unmapped(msg.arg.remove.start, msg.arg.remove.end);
...@@ -245,25 +245,41 @@ void UserfaultFD::DispatchMessage(const uffd_msg& msg) { ...@@ -245,25 +245,41 @@ void UserfaultFD::DispatchMessage(const uffd_msg& msg) {
} else if (msg.event == UFFD_EVENT_REMAP) { } else if (msg.event == UFFD_EVENT_REMAP) {
handler_->Remapped(msg.arg.remap.from, msg.arg.remap.to, msg.arg.remap.len); handler_->Remapped(msg.arg.remap.from, msg.arg.remap.to, msg.arg.remap.len);
} else if (msg.event == UFFD_EVENT_PAGEFAULT) { } else if (msg.event == UFFD_EVENT_PAGEFAULT) {
handler_->Pagefault(msg.arg.pagefault.address, pending_faults_.push_back(msg);
msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE
? UserfaultFDHandler::PagefaultFlags::kWriteFault
: UserfaultFDHandler::PagefaultFlags::kReadFault,
msg.arg.pagefault.feat.ptid);
} else { } else {
DLOG(ERROR) << "Unknown userfaultfd event: " << msg.event; DLOG(ERROR) << "Unknown userfaultfd event: " << msg.event;
} }
return DrainPendingFaults();
#endif #endif
return true;
}
bool UserfaultFD::DrainPendingFaults() {
while (!pending_faults_.empty()) {
const uffd_msg& pending_fault = pending_faults_.front();
CHECK(pending_fault.event == UFFD_EVENT_PAGEFAULT);
if (!handler_->Pagefault(
pending_fault.arg.pagefault.address,
pending_fault.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE
? UserfaultFDHandler::PagefaultFlags::kWriteFault
: UserfaultFDHandler::PagefaultFlags::kReadFault,
pending_fault.arg.pagefault.feat.ptid)) {
// It'll get retried later (it wasn't popped).
return false;
}
// And we successfully handled it, let's remove it and move along.
pending_faults_.pop_front();
}
return true;
} }
void UserfaultFD::UserfaultFDReadable() { void UserfaultFD::UserfaultFDReadable() {
#if defined(HAS_USERFAULTFD) #if defined(HAS_USERFAULTFD)
base::ScopedBlockingCall scoped_blocking_call(FROM_HERE, base::ScopedBlockingCall scoped_blocking_call(FROM_HERE,
base::BlockingType::WILL_BLOCK); base::BlockingType::WILL_BLOCK);
constexpr int kMsgsToRead = 32; // Try to read up to 32 messages in one shot. uffd_msg msg;
uffd_msg msgs[kMsgsToRead];
int msgs_read = 0;
// It's very important that messages are posted in the order they were read // It's very important that messages are posted in the order they were read
// otherwise, if another thread also attempted to handle the // otherwise, if another thread also attempted to handle the
...@@ -272,20 +288,23 @@ void UserfaultFD::UserfaultFDReadable() { ...@@ -272,20 +288,23 @@ void UserfaultFD::UserfaultFDReadable() {
base::ReleasableAutoLock read_locker(&read_lock_); base::ReleasableAutoLock read_locker(&read_lock_);
do { do {
memset(msgs, 0, sizeof(msgs)); memset(&msg, 0, sizeof(msg));
// We start by draining all messages and then we process them in order. // We start by draining all messages and then we process them in order.
int bytes_read = HANDLE_EINTR(read(fd_.get(), msgs, sizeof(msgs))); int bytes_read = HANDLE_EINTR(read(fd_.get(), &msg, sizeof(msg)));
if (bytes_read <= 0) { if (bytes_read <= 0) {
if (errno == EAGAIN) {
// No problems here.
return;
}
// We either got an EOF or an EBADF to indicate that we're done. // We either got an EOF or an EBADF to indicate that we're done.
if (errno == EBADF || bytes_read == 0) { if (bytes_read == 0 || errno == EBADF) {
handler_->Closed(0); // EBADF will indicate closed at this point. handler_->Closed(0); // EBADF will indicate closed at this point.
} else if (errno == EWOULDBLOCK) {
// No problems here.
// But make sure before we exit this loop that if there are still queued
// messages that we attempt to drain them.
DrainPendingFaults();
return;
} else { } else {
PLOG(ERROR) << "Userfaultfd encountered an expected error"; PLOG(ERROR) << "Userfaultfd encountered an expected error";
handler_->Closed(errno); handler_->Closed(errno);
...@@ -296,15 +315,9 @@ void UserfaultFD::UserfaultFDReadable() { ...@@ -296,15 +315,9 @@ void UserfaultFD::UserfaultFDReadable() {
} }
// Partial reads CANNOT happen. // Partial reads CANNOT happen.
CHECK_EQ(bytes_read % sizeof(msgs[0]), 0u); CHECK_EQ(bytes_read, static_cast<int>(sizeof(msg)));
DispatchMessage(msg);
msgs_read = bytes_read / sizeof(msgs[0]); } while (true);
DCHECK(msgs_read);
for (int i = 0; i < msgs_read; ++i) {
DispatchMessage(msgs[i]);
}
} while (msgs_read == kMsgsToRead);
#endif #endif
} }
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#ifndef CHROMEOS_MEMORY_USERSPACE_SWAP_USERFAULTFD_H_ #ifndef CHROMEOS_MEMORY_USERSPACE_SWAP_USERFAULTFD_H_
#define CHROMEOS_MEMORY_USERSPACE_SWAP_USERFAULTFD_H_ #define CHROMEOS_MEMORY_USERSPACE_SWAP_USERFAULTFD_H_
#include <list>
#include <memory> #include <memory>
#include "base/files/file_descriptor_watcher_posix.h" #include "base/files/file_descriptor_watcher_posix.h"
...@@ -23,8 +24,22 @@ namespace userspace_swap { ...@@ -23,8 +24,22 @@ namespace userspace_swap {
// receive depend on the features used when the userfaultfd is created. It's // receive depend on the features used when the userfaultfd is created. It's
// always safe to use the UserfaultFD class associated with this handler during // always safe to use the UserfaultFD class associated with this handler during
// a callback, you're guaranteed that a UserfaultFD will always outlive its // a callback, you're guaranteed that a UserfaultFD will always outlive its
// associated handler. This guarantee exists because the UserfaultFD will join // associated handler.
// the fault handling thread before completing destruction. //
// For the purpose of a fault handler we will refer to the events received as
// two different types: pagefault events and non-pagefault events. You're
// guaranteed that pagefault events and non-pagefault events will all be
// delivered in order with respect to their group. Page fault events that
// couldn't be handled will be redelivered later until they are able to be
// handled. This isn't a problem, because when responding to a pagefault event
// any attempt to resolve it using CopyToRange or ZeroRange would fail if PTEs
// already exist or the mapping is gone. For those reasons it's important that
// you always read and handle those non-pagefault events before pagefaults. A
// more concrete example of this would be: suppose you had a MADV_DONTNEED
// racing with a pagefault to be handled. If you were to process the pagefault
// first before observing the Remove event you could potentially restore stale
// memory when the correct action after the MADV_DONTNEED would be to zero the
// range.
class CHROMEOS_EXPORT UserfaultFDHandler { class CHROMEOS_EXPORT UserfaultFDHandler {
public: public:
// PagefaultFlags are passed in the Pagefault Handler. // PagefaultFlags are passed in the Pagefault Handler.
...@@ -39,7 +54,12 @@ class CHROMEOS_EXPORT UserfaultFDHandler { ...@@ -39,7 +54,12 @@ class CHROMEOS_EXPORT UserfaultFDHandler {
// Finally if kFeatureThreadID is set when the UserfaultFD is created, |tid| // Finally if kFeatureThreadID is set when the UserfaultFD is created, |tid|
// will be set to the thread id that caused the fault, otherwise it will be // will be set to the thread id that caused the fault, otherwise it will be
// zero. // zero.
virtual void Pagefault(uintptr_t fault_address, //
// The implementation is responsible for returning true or false, when true
// is returned it means the fault was handled, when false is returned the fault
// will be retried later. The reason for this is you cannot resolve a fault
// while mappings are changing.
virtual bool Pagefault(uintptr_t fault_address,
PagefaultFlags fault_flags, PagefaultFlags fault_flags,
base::PlatformThreadId tid) = 0; base::PlatformThreadId tid) = 0;
...@@ -91,7 +111,7 @@ class CHROMEOS_EXPORT UserfaultFD { ...@@ -91,7 +111,7 @@ class CHROMEOS_EXPORT UserfaultFD {
kFeatureRemove = 1 << 2, kFeatureRemove = 1 << 2,
// kFeatureThreadID will cause Pagefault callbacks to include the faulting // kFeatureThreadID will cause Pagefault callbacks to include the faulting
// thread id. // thread id.
kFeatureThreadID = 1 << 3 kFeatureThreadID = 1 << 3,
}; };
// Note: Although it's documented UFFDIO_REGISTER_MODE_WP is not actually // Note: Although it's documented UFFDIO_REGISTER_MODE_WP is not actually
...@@ -166,7 +186,16 @@ class CHROMEOS_EXPORT UserfaultFD { ...@@ -166,7 +186,16 @@ class CHROMEOS_EXPORT UserfaultFD {
void UserfaultFDReadable(); void UserfaultFDReadable();
void DispatchMessage(const uffd_msg& msg); bool DispatchMessage(const uffd_msg& msg);
// DrainPendingFaults will attempt to deliver any pending fault messages.
bool DrainPendingFaults();
// Because userfaultfd will return -EAGAIN when the memory maps are changing
// until the remap, unmap, or remove message has been read off the userfaultfd
// we provide a mechanism for users to re-enqueue the fault to be delivered
// again.
std::list<uffd_msg> pending_faults_;
// We need to make sure messages are read and posted in order so we prevent // We need to make sure messages are read and posted in order so we prevent
// two different threads from simultaneously reading and posting. // two different threads from simultaneously reading and posting.
......
...@@ -152,7 +152,7 @@ class MockUserfaultFDHandler : public UserfaultFDHandler { ...@@ -152,7 +152,7 @@ class MockUserfaultFDHandler : public UserfaultFDHandler {
~MockUserfaultFDHandler() override = default; ~MockUserfaultFDHandler() override = default;
MOCK_METHOD3(Pagefault, MOCK_METHOD3(Pagefault,
void(uintptr_t fault_address, bool(uintptr_t fault_address,
PagefaultFlags flags, PagefaultFlags flags,
base::PlatformThreadId tid)); base::PlatformThreadId tid));
MOCK_METHOD2(Unmapped, void(uintptr_t range_start, uintptr_t range_end)); MOCK_METHOD2(Unmapped, void(uintptr_t range_start, uintptr_t range_end));
...@@ -224,6 +224,54 @@ TEST_F(UserfaultFDTest, SimpleZeroPageReadFault) { ...@@ -224,6 +224,54 @@ TEST_F(UserfaultFDTest, SimpleZeroPageReadFault) {
UserfaultFDHandler::PagefaultFlags, UserfaultFDHandler::PagefaultFlags,
base::PlatformThreadId) { base::PlatformThreadId) {
HandleWithZeroRange(uffd_ptr, fault_address, kPageSize); HandleWithZeroRange(uffd_ptr, fault_address, kPageSize);
return true;
}));
ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler)));
base::RunLoop run_loop;
ExecuteOffMainThread([&]() {
// Now generate a page fault by reading
// from the page, this will invoke our
// Pagefault handler above which will
// zero fill the page for us.
EXPECT_EQ(*static_cast<int*>(mem), 0);
run_loop.Quit();
});
run_loop.Run();
}
// This test validates that when a fault handler returns false the fault will be
// re-enqueued and redelivered later.
TEST_F(UserfaultFDTest, SimpleZeroPageReadFaultRetry) {
ASSERT_TRUE(CreateUffd());
std::unique_ptr<StrictMock<MockUserfaultFDHandler>> handler(
new StrictMock<MockUserfaultFDHandler>);
/* Create a simple mapping with no PTEs */
ScopedMemory mem(kPageSize);
ASSERT_TRUE(mem.is_valid());
ASSERT_TRUE(uffd_->RegisterRange(UserfaultFD::RegisterMode::kRegisterMissing,
mem, kPageSize));
// The first fault handle will return false, the second will return true.
auto* uffd_ptr = uffd_.get();
EXPECT_CALL(*handler,
Pagefault(static_cast<uintptr_t>(mem),
UserfaultFDHandler::PagefaultFlags::kReadFault,
/* we didn't register for tids */ 0))
.WillOnce(
Invoke([](uintptr_t fault_address, UserfaultFDHandler::PagefaultFlags,
base::PlatformThreadId) { return false; }))
.WillOnce(Invoke([uffd_ptr](uintptr_t fault_address,
UserfaultFDHandler::PagefaultFlags,
base::PlatformThreadId) {
HandleWithZeroRange(uffd_ptr, fault_address, kPageSize);
return true;
})); }));
ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler))); ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler)));
...@@ -273,6 +321,7 @@ TEST_F(UserfaultFDTest, SimpleZeroPageReadFaultWithTid) { ...@@ -273,6 +321,7 @@ TEST_F(UserfaultFDTest, SimpleZeroPageReadFaultWithTid) {
UserfaultFDHandler::PagefaultFlags, UserfaultFDHandler::PagefaultFlags,
base::PlatformThreadId) { base::PlatformThreadId) {
HandleWithZeroRange(uffd_ptr, fault_address, kPageSize); HandleWithZeroRange(uffd_ptr, fault_address, kPageSize);
return true;
})); }));
ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler))); ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler)));
...@@ -317,6 +366,7 @@ TEST_F(UserfaultFDTest, SimpleZeroPageWriteFault) { ...@@ -317,6 +366,7 @@ TEST_F(UserfaultFDTest, SimpleZeroPageWriteFault) {
UserfaultFDHandler::PagefaultFlags, UserfaultFDHandler::PagefaultFlags,
base::PlatformThreadId) { base::PlatformThreadId) {
HandleWithZeroRange(uffd_ptr, fault_address, kPageSize); HandleWithZeroRange(uffd_ptr, fault_address, kPageSize);
return true;
})); }));
ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler))); ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler)));
...@@ -371,6 +421,7 @@ TEST_F(UserfaultFDTest, SimpleReadFaultResolveWithCopyPage) { ...@@ -371,6 +421,7 @@ TEST_F(UserfaultFDTest, SimpleReadFaultResolveWithCopyPage) {
uintptr_t) { uintptr_t) {
HandleWithCopyRange(uffd_ptr, fault_address, HandleWithCopyRange(uffd_ptr, fault_address,
reinterpret_cast<uintptr_t>(buf.data()), kPageSize); reinterpret_cast<uintptr_t>(buf.data()), kPageSize);
return true;
})); }));
ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler))); ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler)));
...@@ -423,6 +474,7 @@ TEST_F(UserfaultFDTest, ReadFaultResolveWithCopyPageForMultiplePages) { ...@@ -423,6 +474,7 @@ TEST_F(UserfaultFDTest, ReadFaultResolveWithCopyPageForMultiplePages) {
HandleWithCopyRange(uffd_ptr, fault_address, HandleWithCopyRange(uffd_ptr, fault_address,
reinterpret_cast<uintptr_t>(buf.data()), reinterpret_cast<uintptr_t>(buf.data()),
kRegionSize); kRegionSize);
return true;
})); }));
ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler))); ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler)));
...@@ -487,6 +539,7 @@ TEST_F(UserfaultFDTest, ...@@ -487,6 +539,7 @@ TEST_F(UserfaultFDTest,
HandleWithCopyRange(uffd_ptr, fault_address, HandleWithCopyRange(uffd_ptr, fault_address,
reinterpret_cast<uintptr_t>(pg_fill_buf.data()), reinterpret_cast<uintptr_t>(pg_fill_buf.data()),
kPageSize); kPageSize);
return true;
})); }));
ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler))); ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler)));
...@@ -558,6 +611,7 @@ TEST_F(UserfaultFDTest, ReadFaultRegisteredOnPartialRange) { ...@@ -558,6 +611,7 @@ TEST_F(UserfaultFDTest, ReadFaultRegisteredOnPartialRange) {
HandleWithCopyRange(uffd_ptr, fault_address, HandleWithCopyRange(uffd_ptr, fault_address,
reinterpret_cast<uintptr_t>(pg_fill_buf.data()), reinterpret_cast<uintptr_t>(pg_fill_buf.data()),
kPageSize); kPageSize);
return true;
})); }));
ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler))); ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler)));
...@@ -640,6 +694,7 @@ TEST_F(UserfaultFDTest, WriteFaultRegisteredOnPartialRange) { ...@@ -640,6 +694,7 @@ TEST_F(UserfaultFDTest, WriteFaultRegisteredOnPartialRange) {
HandleWithCopyRange(uffd_ptr, fault_address, HandleWithCopyRange(uffd_ptr, fault_address,
reinterpret_cast<uintptr_t>(pg_fill_buf.data()), reinterpret_cast<uintptr_t>(pg_fill_buf.data()),
kPageSize); kPageSize);
return true;
})); }));
ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler))); ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler)));
...@@ -994,6 +1049,7 @@ TEST_F(UserfaultFDTest, RemapAndFaultAtNewAddress) { ...@@ -994,6 +1049,7 @@ TEST_F(UserfaultFDTest, RemapAndFaultAtNewAddress) {
UserfaultFDHandler::PagefaultFlags, UserfaultFDHandler::PagefaultFlags,
base::PlatformThreadId) { base::PlatformThreadId) {
HandleWithZeroRange(uffd_ptr, fault_address, kPageSize); HandleWithZeroRange(uffd_ptr, fault_address, kPageSize);
return true;
})); }));
// And because the userfaultfd is attached to the VMA when it's remapped and // And because the userfaultfd is attached to the VMA when it's remapped and
...@@ -1006,6 +1062,7 @@ TEST_F(UserfaultFDTest, RemapAndFaultAtNewAddress) { ...@@ -1006,6 +1062,7 @@ TEST_F(UserfaultFDTest, RemapAndFaultAtNewAddress) {
UserfaultFDHandler::PagefaultFlags, UserfaultFDHandler::PagefaultFlags,
base::PlatformThreadId) { base::PlatformThreadId) {
HandleWithZeroRange(uffd_ptr, fault_address, kPageSize); HandleWithZeroRange(uffd_ptr, fault_address, kPageSize);
return true;
})); }));
ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler))); ASSERT_TRUE(uffd_->StartWaitingForEvents(std::move(handler)));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment