Reduce aggressive WebGPU command flushing

Instead of eagerly flushing commands whenever there is a pending callback, enqueue a microtask if one hasn't been already, to perform a flush if it is still needed. This optimization prevents excess IPC flushes. Bug: none Change-Id: Icc48978fdc5d6a54ef61f9379bb4c78d3c75e08a Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2380084 Reviewed-by: Kai Ninomiya <kainino@chromium.org> Reviewed-by: Corentin Wallez <cwallez@chromium.org> Commit-Queue: Austin Eng <enga@chromium.org> Cr-Commit-Position: refs/heads/master@{#803560}

Reduce aggressive WebGPU command flushing
Instead of eagerly flushing commands whenever there is a pending callback, enqueue a microtask if one hasn't been already, to perform a flush if it is still needed. This optimization prevents excess IPC flushes. Bug: none Change-Id: Icc48978fdc5d6a54ef61f9379bb4c78d3c75e08a Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2380084 Reviewed-by: Kai Ninomiya <kainino@chromium.org> Reviewed-by: Corentin Wallez <cwallez@chromium.org> Commit-Queue: Austin Eng <enga@chromium.org> Cr-Commit-Position: refs/heads/master@{#803560}
0786654a · Austin Eng · Commit Bot · 21401320 · 0786654a · 0786654a
Commit 0786654a authored Sep 01, 2020 by Austin Eng Committed by Commit Bot Sep 01, 2020
11 changed files
--- a/gpu/command_buffer/client/webgpu_implementation.cc
+++ b/gpu/command_buffer/client/webgpu_implementation.cc
@@ -141,12 +141,20 @@ bool WebGPUCommandSerializer::Flush() {
                          c2s_buffer_.offset(), c2s_put_offset_);
    c2s_put_offset_ = 0;
    c2s_buffer_.Release();
+    client_awaiting_flush_ = false;
  }

  memory_transfer_service_->FreeHandlesPendingToken(helper_->InsertToken());
  return true;
 }

+void WebGPUCommandSerializer::SetClientAwaitingFlush(bool awaiting_flush) {
+  // If awaiting_flush is true, but the c2s_buffer_ is invalid (empty), that
+  // means the last command right before this caused a flush. Another flush is
+  // not needed.
+  client_awaiting_flush_ = awaiting_flush && c2s_buffer_.valid();
+}
+
 void WebGPUCommandSerializer::HandleGpuControlLostContext() {
  // Immediately forget pending commands.
  c2s_buffer_.Discard();
@@ -572,6 +580,44 @@ void WebGPUImplementation::FlushCommands() {
  helper_->Flush();
 }

+void WebGPUImplementation::EnsureAwaitingFlush(
+    DawnDeviceClientID device_client_id,
+    bool* needs_flush) {
+#if BUILDFLAG(USE_DAWN)
+  WebGPUCommandSerializer* command_serializer =
+      GetCommandSerializerWithDeviceClientID(device_client_id);
+  DCHECK(command_serializer);
+
+  // If there is already a flush waiting, we don't need to flush.
+  // We only want to set |needs_flush| on state transition from
+  // false -> true.
+  if (command_serializer->ClientAwaitingFlush()) {
+    *needs_flush = false;
+    return;
+  }
+
+  // Set the state to waiting for flush, and then write |needs_flush|.
+  // Could still be false if there's no data to flush.
+  command_serializer->SetClientAwaitingFlush(true);
+  *needs_flush = command_serializer->ClientAwaitingFlush();
+#else
+  *needs_flush = false;
+#endif
+}
+
+void WebGPUImplementation::FlushAwaitingCommands(
+    DawnDeviceClientID device_client_id) {
+#if BUILDFLAG(USE_DAWN)
+  WebGPUCommandSerializer* command_serializer =
+      GetCommandSerializerWithDeviceClientID(device_client_id);
+  DCHECK(command_serializer);
+  if (command_serializer->ClientAwaitingFlush()) {
+    command_serializer->Flush();
+    helper_->Flush();
+  }
+#endif
+}
+
 WGPUDevice WebGPUImplementation::GetDevice(
    DawnDeviceClientID device_client_id) {
 #if BUILDFLAG(USE_DAWN)

--- a/gpu/command_buffer/client/webgpu_implementation.h
+++ b/gpu/command_buffer/client/webgpu_implementation.h
@@ -47,6 +47,9 @@ class WebGPUCommandSerializer final : public dawn_wire::CommandSerializer {
  void* GetCmdSpace(size_t size) final;
  bool Flush() final;

+  void SetClientAwaitingFlush(bool awaiting_flush);
+  bool ClientAwaitingFlush() const { return client_awaiting_flush_; }
+
  // Called upon context lost.
  void HandleGpuControlLostContext();

@@ -66,6 +69,8 @@ class WebGPUCommandSerializer final : public dawn_wire::CommandSerializer {
  uint32_t c2s_put_offset_ = 0;
  std::unique_ptr<TransferBuffer> c2s_transfer_buffer_;
  ScopedTransferBufferPtr c2s_buffer_;
+
+  bool client_awaiting_flush_ = false;
 };
 #endif

@@ -153,6 +158,9 @@ class WEBGPU_EXPORT WebGPUImplementation final : public WebGPUInterface,
  // WebGPUInterface implementation
  const DawnProcTable& GetProcs() const override;
  void FlushCommands() override;
+  void EnsureAwaitingFlush(DawnDeviceClientID device_client_id,
+                           bool* needs_flush) override;
+  void FlushAwaitingCommands(DawnDeviceClientID device_client_id) override;
  WGPUDevice GetDevice(DawnDeviceClientID device_client_id) override;
  ReservedTexture ReserveTexture(DawnDeviceClientID device_client_id) override;
  bool RequestAdapterAsync(

--- a/gpu/command_buffer/client/webgpu_interface.h
+++ b/gpu/command_buffer/client/webgpu_interface.h
@@ -28,7 +28,21 @@ class WebGPUInterface : public InterfaceBase {
  virtual ~WebGPUInterface() {}

  virtual const DawnProcTable& GetProcs() const = 0;
+
+  // Flush all commands.
  virtual void FlushCommands() = 0;
+
+  // Ensure the awaiting flush flag is set on the device client. Writes false
+  // to |needs_flush| if a flush has already been indicated, or a flush is not
+  // needed (there may be no commands to flush). Writes true to |needs_flush|
+  // if the caller should schedule a flush.
+  virtual void EnsureAwaitingFlush(DawnDeviceClientID device_client_id,
+                                   bool* needs_flush) = 0;
+
+  // If the awaiting flush flag is set, flushes commands. Otherwise, does
+  // nothing.
+  virtual void FlushAwaitingCommands(DawnDeviceClientID device_client_id) = 0;
+
  virtual WGPUDevice GetDevice(DawnDeviceClientID device_client_id) = 0;
  virtual ReservedTexture ReserveTexture(
      DawnDeviceClientID device_client_id) = 0;

--- a/gpu/command_buffer/client/webgpu_interface_stub.cc
+++ b/gpu/command_buffer/client/webgpu_interface_stub.cc
@@ -23,6 +23,11 @@ const DawnProcTable& WebGPUInterfaceStub::GetProcs() const {
  return null_procs_;
 }
 void WebGPUInterfaceStub::FlushCommands() {}
+void WebGPUInterfaceStub::EnsureAwaitingFlush(
+    DawnDeviceClientID device_client_id,
+    bool* needs_flush) {}
+void WebGPUInterfaceStub::FlushAwaitingCommands(
+    DawnDeviceClientID device_client_id) {}
 WGPUDevice WebGPUInterfaceStub::GetDevice(DawnDeviceClientID device_client_id) {
  return nullptr;
 }

--- a/gpu/command_buffer/client/webgpu_interface_stub.h
+++ b/gpu/command_buffer/client/webgpu_interface_stub.h
@@ -25,6 +25,9 @@ class WebGPUInterfaceStub : public WebGPUInterface {
  // WebGPUInterface implementation
  const DawnProcTable& GetProcs() const override;
  void FlushCommands() override;
+  void EnsureAwaitingFlush(DawnDeviceClientID device_client_id,
+                           bool* needs_flush) override;
+  void FlushAwaitingCommands(DawnDeviceClientID device_client_id) override;
  WGPUDevice GetDevice(DawnDeviceClientID device_client_id) override;
  ReservedTexture ReserveTexture(DawnDeviceClientID device_client_id) override;
  bool RequestAdapterAsync(

--- a/third_party/blink/renderer/modules/webgpu/dawn_object.cc
+++ b/third_party/blink/renderer/modules/webgpu/dawn_object.cc
@@ -6,6 +6,7 @@

 #include "gpu/command_buffer/client/webgpu_interface.h"
 #include "third_party/blink/renderer/modules/webgpu/gpu_device.h"
+#include "third_party/blink/renderer/platform/bindings/microtask.h"

 namespace blink {

@@ -62,6 +63,26 @@ uint64_t DeviceTreeObject::GetDeviceClientID() const {
  return device_client_serializer_holder_->device_client_id_;
 }

+void DeviceTreeObject::EnsureFlush() {
+  bool needs_flush = false;
+  GetInterface()->EnsureAwaitingFlush(
+      device_client_serializer_holder_->device_client_id_, &needs_flush);
+  if (!needs_flush) {
+    // We've already enqueued a microtask to flush, or the command buffer
+    // is empty. Do nothing.
+    return;
+  }
+  Microtask::EnqueueMicrotask(WTF::Bind(
+      [](scoped_refptr<DawnDeviceClientSerializerHolder> holder) {
+        if (holder->dawn_control_client_->IsDestroyed()) {
+          return;
+        }
+        holder->dawn_control_client_->GetInterface()->FlushAwaitingCommands(
+            holder->device_client_id_);
+      },
+      device_client_serializer_holder_));
+}
+
 DawnObjectImpl::DawnObjectImpl(GPUDevice* device)
    : DeviceTreeObject(device->GetDeviceClientSerializerHolder()),
      device_(device) {}

--- a/third_party/blink/renderer/modules/webgpu/dawn_object.h
+++ b/third_party/blink/renderer/modules/webgpu/dawn_object.h
@@ -88,6 +88,10 @@ class DeviceTreeObject {

  uint64_t GetDeviceClientID() const;

+  // Ensure commands up until now on this object's parent device are flushed by
+  // the end of the task.
+  void EnsureFlush();
+
 protected:
  scoped_refptr<DawnDeviceClientSerializerHolder>
      device_client_serializer_holder_;

--- a/third_party/blink/renderer/modules/webgpu/gpu_buffer.cc
+++ b/third_party/blink/renderer/modules/webgpu/gpu_buffer.cc
@@ -197,10 +197,9 @@ ScriptPromise GPUBuffer::MapAsyncImpl(ScriptState* script_state,
  GetProcs().bufferMapAsync(GetHandle(), mode, map_offset, map_size,
                            callback->UnboundCallback(),
                            callback->AsUserdata());
-  // WebGPU guarantees callbacks complete in finite time. Flush now so that
-  // commands reach the GPU process.
-  device_->GetInterface()->FlushCommands();
-
+  // WebGPU guarantees that promises are resolved in finite time so we
+  // need to ensure commands are flushed.
+  EnsureFlush();
  return promise;
 }


--- a/third_party/blink/renderer/modules/webgpu/gpu_device.cc
+++ b/third_party/blink/renderer/modules/webgpu/gpu_device.cc
@@ -234,11 +234,9 @@ ScriptPromise GPUDevice::popErrorScope(ScriptState* script_state) {
    return promise;
  }

-  // WebGPU guarantees callbacks complete in finite time. Flush now so that
-  // commands reach the GPU process. TODO(enga): This should happen at the end
-  // of the task.
-  GetInterface()->FlushCommands();
-
+  // WebGPU guarantees that promises are resolved in finite time so we
+  // need to ensure commands are flushed.
+  EnsureFlush();
  return promise;
 }


--- a/third_party/blink/renderer/modules/webgpu/gpu_fence.cc
+++ b/third_party/blink/renderer/modules/webgpu/gpu_fence.cc
@@ -56,11 +56,9 @@ ScriptPromise GPUFence::onCompletion(ScriptState* script_state,

  GetProcs().fenceOnCompletion(GetHandle(), value, callback->UnboundCallback(),
                               callback->AsUserdata());
-
-  // WebGPU guarantees that submitted commands finish in finite time so we
-  // flush commands to the GPU process now.
-  device_->GetInterface()->FlushCommands();
-
+  // WebGPU guarantees that promises are resolved in finite time so we
+  // need to ensure commands are flushed.
+  EnsureFlush();
  return promise;
 }


--- a/third_party/blink/renderer/modules/webgpu/gpu_queue.cc
+++ b/third_party/blink/renderer/modules/webgpu/gpu_queue.cc
@@ -146,16 +146,16 @@ void GPUQueue::submit(const HeapVector<Member<GPUCommandBuffer>>& buffers) {

  GetProcs().queueSubmit(GetHandle(), buffers.size(), commandBuffers.get());
  // WebGPU guarantees that submitted commands finish in finite time so we
-  // flush commands to the GPU process now.
-  device_->GetInterface()->FlushCommands();
+  // need to ensure commands are flushed.
+  EnsureFlush();
 }

 void GPUQueue::signal(GPUFence* fence, uint64_t signal_value) {
  GetProcs().queueSignal(GetHandle(), fence->GetHandle(), signal_value);
  // Signaling a fence adds a callback to update the fence value to the
  // completed value. WebGPU guarantees that the fence completion is
-  // observable in finite time so we flush commands to the GPU process now.
-  device_->GetInterface()->FlushCommands();
+  // observable in finite time so we need to ensure commands are flushed.
+  EnsureFlush();
 }

 GPUFence* GPUQueue::createFence(const GPUFenceDescriptor* descriptor) {