Commit 13a6b23c authored by Eero Häkkinen, committed by Commit Bot

Move vector convolution from DirectConvolver to blink::VectorMath

This CL adds a new function, blink::VectorMath::Conv, based on code in
blink::DirectConvolver, modifies DirectConvolver to use the new
function, and adds a unit test for it.

This is preparation for AVX-optimized vector convolution.
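
For illustration only (not part of this CL's diff): with contiguous data and
the stride combination (1, -1, 1) that the implementations support, a caller
would use the new function roughly as below. The array and constant names are
hypothetical.

  const size_t kKernelSize = 32u;
  const size_t kFramesToProcess = 128u;
  // The source must provide kFramesToProcess + kKernelSize - 1 samples.
  float signal[kFramesToProcess + kKernelSize] = {};
  float reversed_kernel[kKernelSize] = {};  // filter taps stored in reverse order
  float output[kFramesToProcess] = {};
  // The filter pointer is the last element of the reversed kernel and is
  // walked with stride -1, so the taps are read backwards in memory.
  blink::VectorMath::Conv(signal, 1, reversed_kernel + kKernelSize - 1, -1,
                          output, 1, kFramesToProcess, kKernelSize);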

Bug: 778262
Change-Id: Iae970e9c1ecb359c2375305b6af8c54e853dcdc1
Reviewed-on: https://chromium-review.googlesource.com/924143
Commit-Queue: Eero Häkkinen <eero.hakkinen@intel.com>
Reviewed-by: Raymond Toy <rtoy@chromium.org>
Cr-Commit-Position: refs/heads/master@{#544164}
parent 27844bb9
...@@ -61,6 +61,24 @@ namespace Impl = Scalar;
#endif
} // namespace
void Conv(const float* source_p,
int source_stride,
const float* filter_p,
int filter_stride,
float* dest_p,
int dest_stride,
size_t frames_to_process,
size_t filter_size) {
// Only contiguous convolution is implemented by all implementations.
// Correlation (positive |filter_stride|) and support for non-contiguous
// vectors are not implemented by all implementations.
DCHECK_EQ(1, source_stride);
DCHECK_EQ(-1, filter_stride);
DCHECK_EQ(1, dest_stride);
Impl::Conv(source_p, source_stride, filter_p, filter_stride, dest_p,
dest_stride, frames_to_process, filter_size);
}
void Vadd(const float* source1p,
int source_stride1,
const float* source2p,
...
...@@ -27,6 +27,7 @@
#define VectorMath_h
#include <cstddef>
#include "platform/PlatformExport.h"
// Defines the interface for several vector math functions whose implementation
...@@ -35,6 +36,19 @@
namespace blink {
namespace VectorMath {
// Direct vector convolution:
//
// dest[k*dest_stride] =
// sum(source[(k+m)*source_stride]*filter[m*filter_stride]) for all m
PLATFORM_EXPORT void Conv(const float* source_p,
int source_stride,
const float* filter_p,
int filter_stride,
float* dest_p,
int dest_stride,
size_t frames_to_process,
size_t filter_size);
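// Illustration (not part of the original header): with the contiguous strides
// the implementations support (source_stride == 1, filter_stride == -1,
// dest_stride == 1), the declaration above computes the equivalent of
//
//   for (size_t k = 0u; k < frames_to_process; ++k) {
//     float sum = 0;
//     for (size_t m = 0u; m < filter_size; ++m)
//       sum += source_p[k + m] * filter_p[-static_cast<ptrdiff_t>(m)];
//     dest_p[k] = sum;
//   }
//
// so |filter_p| points at the last tap of a reversed kernel and the taps are
// read backwards in memory.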
// Vector scalar multiply and then add.
//
// dest[k*dest_stride] += scale * source[k*source_stride]
...
...@@ -8,12 +8,308 @@
#include <algorithm>
#include <cmath>
#include "platform/wtf/Assertions.h"
#include "platform/wtf/MathExtras.h"
namespace blink {
namespace VectorMath {
namespace Scalar {
static ALWAYS_INLINE void Conv(const float* source_p,
int source_stride,
const float* filter_p,
int filter_stride,
float* dest_p,
int dest_stride,
size_t frames_to_process,
size_t filter_size) {
// Only contiguous convolution is implemented. Correlation (positive
// |filter_stride|) and support for non-contiguous vectors are not
// implemented.
DCHECK_EQ(1, source_stride);
DCHECK_EQ(-1, filter_stride);
DCHECK_EQ(1, dest_stride);
size_t kernel_size = filter_size;
const float* input_p = source_p + kernel_size - 1;
const float* kernel_p = filter_p + 1 - kernel_size;
size_t i = 0;
// FIXME: The macro can be further optimized to avoid pipeline stalls. One
// possibility is to maintain 4 separate sums and change the macro to
// CONVOLVE_FOUR_SAMPLES.
#define CONVOLVE_ONE_SAMPLE \
do { \
sum += input_p[i - j] * kernel_p[j]; \
j++; \
} while (0)
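// Illustration (not part of this CL): one possible CONVOLVE_FOUR_SAMPLES from
// the FIXME above. Four independent partial sums (sum0..sum3) shorten the
// dependency chain on a single |sum|; they would be added together before
// storing dest_p[i].
//
//   #define CONVOLVE_FOUR_SAMPLES                        \
//     do {                                               \
//       sum0 += input_p[i - j] * kernel_p[j];            \
//       sum1 += input_p[i - (j + 1)] * kernel_p[j + 1];  \
//       sum2 += input_p[i - (j + 2)] * kernel_p[j + 2];  \
//       sum3 += input_p[i - (j + 3)] * kernel_p[j + 3];  \
//       j += 4;                                          \
//     } while (0)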
while (i < frames_to_process) {
size_t j = 0;
float sum = 0;
if (kernel_size == 32) {
CONVOLVE_ONE_SAMPLE; // 1
CONVOLVE_ONE_SAMPLE; // 2
CONVOLVE_ONE_SAMPLE; // 3
CONVOLVE_ONE_SAMPLE; // 4
CONVOLVE_ONE_SAMPLE; // 5
CONVOLVE_ONE_SAMPLE; // 6
CONVOLVE_ONE_SAMPLE; // 7
CONVOLVE_ONE_SAMPLE; // 8
CONVOLVE_ONE_SAMPLE; // 9
CONVOLVE_ONE_SAMPLE; // 10
CONVOLVE_ONE_SAMPLE; // 11
CONVOLVE_ONE_SAMPLE; // 12
CONVOLVE_ONE_SAMPLE; // 13
CONVOLVE_ONE_SAMPLE; // 14
CONVOLVE_ONE_SAMPLE; // 15
CONVOLVE_ONE_SAMPLE; // 16
CONVOLVE_ONE_SAMPLE; // 17
CONVOLVE_ONE_SAMPLE; // 18
CONVOLVE_ONE_SAMPLE; // 19
CONVOLVE_ONE_SAMPLE; // 20
CONVOLVE_ONE_SAMPLE; // 21
CONVOLVE_ONE_SAMPLE; // 22
CONVOLVE_ONE_SAMPLE; // 23
CONVOLVE_ONE_SAMPLE; // 24
CONVOLVE_ONE_SAMPLE; // 25
CONVOLVE_ONE_SAMPLE; // 26
CONVOLVE_ONE_SAMPLE; // 27
CONVOLVE_ONE_SAMPLE; // 28
CONVOLVE_ONE_SAMPLE; // 29
CONVOLVE_ONE_SAMPLE; // 30
CONVOLVE_ONE_SAMPLE; // 31
CONVOLVE_ONE_SAMPLE; // 32
} else if (kernel_size == 64) {
CONVOLVE_ONE_SAMPLE; // 1
CONVOLVE_ONE_SAMPLE; // 2
CONVOLVE_ONE_SAMPLE; // 3
CONVOLVE_ONE_SAMPLE; // 4
CONVOLVE_ONE_SAMPLE; // 5
CONVOLVE_ONE_SAMPLE; // 6
CONVOLVE_ONE_SAMPLE; // 7
CONVOLVE_ONE_SAMPLE; // 8
CONVOLVE_ONE_SAMPLE; // 9
CONVOLVE_ONE_SAMPLE; // 10
CONVOLVE_ONE_SAMPLE; // 11
CONVOLVE_ONE_SAMPLE; // 12
CONVOLVE_ONE_SAMPLE; // 13
CONVOLVE_ONE_SAMPLE; // 14
CONVOLVE_ONE_SAMPLE; // 15
CONVOLVE_ONE_SAMPLE; // 16
CONVOLVE_ONE_SAMPLE; // 17
CONVOLVE_ONE_SAMPLE; // 18
CONVOLVE_ONE_SAMPLE; // 19
CONVOLVE_ONE_SAMPLE; // 20
CONVOLVE_ONE_SAMPLE; // 21
CONVOLVE_ONE_SAMPLE; // 22
CONVOLVE_ONE_SAMPLE; // 23
CONVOLVE_ONE_SAMPLE; // 24
CONVOLVE_ONE_SAMPLE; // 25
CONVOLVE_ONE_SAMPLE; // 26
CONVOLVE_ONE_SAMPLE; // 27
CONVOLVE_ONE_SAMPLE; // 28
CONVOLVE_ONE_SAMPLE; // 29
CONVOLVE_ONE_SAMPLE; // 30
CONVOLVE_ONE_SAMPLE; // 31
CONVOLVE_ONE_SAMPLE; // 32
CONVOLVE_ONE_SAMPLE; // 33
CONVOLVE_ONE_SAMPLE; // 34
CONVOLVE_ONE_SAMPLE; // 35
CONVOLVE_ONE_SAMPLE; // 36
CONVOLVE_ONE_SAMPLE; // 37
CONVOLVE_ONE_SAMPLE; // 38
CONVOLVE_ONE_SAMPLE; // 39
CONVOLVE_ONE_SAMPLE; // 40
CONVOLVE_ONE_SAMPLE; // 41
CONVOLVE_ONE_SAMPLE; // 42
CONVOLVE_ONE_SAMPLE; // 43
CONVOLVE_ONE_SAMPLE; // 44
CONVOLVE_ONE_SAMPLE; // 45
CONVOLVE_ONE_SAMPLE; // 46
CONVOLVE_ONE_SAMPLE; // 47
CONVOLVE_ONE_SAMPLE; // 48
CONVOLVE_ONE_SAMPLE; // 49
CONVOLVE_ONE_SAMPLE; // 50
CONVOLVE_ONE_SAMPLE; // 51
CONVOLVE_ONE_SAMPLE; // 52
CONVOLVE_ONE_SAMPLE; // 53
CONVOLVE_ONE_SAMPLE; // 54
CONVOLVE_ONE_SAMPLE; // 55
CONVOLVE_ONE_SAMPLE; // 56
CONVOLVE_ONE_SAMPLE; // 57
CONVOLVE_ONE_SAMPLE; // 58
CONVOLVE_ONE_SAMPLE; // 59
CONVOLVE_ONE_SAMPLE; // 60
CONVOLVE_ONE_SAMPLE; // 61
CONVOLVE_ONE_SAMPLE; // 62
CONVOLVE_ONE_SAMPLE; // 63
CONVOLVE_ONE_SAMPLE; // 64
} else if (kernel_size == 128) {
CONVOLVE_ONE_SAMPLE; // 1
CONVOLVE_ONE_SAMPLE; // 2
CONVOLVE_ONE_SAMPLE; // 3
CONVOLVE_ONE_SAMPLE; // 4
CONVOLVE_ONE_SAMPLE; // 5
CONVOLVE_ONE_SAMPLE; // 6
CONVOLVE_ONE_SAMPLE; // 7
CONVOLVE_ONE_SAMPLE; // 8
CONVOLVE_ONE_SAMPLE; // 9
CONVOLVE_ONE_SAMPLE; // 10
CONVOLVE_ONE_SAMPLE; // 11
CONVOLVE_ONE_SAMPLE; // 12
CONVOLVE_ONE_SAMPLE; // 13
CONVOLVE_ONE_SAMPLE; // 14
CONVOLVE_ONE_SAMPLE; // 15
CONVOLVE_ONE_SAMPLE; // 16
CONVOLVE_ONE_SAMPLE; // 17
CONVOLVE_ONE_SAMPLE; // 18
CONVOLVE_ONE_SAMPLE; // 19
CONVOLVE_ONE_SAMPLE; // 20
CONVOLVE_ONE_SAMPLE; // 21
CONVOLVE_ONE_SAMPLE; // 22
CONVOLVE_ONE_SAMPLE; // 23
CONVOLVE_ONE_SAMPLE; // 24
CONVOLVE_ONE_SAMPLE; // 25
CONVOLVE_ONE_SAMPLE; // 26
CONVOLVE_ONE_SAMPLE; // 27
CONVOLVE_ONE_SAMPLE; // 28
CONVOLVE_ONE_SAMPLE; // 29
CONVOLVE_ONE_SAMPLE; // 30
CONVOLVE_ONE_SAMPLE; // 31
CONVOLVE_ONE_SAMPLE; // 32
CONVOLVE_ONE_SAMPLE; // 33
CONVOLVE_ONE_SAMPLE; // 34
CONVOLVE_ONE_SAMPLE; // 35
CONVOLVE_ONE_SAMPLE; // 36
CONVOLVE_ONE_SAMPLE; // 37
CONVOLVE_ONE_SAMPLE; // 38
CONVOLVE_ONE_SAMPLE; // 39
CONVOLVE_ONE_SAMPLE; // 40
CONVOLVE_ONE_SAMPLE; // 41
CONVOLVE_ONE_SAMPLE; // 42
CONVOLVE_ONE_SAMPLE; // 43
CONVOLVE_ONE_SAMPLE; // 44
CONVOLVE_ONE_SAMPLE; // 45
CONVOLVE_ONE_SAMPLE; // 46
CONVOLVE_ONE_SAMPLE; // 47
CONVOLVE_ONE_SAMPLE; // 48
CONVOLVE_ONE_SAMPLE; // 49
CONVOLVE_ONE_SAMPLE; // 50
CONVOLVE_ONE_SAMPLE; // 51
CONVOLVE_ONE_SAMPLE; // 52
CONVOLVE_ONE_SAMPLE; // 53
CONVOLVE_ONE_SAMPLE; // 54
CONVOLVE_ONE_SAMPLE; // 55
CONVOLVE_ONE_SAMPLE; // 56
CONVOLVE_ONE_SAMPLE; // 57
CONVOLVE_ONE_SAMPLE; // 58
CONVOLVE_ONE_SAMPLE; // 59
CONVOLVE_ONE_SAMPLE; // 60
CONVOLVE_ONE_SAMPLE; // 61
CONVOLVE_ONE_SAMPLE; // 62
CONVOLVE_ONE_SAMPLE; // 63
CONVOLVE_ONE_SAMPLE; // 64
CONVOLVE_ONE_SAMPLE; // 65
CONVOLVE_ONE_SAMPLE; // 66
CONVOLVE_ONE_SAMPLE; // 67
CONVOLVE_ONE_SAMPLE; // 68
CONVOLVE_ONE_SAMPLE; // 69
CONVOLVE_ONE_SAMPLE; // 70
CONVOLVE_ONE_SAMPLE; // 71
CONVOLVE_ONE_SAMPLE; // 72
CONVOLVE_ONE_SAMPLE; // 73
CONVOLVE_ONE_SAMPLE; // 74
CONVOLVE_ONE_SAMPLE; // 75
CONVOLVE_ONE_SAMPLE; // 76
CONVOLVE_ONE_SAMPLE; // 77
CONVOLVE_ONE_SAMPLE; // 78
CONVOLVE_ONE_SAMPLE; // 79
CONVOLVE_ONE_SAMPLE; // 80
CONVOLVE_ONE_SAMPLE; // 81
CONVOLVE_ONE_SAMPLE; // 82
CONVOLVE_ONE_SAMPLE; // 83
CONVOLVE_ONE_SAMPLE; // 84
CONVOLVE_ONE_SAMPLE; // 85
CONVOLVE_ONE_SAMPLE; // 86
CONVOLVE_ONE_SAMPLE; // 87
CONVOLVE_ONE_SAMPLE; // 88
CONVOLVE_ONE_SAMPLE; // 89
CONVOLVE_ONE_SAMPLE; // 90
CONVOLVE_ONE_SAMPLE; // 91
CONVOLVE_ONE_SAMPLE; // 92
CONVOLVE_ONE_SAMPLE; // 93
CONVOLVE_ONE_SAMPLE; // 94
CONVOLVE_ONE_SAMPLE; // 95
CONVOLVE_ONE_SAMPLE; // 96
CONVOLVE_ONE_SAMPLE; // 97
CONVOLVE_ONE_SAMPLE; // 98
CONVOLVE_ONE_SAMPLE; // 99
CONVOLVE_ONE_SAMPLE; // 100
CONVOLVE_ONE_SAMPLE; // 101
CONVOLVE_ONE_SAMPLE; // 102
CONVOLVE_ONE_SAMPLE; // 103
CONVOLVE_ONE_SAMPLE; // 104
CONVOLVE_ONE_SAMPLE; // 105
CONVOLVE_ONE_SAMPLE; // 106
CONVOLVE_ONE_SAMPLE; // 107
CONVOLVE_ONE_SAMPLE; // 108
CONVOLVE_ONE_SAMPLE; // 109
CONVOLVE_ONE_SAMPLE; // 110
CONVOLVE_ONE_SAMPLE; // 111
CONVOLVE_ONE_SAMPLE; // 112
CONVOLVE_ONE_SAMPLE; // 113
CONVOLVE_ONE_SAMPLE; // 114
CONVOLVE_ONE_SAMPLE; // 115
CONVOLVE_ONE_SAMPLE; // 116
CONVOLVE_ONE_SAMPLE; // 117
CONVOLVE_ONE_SAMPLE; // 118
CONVOLVE_ONE_SAMPLE; // 119
CONVOLVE_ONE_SAMPLE; // 120
CONVOLVE_ONE_SAMPLE; // 121
CONVOLVE_ONE_SAMPLE; // 122
CONVOLVE_ONE_SAMPLE; // 123
CONVOLVE_ONE_SAMPLE; // 124
CONVOLVE_ONE_SAMPLE; // 125
CONVOLVE_ONE_SAMPLE; // 126
CONVOLVE_ONE_SAMPLE; // 127
CONVOLVE_ONE_SAMPLE; // 128
} else {
while (j < kernel_size) {
// Non-optimized using actual while loop.
CONVOLVE_ONE_SAMPLE;
}
}
dest_p[i++] = sum;
}
#undef CONVOLVE_ONE_SAMPLE
}
static ALWAYS_INLINE void Vadd(const float* source1p,
int source_stride1,
const float* source2p,
...
...@@ -32,7 +32,7 @@ constexpr size_t kMaxByteAlignment = kMaxBitAlignment / 8u;
constexpr size_t kMaxStride = 2u;
constexpr MemoryLayout kMemoryLayouts[] = {
{kMaxByteAlignment / 2u - kMaxByteAlignment / 4u, 1u},
{kMaxByteAlignment / 4u, 1u},
{kMaxByteAlignment / 2u, 1u},
{kMaxByteAlignment / 2u + kMaxByteAlignment / 4u, 1u},
{kMaxByteAlignment, 1u},
...@@ -75,7 +75,7 @@ class TestVector {
// These types are used by std::iterator_traits used by std::equal used by
// TestVector::operator==.
using difference_type = ptrdiff_t;
using iterator_category = std::input_iterator_tag;
using iterator_category = std::bidirectional_iterator_tag;
using pointer = T*;
using reference = T&;
using value_type = T;
...@@ -91,6 +91,15 @@ class TestVector {
++(*this);
return iter;
}
Iterator& operator--() {
p_ -= stride_;
return *this;
}
Iterator operator--(int) {
Iterator iter = *this;
--(*this);
return iter;
}
bool operator==(const Iterator& other) const { return p_ == other.p_; }
bool operator!=(const Iterator& other) const { return !(*this == other); }
T& operator*() const { return *p_; }
...@@ -101,6 +110,8 @@
};
public:
using ReverseIterator = std::reverse_iterator<Iterator>;
// These types are used internally by Google Test.
using const_iterator = Iterator;
using iterator = Iterator;
...@@ -117,6 +128,8 @@ class TestVector {
Iterator begin() const { return Iterator(p_, stride()); }
Iterator end() const { return Iterator(p_ + size() * stride(), stride()); }
ReverseIterator rbegin() const { return ReverseIterator(end()); }
ReverseIterator rend() const { return ReverseIterator(begin()); }
const MemoryLayout* memory_layout() const { return memory_layout_; }
T* p() const { return p_; }
size_t size() const { return size_; }
...@@ -175,17 +188,27 @@ GetPrimaryVectors(const T* base) {
template <typename T>
std::array<TestVector<T>, 2u> GetSecondaryVectors(
T* base,
const TestVector<const float>& primary_vector) {
const MemoryLayout* primary_memory_layout,
size_t size) {
std::array<TestVector<T>, 2u> vectors;
const MemoryLayout* primary_memory_layout = primary_vector.memory_layout();
const MemoryLayout* other_memory_layout =
&kMemoryLayouts[primary_memory_layout == &kMemoryLayouts[0]];
CHECK_NE(primary_memory_layout, other_memory_layout);
vectors[0] = TestVector<T>(base, primary_vector);
vectors[1] = TestVector<T>(base, other_memory_layout, primary_vector.size());
CHECK_NE(primary_memory_layout->byte_alignment,
other_memory_layout->byte_alignment);
vectors[0] = TestVector<T>(base, primary_memory_layout, size);
vectors[1] = TestVector<T>(base, other_memory_layout, size);
return vectors;
}
template <typename T>
std::array<TestVector<T>, 2u> GetSecondaryVectors(
T* base,
const TestVector<const float>& primary_vector) {
return GetSecondaryVectors(base, primary_vector.memory_layout(),
primary_vector.size());
}
class VectorMathTest : public ::testing::Test {
protected:
enum {
...@@ -194,8 +217,9 @@ class VectorMathTest : public ::testing::Test {
(kMaxStride * kMaxVectorSizeInBytes + kMaxByteAlignment - 1u) /
sizeof(float),
kFullyFiniteSource = 4u,
kFullyNonNanSource = 5u,
kSourceCount = 6u
kFullyFiniteSource2 = 5u,
kFullyNonNanSource = 6u,
kSourceCount = 7u
};
// Get a destination buffer containing initially uninitialized floats.
...@@ -219,7 +243,7 @@ class VectorMathTest : public ::testing::Test {
std::uniform_int_distribution<size_t> index_distribution(
0u, kFloatArraySize / 2u - 1u);
for (size_t i = 0u; i < kSourceCount; ++i) {
if (i == kFullyFiniteSource)
if (i == kFullyFiniteSource || i == kFullyFiniteSource2)
continue;
sources_[i][index_distribution(generator)] = INFINITY;
sources_[i][index_distribution(generator)] = -INFINITY;
sources_[i][index_distribution(generator)] = -INFINITY; sources_[i][index_distribution(generator)] = -INFINITY;
...@@ -236,6 +260,46 @@ class VectorMathTest : public ::testing::Test { ...@@ -236,6 +260,46 @@ class VectorMathTest : public ::testing::Test {
float VectorMathTest::destinations_[kDestinationCount][kFloatArraySize]; float VectorMathTest::destinations_[kDestinationCount][kFloatArraySize];
float VectorMathTest::sources_[kSourceCount][kFloatArraySize]; float VectorMathTest::sources_[kSourceCount][kFloatArraySize];
TEST_F(VectorMathTest, Conv) {
for (const auto& source : GetPrimaryVectors(GetSource(kFullyFiniteSource))) {
if (source.stride() != 1)
continue;
for (size_t filter_size : {3u, 20u, 32u, 64u, 128u}) {
// The maximum number of frames which could be processed here is
// |source.size() - filter_size + 1|. However, in order to test
// optimization paths, |frames_to_process| should be optimal (divisible
// by a power of 2) whenever |filter_size| is optimal. Therefore, let's
// process only |source.size() - filter_size| frames here.
if (filter_size >= source.size())
break;
size_t frames_to_process = source.size() - filter_size;
// The stride of a convolution filter must be -1. Let's first create
// a reversed filter whose stride is 1.
TestVector<const float> reversed_filter(
GetSource(kFullyFiniteSource2), source.memory_layout(), filter_size);
// The filter begins from the reverse beginning of the reversed filter
// and grows downwards.
const float* filter_p = &*reversed_filter.rbegin();
TestVector<float> expected_dest(
GetDestination(0u), source.memory_layout(), frames_to_process);
for (size_t i = 0u; i < frames_to_process; ++i) {
expected_dest[i] = 0u;
for (size_t j = 0u; j < filter_size; ++j)
expected_dest[i] += source[i + j] * *(filter_p - j);
}
for (auto& dest : GetSecondaryVectors(
GetDestination(1u), source.memory_layout(), frames_to_process)) {
Conv(source.p(), 1, filter_p, -1, dest.p(), 1, frames_to_process,
filter_size);
for (size_t i = 0u; i < frames_to_process; ++i) {
EXPECT_NEAR(expected_dest[i], dest[i],
1e-3 * std::abs(expected_dest[i]));
}
}
}
}
}
TEST_F(VectorMathTest, Vadd) {
for (const auto& source1 : GetPrimaryVectors(GetSource(0u))) {
for (const auto& source2 : GetSecondaryVectors(GetSource(1u), source1)) {
...
...@@ -15,6 +15,9 @@ namespace blink {
namespace VectorMath {
namespace NEON {
// TODO: Consider optimizing this.
using Scalar::Conv;
static ALWAYS_INLINE void Vadd(const float* source1p,
int source_stride1,
const float* source2p,
...
...@@ -14,6 +14,11 @@ namespace blink {
namespace VectorMath {
namespace MSA {
// TODO: Consider optimizing these.
using Scalar::Conv;
using Scalar::Vsvesq;
using Scalar::Zvmul;
static ALWAYS_INLINE void Vadd(const float* source1p,
int source_stride1,
const float* source2p,
...@@ -207,24 +212,6 @@ static ALWAYS_INLINE void Vsmul(const float* source_p,
Scalar::Vsmul(source_p, source_stride, scale, dest_p, dest_stride, n);
}
static ALWAYS_INLINE void Vsvesq(const float* source_p,
int source_stride,
float* sum_p,
size_t frames_to_process) {
Scalar::Vsvesq(source_p, source_stride, sum_p, frames_to_process);
}
static ALWAYS_INLINE void Zvmul(const float* real1p,
const float* imag1p,
const float* real2p,
const float* imag2p,
float* real_dest_p,
float* imag_dest_p,
size_t frames_to_process) {
Scalar::Zvmul(real1p, imag1p, real2p, imag2p, real_dest_p, imag_dest_p,
frames_to_process);
}
} // namespace MSA
} // namespace VectorMath
} // namespace blink
...
...@@ -6,11 +6,14 @@
#define VectorMathX86_h
#include "base/cpu.h"
#include "platform/audio/AudioArray.h"
#include "platform/audio/VectorMathScalar.h" #include "platform/audio/VectorMathScalar.h"
#include "platform/audio/cpu/x86/VectorMathAVX.h" #include "platform/audio/cpu/x86/VectorMathAVX.h"
#include "platform/audio/cpu/x86/VectorMathSSE.h" #include "platform/audio/cpu/x86/VectorMathSSE.h"
#include "platform/wtf/Assertions.h" #include "platform/wtf/Assertions.h"
#include <xmmintrin.h>
namespace blink {
namespace VectorMath {
namespace X86 {
...@@ -91,6 +94,70 @@ SplitFramesToProcess(const float* source_p, size_t frames_to_process) {
return counts;
}
static ALWAYS_INLINE void Conv(const float* source_p,
int source_stride,
const float* filter_p,
int filter_stride,
float* dest_p,
int dest_stride,
size_t frames_to_process,
size_t filter_size) {
// Only contiguous convolution is implemented. Correlation (positive
// |filter_stride|) and support for non-contiguous vectors are not
// implemented.
DCHECK_EQ(1, source_stride);
DCHECK_EQ(-1, filter_stride);
DCHECK_EQ(1, dest_stride);
size_t kernel_size = filter_size;
const float* input_p = source_p + kernel_size - 1;
const float* kernel_p = filter_p + 1 - kernel_size;
size_t i = 0;
// Convolution using SSE2. Currently only do this if both |kernel_size| and
// |frames_to_process| are multiples of 4. If not, use Scalar::Conv.
if ((kernel_size % 4 == 0) && (frames_to_process % 4 == 0)) {
// AudioFloatArray's are always aligned on at least a 32-byte boundary.
AudioFloatArray kernel_buffer(4 * kernel_size);
__m128* kernel_reversed = reinterpret_cast<__m128*>(kernel_buffer.Data());
// Reverse the kernel and repeat each value across a vector
for (i = 0; i < kernel_size; ++i) {
kernel_reversed[i] = _mm_set1_ps(kernel_p[kernel_size - i - 1]);
}
const float* input_start_p = input_p - kernel_size + 1;
// Do convolution with 4 inputs at a time.
for (i = 0; i < frames_to_process; i += 4) {
__m128 convolution_sum;
convolution_sum = _mm_setzero_ps();
// |kernel_size| is a multiple of 4 so we can unroll the loop by 4,
// manually.
for (size_t k = 0; k < kernel_size; k += 4) {
size_t data_offset = i + k;
for (size_t m = 0; m < 4; ++m) {
__m128 source_block;
__m128 product;
source_block = _mm_loadu_ps(input_start_p + data_offset + m);
product = _mm_mul_ps(kernel_reversed[k + m], source_block);
convolution_sum = _mm_add_ps(convolution_sum, product);
}
}
_mm_storeu_ps(dest_p + i, convolution_sum);
}
} else {
Scalar::Conv(source_p, source_stride, filter_p, filter_stride, dest_p,
dest_stride, frames_to_process, filter_size);
}
}
static ALWAYS_INLINE void Vadd(const float* source1p,
int source_stride1,
const float* source2p,
...
...@@ -19,6 +19,23 @@ namespace Mac {
// our namespaced function names, so we must handle this case differently. Other
// architectures (64bit, ARM, etc.) do not include this header file.
static ALWAYS_INLINE void Conv(const float* source_p,
int source_stride,
const float* filter_p,
int filter_stride,
float* dest_p,
int dest_stride,
size_t frames_to_process,
size_t filter_size) {
#if defined(ARCH_CPU_X86)
::conv(source_p, source_stride, filter_p, filter_stride, dest_p, dest_stride,
frames_to_process, filter_size);
#else
vDSP_conv(source_p, source_stride, filter_p, filter_stride, dest_p,
dest_stride, frames_to_process, filter_size);
#endif
}
static ALWAYS_INLINE void Vadd(const float* source1p,
int source_stride1,
const float* source2p,
...