Commit 85d355e5 authored by Dale Curtis, committed by Commit Bot

Enable SSE, SSE2, and run-time detected SSE4.1 for libopus.

Surprisingly, this hadn't been done, so we are missing the SSE and SSE2
optimizations, which should be available on all platforms, and a
couple of SSE4.1 optimizations, which are runtime selected.

This condenses all include_dirs, cflags, and defines into a common
opus_private_config for sharing with intrinsic targets.

It looks like AVX support has been started in libopus, so the next
update that pulls in some of that code will need to add an opus_avx
target tagged with a -mavx cflag.

This doesn't seem to add any decoding speedup per media_perftests,
but much of this code is for encoding, so WebRTC and MediaRecorder
should benefit.

BUG=none
TEST=unittests all pass and bots are happy

Change-Id: I0a12460af899c4d94c16e4024c043ff25e896463
Reviewed-on: https://chromium-review.googlesource.com/c/1318807
Reviewed-by: Sergey Ulanov <sergeyu@chromium.org>
Commit-Queue: Dale Curtis <dalecurtis@chromium.org>
Cr-Commit-Position: refs/heads/master@{#606131}
parent a8b6b277
......@@ -13,10 +13,15 @@ use_opus_fixed_point = current_cpu == "arm" || current_cpu == "arm64"
use_opus_arm_optimization =
current_cpu == "arm" || (current_cpu == "arm64" && is_ios)
# NaCl, unlike Chrome, doesn't target SSE2 minimum, so skip optimizations for
# the sake of simplicity.
use_opus_x86_optimization =
!is_nacl && (current_cpu == "x86" || current_cpu == "x64")
# Whether OPUS Run Time CPU Detection (RTCD) should be used.
# Based on the conditions in celt/arm/armcpu.c:
# defined(_MSC_VER) || defined(__linux__).
use_opus_rtcd = current_cpu == "arm" && (is_win || is_android || is_linux)
use_opus_arm_rtcd = current_cpu == "arm" && (is_win || is_android || is_linux)
config("opus_config") {
include_dirs = [ "src/include" ]
......@@ -26,6 +31,105 @@ config("opus_config") {
}
}
# Private build settings (defines, include_dirs, cflags) used when compiling
# libopus itself. This config is added to both the "opus" static library and
# the "opus_sse41" intrinsic target so they are built with the same settings.
config("opus_private_config") {
# Defines and include paths that apply on every platform.
defines = [
"OPUS_BUILD",
"OPUS_EXPORT=",
]
include_dirs = [
"src",
"src/celt",
"src/silk",
]
cflags = []
# Windows gets its own defines plus two warning suppressions; every other
# platform gets the lrint/lrintf and VAR_ARRAYS defines instead.
if (is_win) {
defines += [
"USE_ALLOCA",
"inline=__inline",
]
cflags += [
"/wd4305", # Disable truncation warning in celt/pitch.c .
"/wd4334", # Disable 32-bit shift warning in src/opus_encoder.c .
]
} else {
defines += [
"HAVE_LRINT",
"HAVE_LRINTF",
"VAR_ARRAYS",
]
}
if (is_debug) {
# Turn off a warning in opus_decoder.c when compiling without optimization.
defines += [ "OPUS_WILL_BE_SLOW" ]
}
# x86/x64 defines: SSE and SSE2 are both allowed and presumed, while SSE4.1
# and AVX are only marked as possibly available.
if (use_opus_x86_optimization) {
defines += [
# Run Time CPU Detections (RTCD) is always enabled for x86.
"OPUS_HAVE_RTCD",
"CPU_INFO_BY_ASM",
# Chrome always targets SSE2+.
"OPUS_X86_MAY_HAVE_SSE",
"OPUS_X86_MAY_HAVE_SSE2",
"OPUS_X86_PRESUME_SSE",
"OPUS_X86_PRESUME_SSE2",
# Some systems may have SSE4.1+ support.
"OPUS_X86_MAY_HAVE_SSE4_1",
# At present libopus has no AVX functions, so no sources are added for
# this. If you see linker errors on AVX code, then this flag is why.
"OPUS_X86_MAY_HAVE_AVX",
]
}
# Select the SILK fixed-point or floating-point include directory to match
# use_opus_fixed_point; FIXED_POINT is only defined in the fixed-point case.
if (use_opus_fixed_point) {
defines += [ "FIXED_POINT" ]
include_dirs += [ "src/silk/fixed" ]
} else {
include_dirs += [ "src/silk/float" ]
}
# ARM-specific defines. Each group below is gated on its own condition, so
# any combination of them may be active at once.
if (use_opus_arm_optimization) {
# The assembly-related defines are only added for 32-bit "arm", not arm64.
if (current_cpu == "arm") {
defines += [
"OPUS_ARM_ASM",
"OPUS_ARM_INLINE_ASM",
"OPUS_ARM_INLINE_EDSP",
]
}
# With ARM run-time detection, EDSP and MEDIA are marked as possibly
# available and RTCD is enabled.
if (use_opus_arm_rtcd) {
defines += [
"OPUS_ARM_MAY_HAVE_EDSP",
"OPUS_ARM_MAY_HAVE_MEDIA",
"OPUS_HAVE_RTCD",
]
}
if (arm_use_neon) {
defines += [
"OPUS_ARM_MAY_HAVE_NEON",
"OPUS_ARM_MAY_HAVE_NEON_INTR",
]
}
if (is_ios && current_cpu == "arm64") {
# Runtime detection of CPU features not available on iOS.
defines += [
"OPUS_ARM_PRESUME_NEON_INTR",
"OPUS_ARM_PRESUME_AARCH64_NEON_INTR",
]
}
}
}
config("opus_test_config") {
include_dirs = [
"src/celt",
......@@ -43,7 +147,7 @@ config("opus_test_config") {
}
}
if (use_opus_rtcd) {
if (use_opus_arm_rtcd) {
action("convert_rtcd_assembler") {
script = "convert_rtcd_assembler.py"
outputs = [
......@@ -58,6 +162,30 @@ if (use_opus_rtcd) {
}
}
# x86/x64-only intrinsic targets.
if (use_opus_x86_optimization) {
# SSE4.1 sources, kept in their own target so that the -msse4.1 flag set
# below applies only to these files.
source_set("opus_sse41") {
sources = [
"src/celt/x86/pitch_sse4_1.c",
"src/silk/x86/NSQ_del_dec_sse.c",
"src/silk/x86/NSQ_sse.c",
]
# Build with the no_chromium_code compiler config instead of chromium_code.
configs -= [ "//build/config/compiler:chromium_code" ]
configs += [ "//build/config/compiler:no_chromium_code" ]
# Share the private defines/include_dirs/cflags and the public opus_config.
configs += [
":opus_private_config",
":opus_config",
]
# The flag is skipped only for non-clang Windows builds.
# NOTE(review): presumably because MSVC does not accept GCC-style -m flags
# -- confirm.
if (!is_win || is_clang) {
cflags = [ "-msse4.1" ]
}
}
# TODO(dalecurtis): If libopus ever adds AVX support, add an opus_avx block.
}
# Note: Do not add any defines or include_dirs to this target, those should all
# go in the opus_private_config so they can be shared with intrinsic targets.
static_library("opus") {
sources = [
"src/celt/_kiss_fft_guts.h",
......@@ -224,44 +352,12 @@ static_library("opus") {
"src/src/tansig_table.h",
]
defines = [
"OPUS_BUILD",
"OPUS_EXPORT=",
]
include_dirs = [
"src",
"src/celt",
"src/silk",
]
configs -= [ "//build/config/compiler:chromium_code" ]
configs += [ "//build/config/compiler:no_chromium_code" ]
configs += [
"//build/config/compiler:no_chromium_code",
":opus_private_config",
]
public_configs = [ ":opus_config" ]
cflags = []
if (is_win) {
defines += [
"USE_ALLOCA",
"inline=__inline",
]
cflags += [
"/wd4305", # Disable truncation warning in celt/pitch.c .
"/wd4334", # Disable 32-bit shift warning in src/opus_encoder.c .
]
} else {
defines += [
"HAVE_LRINT",
"HAVE_LRINTF",
"VAR_ARRAYS",
]
}
if (is_debug) {
# Turn off a warning in opus_decoder.c when compiling without optimization.
defines += [ "OPUS_WILL_BE_SLOW" ]
}
if (!is_debug && (current_cpu == "arm" || current_cpu == "arm64")) {
configs -= [ "//build/config/compiler:default_optimization" ]
......@@ -271,6 +367,27 @@ static_library("opus") {
configs += [ "//build/config/compiler:optimize_speed" ]
}
deps = []
if (use_opus_x86_optimization) {
sources += [
"src/celt/x86/celt_lpc_sse.h",
"src/celt/x86/pitch_sse.c",
"src/celt/x86/pitch_sse.h",
"src/celt/x86/pitch_sse2.c",
"src/celt/x86/vq_sse.h",
"src/celt/x86/vq_sse2.c",
"src/celt/x86/x86_celt_map.c",
"src/celt/x86/x86cpu.c",
"src/celt/x86/x86cpu.h",
"src/silk/x86/SigProc_FIX_sse.h",
"src/silk/x86/VAD_sse.c",
"src/silk/x86/main_sse.h",
"src/silk/x86/x86_silk_map.c",
]
deps += [ ":opus_sse41" ]
}
if (use_opus_fixed_point) {
sources += [
"src/silk/fixed/LTP_analysis_filter_FIX.c",
......@@ -299,10 +416,6 @@ static_library("opus") {
"src/silk/fixed/vector_ops_FIX.c",
"src/silk/fixed/warped_autocorrelation_FIX.c",
]
defines += [ "FIXED_POINT" ]
include_dirs += [ "src/silk/fixed" ]
} else {
sources += [
"src/silk/float/LPC_analysis_filter_FLP.c",
......@@ -337,8 +450,6 @@ static_library("opus") {
"src/silk/float/warped_autocorrelation_FLP.c",
"src/silk/float/wrappers_FLP.c",
]
include_dirs += [ "src/silk/float" ]
}
if (use_opus_arm_optimization) {
......@@ -356,15 +467,7 @@ static_library("opus") {
"src/silk/arm/macros_armv5e.h",
]
if (current_cpu == "arm") {
defines += [
"OPUS_ARM_ASM",
"OPUS_ARM_INLINE_ASM",
"OPUS_ARM_INLINE_EDSP",
]
}
if (use_opus_rtcd) {
if (use_opus_arm_rtcd) {
sources += [
"$target_gen_dir/celt_pitch_xcorr_arm_gnu.S",
"src/celt/arm/arm_celt_map.c",
......@@ -375,16 +478,7 @@ static_library("opus") {
"src/celt/arm/pitch_arm.h",
"src/silk/arm/arm_silk_map.c",
]
defines += [
"OPUS_ARM_MAY_HAVE_EDSP",
"OPUS_ARM_MAY_HAVE_MEDIA",
"OPUS_HAVE_RTCD",
]
deps = [
":convert_rtcd_assembler",
]
deps += [ ":convert_rtcd_assembler" ]
}
if (arm_use_neon) {
......@@ -402,19 +496,6 @@ static_library("opus") {
"src/silk/fixed/arm/warped_autocorrelation_FIX_arm.h",
"src/silk/fixed/arm/warped_autocorrelation_FIX_neon_intr.c",
]
defines += [
"OPUS_ARM_MAY_HAVE_NEON",
"OPUS_ARM_MAY_HAVE_NEON_INTR",
]
}
if (is_ios && current_cpu == "arm64") {
# Runtime detection of CPU features not available on iOS.
defines += [
"OPUS_ARM_PRESUME_NEON_INTR",
"OPUS_ARM_PRESUME_AARCH64_NEON_INTR",
]
}
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment