Commit 28c96230 authored by Adenilson Cavalcanti's avatar Adenilson Cavalcanti Committed by Commit Bot

Compute crc32 using ARMv8 specific instruction

CRC32 affects performance for both image decoding (PNG) and
general browsing when accessing websites that serve content
using compression (i.e. Content-Encoding: gzip).

This patch implements an optimized CRC32 function using the
dedicated instruction available in ARMv8a. We only support
ARM Little-Endian (LE).

This instruction is available in new Android devices featuring an
ARMv8 SoC, like Nexus 5x and Google Pixel. It should be between
3x (A72) to 7x faster (A53) than the C implementation currently used
by zlib for 8KB vectors.

This is performance-critical code that can be called with both large (8KB)
and small vectors, therefore we must avoid extraneous function calls and
branching (otherwise the performance benefits are negated). This is why
'public' variables are used to read the CPU feature status flags
(i.e. arm_cpu_enable_crc32 | pmull).

Finally it also introduces code to perform run-time ARM CPU feature
detection on the supported platforms: Android and Linux/CrOS. We build
and link the CRC32 instruction dependent code, but will decide to use it
at run-time if the ARM CPU supports the CRC32 instruction. Otherwise,
we fallback to using zlib's default C implementation.

This approach allows the instruction to be used in both 32-bit and 64-bit
builds and works fine on either ARMv7 or ARMv8 processors. I tested the
generated Chrome apk on both a Nexus 6 (ARMv7) and a Google Pixel (ARMv8).

The crc32 function benefited from input from Yang Zang and Mike Klein,
while the arm_features benefited from input from Noel Gordon.

Bug: 709716
Change-Id: I315c1216f8b3a8d88607630a28737c41f52a2f5d
Reviewed-on: https://chromium-review.googlesource.com/801108
Reviewed-by: Chris Blume <cblume@chromium.org>
Reviewed-by: Noel Gordon <noel@chromium.org>
Commit-Queue: Noel Gordon <noel@chromium.org>
Cr-Commit-Position: refs/heads/master@{#537179}
parent 064dbbfe
......@@ -54,6 +54,63 @@ source_set("zlib_adler32_simd") {
public_configs = [ ":zlib_adler32_simd_config" ]
}
# Preprocessor defines for the ARMv8 CRC32 code path. On ARM/ARM64 targets that
# are not iOS, ChromeOS or Fuchsia this defines CRC32_ARMV8_CRC32, plus one
# ARMV8_OS_* define naming the OS whose CPU feature detection should be used.
config("zlib_arm_crc32_config") {
if (current_cpu == "arm" || current_cpu == "arm64") {
# Restrictions:
# - Disabled for iPhones, as described in DDI0487C_a_armv8_arm:
# "All implementations of the ARMv8.1 architecture are required to
# implement the CRC32* instructions. These are optional in ARMv8.0."
# - ChromeOS has wrapper scripts that are borking the compiler flags.
# - Fuchsia just added a syscall for feature detection.
# TODO(cavalcantii): crbug.com/810125.
if (!is_ios && !is_chromeos && !is_fuchsia) {
defines = [ "CRC32_ARMV8_CRC32" ]
# Select which run-time feature detection backend arm_features.c compiles.
if (is_android) {
defines += [ "ARMV8_OS_ANDROID" ]
# NOTE(review): is_chromeos is already excluded by the enclosing condition,
# so only is_linux can select this branch here.
} else if (is_linux || is_chromeos) {
defines += [ "ARMV8_OS_LINUX" ]
}
}
}
}
# Source set holding the ARM CPU feature detection and the ARMv8 CRC32
# implementation. It only exists for ARM/ARM64 targets and is visible only to
# targets in this build file.
if (current_cpu == "arm" || current_cpu == "arm64") {
source_set("zlib_arm_crc32") {
visibility = [ ":*" ]
# Same platform restrictions as zlib_arm_crc32_config: on the excluded
# platforms the target has no sources and only forwards the public config.
if (!is_ios && !is_chromeos && !is_fuchsia) {
include_dirs = [ "." ]
if (is_android) {
import("//build/config/android/config.gni")
# Depend on the cpu_features target only when an NDK root is configured.
if (defined(android_ndk_root) && android_ndk_root != "") {
deps = [
"//third_party/android_tools:cpu_features",
]
}
}
sources = [
"arm_features.c",
"arm_features.h",
"crc32_simd.c",
"crc32_simd.h",
]
# Build the C sources for ARMv8-A with the CRC extension (the flag is
# skipped only for non-clang Windows toolchains).
if (!is_win || is_clang) {
cflags_c = [ "-march=armv8-a+crc" ]
}
# Non-debug builds swap the default optimization config for optimize_speed.
if (!is_debug) {
configs -= [ "//build/config/compiler:default_optimization" ]
configs += [ "//build/config/compiler:optimize_speed" ]
}
}
public_configs = [ ":zlib_arm_crc32_config" ]
}
}
config("zlib_inflate_chunk_simd_config") {
if (!is_ios && (current_cpu == "x86" || current_cpu == "x64")) {
defines = [ "INFLATE_CHUNK_SIMD_SSE2" ]
......@@ -222,6 +279,8 @@ static_library("zlib") {
if (arm_use_neon) {
deps += [ ":zlib_adler32_simd" ]
deps += [ ":zlib_arm_crc32" ]
deps += [ ":zlib_inflate_chunk_simd" ]
sources -= [ "inflate.c" ]
}
......
/* arm_features.c -- ARM processor features detection.
*
* Copyright 2018 The Chromium Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the Chromium source repository LICENSE file.
*/
#include "arm_features.h"
#include "zutil.h"
#include <pthread.h>
#include <stdint.h>
#if defined(ARMV8_OS_ANDROID)
#include <cpu-features.h>
#elif defined(ARMV8_OS_LINUX)
#include <asm/hwcap.h>
#include <sys/auxv.h>
#else
#error ### No ARM CPU features detection in your platform/OS
#endif
/* Feature flags, both initially 0. init_arm_features() sets a flag to 1 when
 * the corresponding capability bit is reported for the running CPU.
 */
int ZLIB_INTERNAL arm_cpu_enable_crc32 = 0;
int ZLIB_INTERNAL arm_cpu_enable_pmull = 0;
/* Once-control passed to pthread_once() by arm_check_features(). */
static pthread_once_t cpu_check_inited_once = PTHREAD_ONCE_INIT;
/*
 * Query the OS for the CPU capability bits and record whether the CRC32 and
 * PMULL features are present in arm_cpu_enable_crc32 / arm_cpu_enable_pmull.
 *
 * This is the callback handed to pthread_once() by arm_check_features().
 * The flags are only ever raised to 1 here; they keep their initial 0 value
 * when a feature bit is not reported (or when no query path is compiled in).
 */
static void init_arm_features(void)
{
    uint64_t flag_crc32 = 0, flag_pmull = 0, capabilities = 0;

#if defined(ARMV8_OS_ANDROID)
    /* cpu-features.h numbers the feature bits separately for each CPU family:
     * the ANDROID_CPU_ARM_FEATURE_* masks are only meaningful in 32-bit ARM
     * builds, while AArch64 builds must test ANDROID_CPU_ARM64_FEATURE_*.
     */
#if defined(__aarch64__)
    flag_crc32 = ANDROID_CPU_ARM64_FEATURE_CRC32;
    flag_pmull = ANDROID_CPU_ARM64_FEATURE_PMULL;
#else
    flag_crc32 = ANDROID_CPU_ARM_FEATURE_CRC32;
    flag_pmull = ANDROID_CPU_ARM_FEATURE_PMULL;
#endif
    capabilities = android_getCpuFeatures();
#elif defined(ARMV8_OS_LINUX)
#if defined(__aarch64__)
    flag_crc32 = HWCAP_CRC32;
    flag_pmull = HWCAP_PMULL;
    capabilities = getauxval(AT_HWCAP);
#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
    /* The use of HWCAP2 is for getting features of newer ARMv8-A SoCs
     * while running in 32bits mode (i.e. aarch32).
     */
    flag_crc32 = HWCAP2_CRC32;
    flag_pmull = HWCAP2_PMULL;
    capabilities = getauxval(AT_HWCAP2);
#endif
#endif

    if (capabilities & flag_crc32)
        arm_cpu_enable_crc32 = 1;

    if (capabilities & flag_pmull)
        arm_cpu_enable_pmull = 1;
}
/*
 * Trigger ARM CPU feature detection. The probe itself is routed through
 * pthread_once() with a file-level once-control, so repeated calls do not
 * repeat the detection work.
 */
void ZLIB_INTERNAL arm_check_features(void)
{
    void (*probe)(void) = init_arm_features;

    (void)pthread_once(&cpu_check_inited_once, probe);
}
/* arm_features.h -- ARM processor features detection.
*
* Copyright 2018 The Chromium Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
* found in the Chromium source repository LICENSE file.
*/
#include "zlib.h"
/* Non-zero once feature detection has found the CRC32 capability bit. */
extern int arm_cpu_enable_crc32;
/* Non-zero once feature detection has found the PMULL capability bit. */
extern int arm_cpu_enable_pmull;
/* Runs the CPU feature detection that fills in the two flags above. */
void arm_check_features(void);
......@@ -34,6 +34,9 @@
#if defined(CRC32_SIMD_SSE42_PCLMUL)
#include "crc32_simd.h"
#elif defined(CRC32_ARMV8_CRC32)
#include "arm_features.h"
#include "crc32_simd.h"
#endif
/* Definitions for doing the crc four data bytes at a time. */
......@@ -277,6 +280,22 @@ unsigned long ZEXPORT crc32(crc, buf, len)
const unsigned char FAR *buf;
uInt len;
{
#if defined(CRC32_ARMV8_CRC32)
/* We need to verify ARM CPU features, so exploit the common usage pattern
* of calling this function with Z_NULL for an initial valid crc value.
* This allows caching the result of the feature check and avoids extraneous
* function calls.
* TODO: try to move this to crc32_z if we don't lose performance on ARM.
*/
if (buf == Z_NULL) {
if (!len) /* Assume user is calling crc32(0, NULL, 0); */
arm_check_features();
return 0UL;
}
if (arm_cpu_enable_crc32)
return armv8_crc32_little(crc, buf, len);
#endif
return crc32_z(crc, buf, len);
}
......
......@@ -154,4 +154,52 @@ uint32_t ZLIB_INTERNAL crc32_sse42_simd_( /* SSE4.2+PCLMUL */
return _mm_extract_epi32(x1, 1);
}
#endif /* CRC32_SIMD_SSE42_PCLMUL */
#elif defined(CRC32_ARMV8_CRC32)
/* CRC32 checksums using the ARMv8-a CRC32 instructions.
*
* TODO: implement a version using the PMULL instruction.
*/
#include <arm_acle.h>
/*
 * Fold `len` bytes starting at `buf` into the running checksum `crc` using
 * the __crc32b / __crc32d intrinsics, and return the updated checksum.
 *
 * The incoming value is bit-inverted before processing and the result is
 * inverted again on return. Data is consumed in three phases: single bytes
 * until the pointer is 8-byte aligned, then 64-bit words (eight per iteration
 * while at least 64 bytes remain, one per iteration afterwards), and finally
 * any leftover bytes one at a time.
 */
uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc,
                                          const unsigned char *buf,
                                          z_size_t len)
{
    uint32_t acc = (uint32_t) ~crc;

    /* Head: byte-wise until buf sits on an 8-byte boundary (or input ends). */
    for (; len != 0 && ((uintptr_t)buf & 7) != 0; --len)
        acc = __crc32b(acc, *buf++);

    const uint64_t *words = (const uint64_t *)buf;

    /* Main loop: 64 bytes (eight 64-bit words) per iteration. */
    for (; len >= 64; len -= 64) {
        acc = __crc32d(acc, words[0]);
        acc = __crc32d(acc, words[1]);
        acc = __crc32d(acc, words[2]);
        acc = __crc32d(acc, words[3]);
        acc = __crc32d(acc, words[4]);
        acc = __crc32d(acc, words[5]);
        acc = __crc32d(acc, words[6]);
        acc = __crc32d(acc, words[7]);
        words += 8;
    }

    /* Remaining whole 64-bit words. */
    for (; len >= 8; len -= 8)
        acc = __crc32d(acc, *words++);

    /* Tail: fewer than 8 bytes left. */
    const unsigned char *tail = (const unsigned char *)words;
    for (; len != 0; --len)
        acc = __crc32b(acc, *tail++);

    return ~acc;
}
#endif
......@@ -25,3 +25,11 @@ uint32_t ZLIB_INTERNAL crc32_sse42_simd_(
*/
#define Z_CRC32_SSE42_MINIMUM_LENGTH 64
#define Z_CRC32_SSE42_CHUNKSIZE_MASK 15
/*
 * CRC32 checksums using the ARMv8-a CRC32 instructions.
 *
 * Updates the running checksum `crc` with the `len` bytes at `buf` and
 * returns the new value. crc32() only dispatches here when
 * arm_cpu_enable_crc32 is set.
 */
uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc,
const unsigned char* buf,
z_size_t len);
......@@ -174,4 +174,10 @@
/* Symbols added by crc32_simd.c */
#define crc32_sse42_simd_ Cr_z_crc32_sse42_simd_
/* Symbols added by armv8_crc32: the ARM feature flags, the feature check
 * and the ARMv8 CRC32 routine, each renamed with the Cr_z_ prefix. */
#define arm_cpu_enable_crc32 Cr_z_arm_cpu_enable_crc32
#define arm_cpu_enable_pmull Cr_z_arm_cpu_enable_pmull
#define arm_check_features Cr_z_arm_check_features
#define armv8_crc32_little Cr_z_armv8_crc32_little
#endif /* THIRD_PARTY_ZLIB_NAMES_H_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment