Commit 1364a33f, authored by Adenilson Cavalcanti, committed by Commit Bot

ARM optimized insert_string

Using a faster hash function yields a considerable performance boost
in compression (average 8% on A53 and 24% on A72).

This change was enabled by the previous patch, which added an optimized
crc32 using ARMv8-1 crypto extensions, for performing CPU feature detection
(so it won't help older ARMv7 SoCs).

Bug: 873759
Change-Id: I88ece549a63d923beef4f96a046acdf09e529784
Reviewed-on: https://chromium-review.googlesource.com/1173262
Reviewed-by: Chris Blume <cblume@chromium.org>
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Adenilson Cavalcanti <cavalcantii@chromium.org>
Cr-Commit-Position: refs/heads/master@{#583113}
parent b2fd9098
......@@ -202,4 +202,24 @@ uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc,
return ~c;
}
/*
 * Insert the string starting at window position |str| into the hash chains,
 * using the ARM CRC32 instruction (__crc32w) as the hash function.
 *
 * s:   deflate state whose window, head and prev tables are used.
 * str: position in s->window of the string to insert.
 *
 * Returns the previous head of the hash chain that |str| was inserted into.
 */
Pos ZLIB_INTERNAL insert_string_arm(deflate_state *const s, const Pos str)
{
    Pos ret;
    unsigned val, h = 0;
    unsigned slot;

    /* Load one 'unsigned' worth of data starting at window[str]. |str| is an
     * arbitrary position, so that address is generally not suitably aligned
     * for 'unsigned'; dereferencing a casted pointer there is undefined
     * behavior. memcpy() performs the same load in a well-defined way, and
     * compilers typically lower a fixed-size copy like this to a plain load.
     */
    memcpy(&val, &s->window[str], sizeof(val));

    /* From level 6 upwards only the low 24 bits of the loaded word are
     * hashed.
     */
    if (s->level >= 6) {
        val &= 0xFFFFFF;
    }

    h = __crc32w(h, val);

    /* Compute the table slot once; it is used for both the read and the
     * write of the chain head.
     */
    slot = h & s->hash_mask;
    ret = s->head[slot];
    s->head[slot] = str;
    s->prev[str & s->w_mask] = ret;
    return ret;
}
#endif
......@@ -9,6 +9,7 @@
#include <string.h>

#include "zconf.h"
#include "zutil.h"
#include "deflate.h"
/*
* crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
......@@ -33,3 +34,8 @@ uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc,
const unsigned char* buf,
z_size_t len);
/*
 * Insert hash string.
 *
 * Hashes the word read from s->window at position |str| with the ARM CRC32
 * instruction, records |str| as the new head of the selected hash chain,
 * links the previous head into s->prev, and returns that previous head.
 */
Pos ZLIB_INTERNAL insert_string_arm(deflate_state *const s, const Pos str);
......@@ -51,9 +51,17 @@
#include <assert.h>
#include "deflate.h"
#include "x86.h"
#if (defined(__ARM_NEON__) || defined(__ARM_NEON))
#include "contrib/optimizations/slide_hash_neon.h"
#endif
/* We need the ARMv8 CRC32 instructions to implement the optimized hash in
 * insert_string.
 */
#if defined(CRC32_ARMV8_CRC32)
#include "arm_features.h"
#include "crc32_simd.h"
#endif
const char deflate_copyright[] =
" deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
......@@ -207,12 +215,16 @@ local INLINE Pos insert_string_c(deflate_state *const s, const Pos str)
/*
 * Dispatch to the hash-insert implementation selected at run time.
 *
 * When built with CRC32_ARMV8_CRC32 and arm_cpu_enable_crc32 is set, the ARM
 * variant is used. Otherwise the SSE variant is used if x86_cpu_enable_simd
 * is set, and the portable C variant is the fallback.
 *
 * Returns whatever the selected implementation returns for (s, str).
 */
local INLINE Pos insert_string(deflate_state *const s, const Pos str)
{
#if defined(CRC32_ARMV8_CRC32)
    if (arm_cpu_enable_crc32) {
        return insert_string_arm(s, str);
    }
#endif
    return x86_cpu_enable_simd ? insert_string_sse(s, str)
                               : insert_string_c(s, str);
}
/* ===========================================================================
* Initialize the hash table (avoiding 64K overflow for 16 bit systems).
* prev[] will be initialized on the fly.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment