Commit 1364a33f authored by Adenilson Cavalcanti, committed by Commit Bot

ARM optimized insert_string

Using a faster hash function yields a considerable performance boost
in compression (average 8% on A53 and 24% on A72).

This change builds on the previous patch (optimized crc32 using the
ARMv8-1 crypto extensions), which it relies on for CPU feature detection,
so it won't help older ARMv7 SoCs.

Bug: 873759
Change-Id: I88ece549a63d923beef4f96a046acdf09e529784
Reviewed-on: https://chromium-review.googlesource.com/1173262
Reviewed-by: Chris Blume <cblume@chromium.org>
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Adenilson Cavalcanti <cavalcantii@chromium.org>
Cr-Commit-Position: refs/heads/master@{#583113}
parent b2fd9098
...@@ -202,4 +202,24 @@ uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc, ...@@ -202,4 +202,24 @@ uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc,
return ~c; return ~c;
} }
/* ===========================================================================
 * Insert position str into the hash table, using the ARM CRC32 instruction
 * (__crc32w) as the hash function.
 *
 * s   - deflate state. Reads s->window, s->level, s->hash_mask and s->w_mask;
 *       updates s->head and s->prev.
 * str - position in s->window of the string being inserted.
 *
 * Returns the position that was previously stored in the hash bucket.
 */
Pos ZLIB_INTERNAL insert_string_arm(deflate_state *const s, const Pos str)
{
    const unsigned char *src = (const unsigned char *)&s->window[str];
    unsigned val, h, bucket;
    Pos ret;

    /* Build the 32-bit word byte by byte (little-endian order) rather than
     * dereferencing a casted `unsigned *`: str is an arbitrary byte offset,
     * so the pointer is generally misaligned, and reading a byte buffer
     * through an `unsigned` lvalue breaks the effective-type rules. On a
     * little-endian target this yields the same value as the direct load.
     * NOTE(review): this still reads four bytes starting at window[str];
     * assumes the caller guarantees they lie inside the buffer - confirm.
     */
    val = (unsigned)src[0]
        | ((unsigned)src[1] << 8)
        | ((unsigned)src[2] << 16)
        | ((unsigned)src[3] << 24);

    /* For levels >= 6 only the bytes at str, str+1 and str+2 are hashed. */
    if (s->level >= 6)
        val &= 0xFFFFFF;

    h = __crc32w(0, val);
    bucket = h & s->hash_mask;

    /* Push str at the front of the bucket's chain. */
    ret = s->head[bucket];
    s->head[bucket] = str;
    s->prev[str & s->w_mask] = ret;
    return ret;
}
#endif #endif
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "zconf.h" #include "zconf.h"
#include "zutil.h" #include "zutil.h"
#include "deflate.h"
/* /*
* crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer * crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
...@@ -33,3 +34,8 @@ uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc, ...@@ -33,3 +34,8 @@ uint32_t ZLIB_INTERNAL armv8_crc32_little(unsigned long crc,
const unsigned char* buf, const unsigned char* buf,
z_size_t len); z_size_t len);
/*
 * insert_string_arm(): hash the bytes starting at s->window[str] with the
 * ARM CRC32 instruction, store str as the new head of the matching hash
 * bucket in s->head, link the previous head into s->prev, and return that
 * previous head.
 */
Pos ZLIB_INTERNAL insert_string_arm(deflate_state *const s, const Pos str);
...@@ -51,9 +51,17 @@ ...@@ -51,9 +51,17 @@
#include <assert.h> #include <assert.h>
#include "deflate.h" #include "deflate.h"
#include "x86.h" #include "x86.h"
#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) #if (defined(__ARM_NEON__) || defined(__ARM_NEON))
#include "contrib/optimizations/slide_hash_neon.h" #include "contrib/optimizations/slide_hash_neon.h"
#endif #endif
/* We need crypto extension crc32 to implement optimized hash in
* insert_string.
*/
#if defined(CRC32_ARMV8_CRC32)
#include "arm_features.h"
#include "crc32_simd.h"
#endif
/* Version and copyright string for the deflate implementation. */
const char deflate_copyright[] =
   " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
...@@ -207,12 +215,16 @@ local INLINE Pos insert_string_c(deflate_state *const s, const Pos str) ...@@ -207,12 +215,16 @@ local INLINE Pos insert_string_c(deflate_state *const s, const Pos str)
/* Insert string str into the hash table by dispatching to one of the
 * available implementations:
 *   - insert_string_arm() when built with CRC32_ARMV8_CRC32 and
 *     arm_cpu_enable_crc32 is non-zero,
 *   - insert_string_sse() when x86_cpu_enable_simd is non-zero,
 *   - insert_string_c() otherwise.
 * Returns the value returned by the selected implementation.
 */
local INLINE Pos insert_string(deflate_state *const s, const Pos str)
{
#if defined(CRC32_ARMV8_CRC32)
    if (arm_cpu_enable_crc32)
        return insert_string_arm(s, str);
#endif
    if (x86_cpu_enable_simd)
        return insert_string_sse(s, str);
    return insert_string_c(s, str);
}
/* =========================================================================== /* ===========================================================================
* Initialize the hash table (avoiding 64K overflow for 16 bit systems). * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
* prev[] will be initialized on the fly. * prev[] will be initialized on the fly.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment