Roll src/third_party/boringssl/src bac5544e9..f8f35c955

https://boringssl.googlesource.com/boringssl/+log/bac5544e9832c65c95283e95062263c79a9a6733..f8f35c95550e6796ab8b2c0f8e37e7ce4bc67302 Bug: none Change-Id: I454f1272780b63e64438bd021413a0802d3b9279 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2628934 Commit-Queue: David Benjamin <davidben@chromium.org> Commit-Queue: Adam Langley <agl@chromium.org> Auto-Submit: David Benjamin <davidben@chromium.org> Reviewed-by: Adam Langley <agl@chromium.org> Cr-Commit-Position: refs/heads/master@{#843796}

Roll src/third_party/boringssl/src bac5544e9..f8f35c955
https://boringssl.googlesource.com/boringssl/+log/bac5544e9832c65c95283e95062263c79a9a6733..f8f35c95550e6796ab8b2c0f8e37e7ce4bc67302 Bug: none Change-Id: I454f1272780b63e64438bd021413a0802d3b9279 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2628934 Commit-Queue: David Benjamin <davidben@chromium.org> Commit-Queue: Adam Langley <agl@chromium.org> Auto-Submit: David Benjamin <davidben@chromium.org> Reviewed-by: Adam Langley <agl@chromium.org> Cr-Commit-Position: refs/heads/master@{#843796}
82a6b0c0 · David Benjamin · Chromium LUCI CQ · fc092930 · 82a6b0c0 · 82a6b0c0
Commit 82a6b0c0 authored Jan 14, 2021 by David Benjamin Committed by Chromium LUCI CQ Jan 14, 2021
17 changed files
--- a/DEPS
+++ b/DEPS
@@ -226,7 +226,7 @@ vars = {
  #
  # Note this revision should be updated with
  # third_party/boringssl/roll_boringssl.py, not roll-dep.
-  'boringssl_revision': 'bac5544e9832c65c95283e95062263c79a9a6733',
+  'boringssl_revision': 'f8f35c95550e6796ab8b2c0f8e37e7ce4bc67302',
  # Three lines of non-changing comments so that
  # the commit queue can handle CLs rolling google-toolbox-for-mac
  # and whatever else without interference from each other.

--- a/third_party/boringssl/BUILD.generated.gni
+++ b/third_party/boringssl/BUILD.generated.gni
@@ -78,6 +78,7 @@ crypto_sources = [
  "src/crypto/conf/internal.h",
  "src/crypto/cpu-aarch64-fuchsia.c",
  "src/crypto/cpu-aarch64-linux.c",
+  "src/crypto/cpu-aarch64-win.c",
  "src/crypto/cpu-arm-linux.c",
  "src/crypto/cpu-arm-linux.h",
  "src/crypto/cpu-arm.c",
@@ -572,6 +573,19 @@ crypto_sources_mac_x86_64 = [
  "mac-x86_64/crypto/test/trampoline-x86_64.S",
 ]

+crypto_sources_win_aarch64 = [
+  "win-aarch64/crypto/chacha/chacha-armv8.S",
+  "win-aarch64/crypto/fipsmodule/aesv8-armx64.S",
+  "win-aarch64/crypto/fipsmodule/armv8-mont.S",
+  "win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S",
+  "win-aarch64/crypto/fipsmodule/ghashv8-armx64.S",
+  "win-aarch64/crypto/fipsmodule/sha1-armv8.S",
+  "win-aarch64/crypto/fipsmodule/sha256-armv8.S",
+  "win-aarch64/crypto/fipsmodule/sha512-armv8.S",
+  "win-aarch64/crypto/fipsmodule/vpaes-armv8.S",
+  "win-aarch64/crypto/test/trampoline-armv8.S",
+]
+
 crypto_sources_win_x86 = [
  "win-x86/crypto/chacha/chacha-x86.asm",
  "win-x86/crypto/fipsmodule/aesni-x86.asm",

--- a/third_party/boringssl/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
+++ b/third_party/boringssl/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
@@ -1008,7 +1008,7 @@ Lschedule_mangle_dec:

 Lschedule_mangle_both:
 	tbl	v3.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
-	add	x8, x8, #64-16			// add	$-16,	%r8
+	add	x8, x8, #48			// add	$-16,	%r8
 	and	x8, x8, #~(1<<6)		// and	$0x30,	%r8
 	st1	{v3.2d}, [x2]			// vmovdqu	%xmm3,	(%rdx)
 	ret

--- a/third_party/boringssl/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
+++ b/third_party/boringssl/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
@@ -1009,7 +1009,7 @@ _vpaes_schedule_mangle:

 .Lschedule_mangle_both:
 	tbl	v3.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
-	add	x8, x8, #64-16			// add	$-16,	%r8
+	add	x8, x8, #48			// add	$-16,	%r8
 	and	x8, x8, #~(1<<6)		// and	$0x30,	%r8
 	st1	{v3.2d}, [x2]			// vmovdqu	%xmm3,	(%rdx)
 	ret

--- a/third_party/boringssl/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
+++ b/third_party/boringssl/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
@@ -2104,6 +2104,9 @@ chacha20_poly1305_open:



+
+
+
 .globl	chacha20_poly1305_seal
 .hidden chacha20_poly1305_seal
 .type	chacha20_poly1305_seal,@function

--- a/third_party/boringssl/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
+++ b/third_party/boringssl/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
@@ -2089,6 +2089,9 @@ L$open_sse_128_xor_hash:



+
+
+
 .globl	_chacha20_poly1305_seal
 .private_extern _chacha20_poly1305_seal


--- a/third_party/boringssl/win-aarch64/crypto/chacha/chacha-armv8.S
+++ b/third_party/boringssl/win-aarch64/crypto/chacha/chacha-armv8.S
--- a/third_party/boringssl/win-aarch64/crypto/fipsmodule/aesv8-armx64.S
+++ b/third_party/boringssl/win-aarch64/crypto/fipsmodule/aesv8-armx64.S
--- a/third_party/boringssl/win-aarch64/crypto/fipsmodule/armv8-mont.S
+++ b/third_party/boringssl/win-aarch64/crypto/fipsmodule/armv8-mont.S
--- a/third_party/boringssl/win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
+++ b/third_party/boringssl/win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
--- a/third_party/boringssl/win-aarch64/crypto/fipsmodule/ghashv8-armx64.S
+++ b/third_party/boringssl/win-aarch64/crypto/fipsmodule/ghashv8-armx64.S
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+
+#if !defined(OPENSSL_NO_ASM)
+#if defined(__aarch64__)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+#include <openssl/arm_arch.h>
+
+.text
+.arch	armv8-a+crypto
+.globl	gcm_init_v8
+// gcm_init_v8: reads 16 bytes of H from [x1] and writes 48 bytes (Htable[0..2]) starting at [x0]; touches no stack.
+.def gcm_init_v8		// COFF symbol record; .type 32 marks the symbol as a function
+   .type 32
+.endef
+.align	4
+gcm_init_v8:
+	AARCH64_VALID_CALL_TARGET	// macro from <openssl/arm_arch.h>; presumably an indirect-call landing pad (TODO confirm)
+	ld1	{v17.2d},[x1]		//load input H
+	movi	v19.16b,#0xe1		//fill v19 with 0xe1 bytes
+	shl	v19.2d,v19.2d,#57		//0xc2.0
+	ext	v3.16b,v17.16b,v17.16b,#8		//v3 = H with its 64-bit halves swapped
+	ushr	v18.2d,v19.2d,#63		//v18 = 1 in each 64-bit lane
+	dup	v17.4s,v17.s[1]		//replicate 32-bit lane 1 of H into all lanes
+	ext	v16.16b,v18.16b,v19.16b,#8		//t0=0xc2....01
+	ushr	v18.2d,v3.2d,#63		//isolate the top bit of each 64-bit half
+	sshr	v17.4s,v17.4s,#31		//broadcast carry bit
+	and	v18.16b,v18.16b,v16.16b		//mask the extracted top bits with t0
+	shl	v3.2d,v3.2d,#1		//shift each 64-bit half left by one bit
+	ext	v18.16b,v18.16b,v18.16b,#8		//swap halves so the kept bit lands in the other lane
+	and	v16.16b,v16.16b,v17.16b		//keep t0 only where the carry mask is all-ones
+	orr	v3.16b,v3.16b,v18.16b		//H<<<=1
+	eor	v20.16b,v3.16b,v16.16b		//twisted H
+	st1	{v20.2d},[x0],#16		//store Htable[0]
+
+	//calculate H^2
+	ext	v16.16b,v20.16b,v20.16b,#8		//Karatsuba pre-processing
+	pmull	v0.1q,v20.1d,v20.1d		//low half times low half
+	eor	v16.16b,v16.16b,v20.16b		//v16 = lo^hi of twisted H in both halves
+	pmull2	v2.1q,v20.2d,v20.2d		//high half times high half
+	pmull	v1.1q,v16.1d,v16.1d		//(lo^hi) times (lo^hi), the middle product
+
+	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d		//1st phase
+
+	ins	v2.d[0],v1.d[1]		//move upper middle half into the high product
+	ins	v1.d[1],v0.d[0]		//move lower low-product half into the middle
+	eor	v0.16b,v1.16b,v18.16b
+
+	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase
+	pmull	v0.1q,v0.1d,v19.1d		//multiply by the 0xc2.0 constant again
+	eor	v18.16b,v18.16b,v2.16b
+	eor	v22.16b,v0.16b,v18.16b		//v22 = reduced H^2
+
+	ext	v17.16b,v22.16b,v22.16b,#8		//Karatsuba pre-processing
+	eor	v17.16b,v17.16b,v22.16b		//v17 = lo^hi of H^2 in both halves
+	ext	v21.16b,v16.16b,v17.16b,#8		//pack Karatsuba pre-processed
+	st1	{v21.2d,v22.2d},[x0]		//store Htable[1..2]
+
+	ret
+
+.globl	gcm_gmult_v8
+// gcm_gmult_v8: multiplies the 16-byte Xi at [x0] by the twisted H read from [x1] (32 bytes) and writes the result back to [x0]; touches no stack.
+.def gcm_gmult_v8		// COFF symbol record; .type 32 marks the symbol as a function
+   .type 32
+.endef
+.align	4
+gcm_gmult_v8:
+	AARCH64_VALID_CALL_TARGET	// macro from <openssl/arm_arch.h>; presumably an indirect-call landing pad (TODO confirm)
+	ld1	{v17.2d},[x0]		//load Xi
+	movi	v19.16b,#0xe1		//fill v19 with 0xe1 bytes
+	ld1	{v20.2d,v21.2d},[x1]	//load twisted H, ...
+	shl	v19.2d,v19.2d,#57		//v19 = reduction constant, 0xc2 in the top byte of each half
+#ifndef __ARMEB__
+	rev64	v17.16b,v17.16b		//reverse bytes in each 64-bit lane (skipped when __ARMEB__ is defined)
+#endif
+	ext	v3.16b,v17.16b,v17.16b,#8		//v3 = Xi with its 64-bit halves swapped
+
+	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
+	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
+	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
+	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
+
+	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
+
+	ins	v2.d[0],v1.d[1]		//move upper middle half into the high product
+	ins	v1.d[1],v0.d[0]		//move lower low-product half into the middle
+	eor	v0.16b,v1.16b,v18.16b
+
+	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
+	pmull	v0.1q,v0.1d,v19.1d		//multiply by the reduction constant again
+	eor	v18.16b,v18.16b,v2.16b
+	eor	v0.16b,v0.16b,v18.16b		//v0 = reduced product
+
+#ifndef __ARMEB__
+	rev64	v0.16b,v0.16b		//undo the byte reversal applied on load
+#endif
+	ext	v0.16b,v0.16b,v0.16b,#8		//swap 64-bit halves back into storage order
+	st1	{v0.2d},[x0]		//write out Xi
+
+	ret
+
+.globl	gcm_ghash_v8
+// gcm_ghash_v8: folds x3 bytes of input at [x2] into the 16-byte Xi at [x0] using the 48-byte table at [x1]; NOTE(review): appears to expect x3 to be a non-zero multiple of 16 -- confirm against callers.
+.def gcm_ghash_v8		// COFF symbol record; .type 32 marks the symbol as a function
+   .type 32
+.endef
+.align	4
+gcm_ghash_v8:
+	AARCH64_VALID_CALL_TARGET	// macro from <openssl/arm_arch.h>; presumably an indirect-call landing pad (TODO confirm)
+	ld1	{v0.2d},[x0]		//load [rotated] Xi
+						//"[rotated]" means that
+						//loaded value would have
+						//to be rotated in order to
+						//make it appear as in
+						//algorithm specification
+	subs	x3,x3,#32		//see if x3 is 32 or larger
+	mov	x12,#16		//x12 is used as post-
+						//increment for input pointer;
+						//as loop is modulo-scheduled
+						//x12 is zeroed just in time
+						//to preclude overstepping
+						//inp[len], which means that
+						//last block[s] are actually
+						//loaded twice, but last
+						//copy is not processed
+	ld1	{v20.2d,v21.2d},[x1],#32	//load twisted H, ..., H^2
+	movi	v19.16b,#0xe1		//fill v19 with 0xe1 bytes
+	ld1	{v22.2d},[x1]		//v22 = third 16-byte table entry (the H^2 named above)
+	csel	x12,xzr,x12,eq			//is it time to zero x12?
+	ext	v0.16b,v0.16b,v0.16b,#8		//rotate Xi
+	ld1	{v16.2d},[x2],#16	//load [rotated] I[0]
+	shl	v19.2d,v19.2d,#57		//compose 0xc2.0 constant
+#ifndef __ARMEB__
+	rev64	v16.16b,v16.16b		//reverse bytes in each 64-bit lane (skipped when __ARMEB__ is defined)
+	rev64	v0.16b,v0.16b
+#endif
+	ext	v3.16b,v16.16b,v16.16b,#8		//rotate I[0]
+	b.lo	Lodd_tail_v8		//x3 was less than 32
+	ld1	{v17.2d},[x2],x12	//load [rotated] I[1]
+#ifndef __ARMEB__
+	rev64	v17.16b,v17.16b		//same byte reversal for I[1]
+#endif
+	ext	v7.16b,v17.16b,v17.16b,#8		//rotate I[1]
+	eor	v3.16b,v3.16b,v0.16b		//I[i]^=Xi
+	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
+	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
+	pmull2	v6.1q,v20.2d,v7.2d		//high-half product of H and Ii+1
+	b	Loop_mod2x_v8		//enter the two-block loop at its aligned label
+// Main loop: each pass consumes two 16-byte blocks (x3 drops by 32) and preloads the next two.
+.align	4
+Loop_mod2x_v8:
+	ext	v18.16b,v3.16b,v3.16b,#8		//v18 = v3 with its 64-bit halves swapped
+	subs	x3,x3,#32		//is there more data?
+	pmull	v0.1q,v22.1d,v3.1d		//H^2.lo·Xi.lo
+	csel	x12,xzr,x12,lo			//is it time to zero x12?
+
+	pmull	v5.1q,v21.1d,v17.1d		//middle product for H·Ii+1, from the low half of v21
+	eor	v18.16b,v18.16b,v3.16b		//Karatsuba pre-processing
+	pmull2	v2.1q,v22.2d,v3.2d		//H^2.hi·Xi.hi
+	eor	v0.16b,v0.16b,v4.16b		//accumulate
+	pmull2	v1.1q,v21.2d,v18.2d		//(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
+	ld1	{v16.2d},[x2],x12	//load [rotated] I[i+2]
+
+	eor	v2.16b,v2.16b,v6.16b		//accumulate the high products
+	csel	x12,xzr,x12,eq			//is it time to zero x12?
+	eor	v1.16b,v1.16b,v5.16b		//accumulate the middle products
+
+	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	ld1	{v17.2d},[x2],x12	//load [rotated] I[i+3]
+#ifndef __ARMEB__
+	rev64	v16.16b,v16.16b		//byte-reverse I[i+2] per 64-bit lane
+#endif
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
+
+#ifndef __ARMEB__
+	rev64	v17.16b,v17.16b		//byte-reverse I[i+3] per 64-bit lane
+#endif
+	ins	v2.d[0],v1.d[1]		//move upper middle half into the high product
+	ins	v1.d[1],v0.d[0]		//move lower low-product half into the middle
+	ext	v7.16b,v17.16b,v17.16b,#8		//rotate I[i+3]
+	ext	v3.16b,v16.16b,v16.16b,#8		//rotate I[i+2]
+	eor	v0.16b,v1.16b,v18.16b
+	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
+	eor	v3.16b,v3.16b,v2.16b		//accumulate v3.16b early
+
+	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
+	pmull	v0.1q,v0.1d,v19.1d		//multiply by the 0xc2.0 constant again
+	eor	v3.16b,v3.16b,v18.16b
+	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
+	eor	v3.16b,v3.16b,v0.16b		//v3 now holds next block ^ reduced Xi
+	pmull2	v6.1q,v20.2d,v7.2d		//high-half product of H and Ii+1
+	b.hs	Loop_mod2x_v8		//there was at least 32 more bytes
+// Loop exit: rebuild the values the early accumulation consumed, then check for one leftover block.
+	eor	v2.16b,v2.16b,v18.16b
+	ext	v3.16b,v16.16b,v16.16b,#8		//re-construct v3.16b
+	adds	x3,x3,#32		//re-construct x3
+	eor	v0.16b,v0.16b,v2.16b		//re-construct v0.16b
+	b.eq	Ldone_v8		//is x3 zero?
+Lodd_tail_v8:
+	ext	v18.16b,v0.16b,v0.16b,#8		//v18 = Xi with its 64-bit halves swapped
+	eor	v3.16b,v3.16b,v0.16b		//inp^=Xi
+	eor	v17.16b,v16.16b,v18.16b		//v17.16b is rotated inp^Xi
+
+	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
+	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
+	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
+	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
+
+	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
+
+	ins	v2.d[0],v1.d[1]		//move upper middle half into the high product
+	ins	v1.d[1],v0.d[0]		//move lower low-product half into the middle
+	eor	v0.16b,v1.16b,v18.16b
+
+	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
+	pmull	v0.1q,v0.1d,v19.1d		//multiply by the 0xc2.0 constant again
+	eor	v18.16b,v18.16b,v2.16b
+	eor	v0.16b,v0.16b,v18.16b		//v0 = reduced result
+
+Ldone_v8:
+#ifndef __ARMEB__
+	rev64	v0.16b,v0.16b		//undo the byte reversal applied on load
+#endif
+	ext	v0.16b,v0.16b,v0.16b,#8		//swap 64-bit halves back into storage order
+	st1	{v0.2d},[x0]		//write out Xi
+
+	ret
+
+.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	2
+.align	2
+#endif
+#endif  // !OPENSSL_NO_ASM
--- a/third_party/boringssl/win-aarch64/crypto/fipsmodule/sha1-armv8.S
+++ b/third_party/boringssl/win-aarch64/crypto/fipsmodule/sha1-armv8.S
--- a/third_party/boringssl/win-aarch64/crypto/fipsmodule/sha256-armv8.S
+++ b/third_party/boringssl/win-aarch64/crypto/fipsmodule/sha256-armv8.S
--- a/third_party/boringssl/win-aarch64/crypto/fipsmodule/sha512-armv8.S
+++ b/third_party/boringssl/win-aarch64/crypto/fipsmodule/sha512-armv8.S
--- a/third_party/boringssl/win-aarch64/crypto/fipsmodule/vpaes-armv8.S
+++ b/third_party/boringssl/win-aarch64/crypto/fipsmodule/vpaes-armv8.S
--- a/third_party/boringssl/win-aarch64/crypto/test/trampoline-armv8.S
+++ b/third_party/boringssl/win-aarch64/crypto/test/trampoline-armv8.S
--- a/third_party/boringssl/win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
+++ b/third_party/boringssl/win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
@@ -2123,6 +2123,9 @@ $L$SEH_end_chacha20_poly1305_open:



+
+
+
 global	chacha20_poly1305_seal

 ALIGN	64