Commit d3d193ff authored by Felicia Lim, committed by Commit Bot

[Opus] Avoid processing LPC coeffs beyond the given order in NEON optimizations

Apply patch from
https://git.xiph.org/?p=opus.git;a=commit;h=812ae3fb5c589aaafe761b8ebf86bcbbb8f0ed76

Bug: 946351
Change-Id: I55f2abb3950ff86780a68ef7ee8d28e142c0fd23
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1713325
Reviewed-by: James Zern <jzern@google.com>
Commit-Queue: Felicia Lim <flim@chromium.org>
Cr-Commit-Position: refs/heads/master@{#680520}
parent c4b94875
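
The upstream change fixes an over-read in silk_LPC_inverse_pred_gain_neon(): the third vector of eight Q12 coefficients was loaded from A_Q12 + 16 unconditionally, even for prediction orders of 16 or less, so lanes beyond the given order were read and later stored. The patch guards that load on order > 16 and keeps every store within the order (see the hunks below).

A minimal standalone sketch of the guarded-load pattern, not the patched function itself (the helper name sum_coeffs and the plain int16_t/int32_t types, used here instead of the opus_int* typedefs, are illustrative assumptions):

    #include <arm_neon.h>
    #include <stdint.h>

    /* Sum the first `order` Q12 coefficients eight lanes at a time.
       `order` is assumed to be 8, 16 or 24, as in the even-order SILK
       path. Each extra vector is loaded only when the order actually
       covers its lanes, so no read goes past A_Q12[ order - 1 ]. */
    static int32_t sum_coeffs( const int16_t *A_Q12, int order )
    {
        int32x4_t acc = vpaddlq_s16( vld1q_s16( A_Q12 + 0 ) );
        int32x2_t t_s32x2;
        if( order > 8 ) {
            acc = vpadalq_s16( acc, vld1q_s16( A_Q12 + 8 ) );
        }
        if( order > 16 ) { /* the guard the patch adds for the third vector */
            acc = vpadalq_s16( acc, vld1q_s16( A_Q12 + 16 ) );
        }
        t_s32x2 = vpadd_s32( vget_low_s32( acc ), vget_high_s32( acc ) );
        return vget_lane_s32( vpadd_s32( t_s32x2, t_s32x2 ), 0 );
    }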
@@ -21,3 +21,4 @@ Local changes:
 * Saturate add to avoid int overflow to fix chromium:842528. This should be
   reverted when updating to v1.3
 * Apply https://github.com/xiph/opus/commit/4f4b11c2398e96134dc62ee794bfe33ecd6e9bd2
+* Apply https://git.xiph.org/?p=opus.git;a=commit;h=812ae3fb5c589aaafe761b8ebf86bcbbb8f0ed76
@@ -210,28 +210,32 @@ opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse predi
         /* Increase Q domain of the AR coefficients */
         t0_s16x8 = vld1q_s16( A_Q12 + 0 );
         t1_s16x8 = vld1q_s16( A_Q12 + 8 );
-        t2_s16x8 = vld1q_s16( A_Q12 + 16 );
+        if ( order > 16 ) {
+            t2_s16x8 = vld1q_s16( A_Q12 + 16 );
+        }
         t0_s32x4 = vpaddlq_s16( t0_s16x8 );
         switch( order - leftover )
         {
             case 24:
                 t0_s32x4 = vpadalq_s16( t0_s32x4, t2_s16x8 );
-                /* Intend to fall through */
+                vst1q_s32( Atmp_QA + 16, vshll_n_s16( vget_low_s16 ( t2_s16x8 ), QA - 12 ) );
+                vst1q_s32( Atmp_QA + 20, vshll_n_s16( vget_high_s16( t2_s16x8 ), QA - 12 ) );
+                /* FALLTHROUGH */
             case 16:
                 t0_s32x4 = vpadalq_s16( t0_s32x4, t1_s16x8 );
-                vst1q_s32( Atmp_QA + 16, vshll_n_s16( vget_low_s16 ( t2_s16x8 ), QA - 12 ) );
-                vst1q_s32( Atmp_QA + 20, vshll_n_s16( vget_high_s16( t2_s16x8 ), QA - 12 ) );
-                /* Intend to fall through */
+                vst1q_s32( Atmp_QA + 8, vshll_n_s16( vget_low_s16 ( t1_s16x8 ), QA - 12 ) );
+                vst1q_s32( Atmp_QA + 12, vshll_n_s16( vget_high_s16( t1_s16x8 ), QA - 12 ) );
+                /* FALLTHROUGH */
            case 8:
            {
                const int32x2_t t_s32x2 = vpadd_s32( vget_low_s32( t0_s32x4 ), vget_high_s32( t0_s32x4 ) );
                const int64x1_t t_s64x1 = vpaddl_s32( t_s32x2 );
                DC_resp = vget_lane_s32( vreinterpret_s32_s64( t_s64x1 ), 0 );
-               vst1q_s32( Atmp_QA + 8, vshll_n_s16( vget_low_s16 ( t1_s16x8 ), QA - 12 ) );
-               vst1q_s32( Atmp_QA + 12, vshll_n_s16( vget_high_s16( t1_s16x8 ), QA - 12 ) );
+               vst1q_s32( Atmp_QA + 0, vshll_n_s16( vget_low_s16 ( t0_s16x8 ), QA - 12 ) );
+               vst1q_s32( Atmp_QA + 4, vshll_n_s16( vget_high_s16( t0_s16x8 ), QA - 12 ) );
            }
            break;
@@ -246,17 +250,23 @@ opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse predi
            case 6:
                DC_resp += (opus_int32)A_Q12[ 5 ];
                DC_resp += (opus_int32)A_Q12[ 4 ];
-               /* Intend to fall through */
+               Atmp_QA[ order - leftover + 5 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 5 ], QA - 12 );
+               Atmp_QA[ order - leftover + 4 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 4 ], QA - 12 );
+               /* FALLTHROUGH */
            case 4:
                DC_resp += (opus_int32)A_Q12[ 3 ];
                DC_resp += (opus_int32)A_Q12[ 2 ];
-               /* Intend to fall through */
+               Atmp_QA[ order - leftover + 3 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 3 ], QA - 12 );
+               Atmp_QA[ order - leftover + 2 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 2 ], QA - 12 );
+               /* FALLTHROUGH */
            case 2:
                DC_resp += (opus_int32)A_Q12[ 1 ];
                DC_resp += (opus_int32)A_Q12[ 0 ];
-               /* Intend to fall through */
+               Atmp_QA[ order - leftover + 1 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 1 ], QA - 12 );
+               Atmp_QA[ order - leftover + 0 ] = silk_LSHIFT32( (opus_int32)A_Q12[ 0 ], QA - 12 );
+               /* FALLTHROUGH */
            default:
                break;
@@ -266,8 +276,6 @@ opus_int32 silk_LPC_inverse_pred_gain_neon( /* O Returns inverse predi
        if( DC_resp >= 4096 ) {
            invGain_Q30 = 0;
        } else {
-           vst1q_s32( Atmp_QA + 0, vshll_n_s16( vget_low_s16 ( t0_s16x8 ), QA - 12 ) );
-           vst1q_s32( Atmp_QA + 4, vshll_n_s16( vget_high_s16( t0_s16x8 ), QA - 12 ) );
            invGain_Q30 = LPC_inverse_pred_gain_QA_neon( Atmp_QA, order );
        }
    }
...
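
With the load of t2_s16x8 now conditional, its lanes are uninitialized whenever order <= 16, so the patch also relocates each vector's vst1q_s32() into the switch case that is only reached when the order covers those coefficients, and the leftover cases (6, 4, 2) widen the remaining Q12 values to QA scalar-wise with silk_LSHIFT32(). The unconditional stores of t0_s16x8 that used to sit just before the LPC_inverse_pred_gain_QA_neon() call become redundant and are removed in the last hunk.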