[AArch64] Add HF vector modes to lane-to-lane INS pattern

* config/aarch64/aarch64-simd.md (*aarch64_simd_vec_copy_lane<mode>):
        Use VALL_F16 iterator rather than VALL.

        * gcc.target/aarch64/hfmode_ins_1.c: New test.

From-SVN: r248835
This commit is contained in:
Kyrylo Tkachov 2017-06-02 15:03:54 +00:00 committed by Kyrylo Tkachov
parent d65d1bd970
commit b160939bf3
4 changed files with 35 additions and 5 deletions

View File

@@ -8,6 +8,11 @@
* config/vx-common.h (DWARF_UNWIND_INFO): Switch #define to 1.
2017-06-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64-simd.md (*aarch64_simd_vec_copy_lane<mode>):
Use VALL_F16 iterator rather than VALL.
2017-06-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64.c (aarch64_split_compare_and_swap):

View File

@@ -565,14 +565,14 @@
)
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
[(set (match_operand:VALL 0 "register_operand" "=w")
(vec_merge:VALL
(vec_duplicate:VALL
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_merge:VALL_F16
(vec_duplicate:VALL_F16
(vec_select:<VEL>
(match_operand:VALL 3 "register_operand" "w")
(match_operand:VALL_F16 3 "register_operand" "w")
(parallel
[(match_operand:SI 4 "immediate_operand" "i")])))
(match_operand:VALL 1 "register_operand" "0")
(match_operand:VALL_F16 1 "register_operand" "0")
(match_operand:SI 2 "immediate_operand" "i")))]
"TARGET_SIMD"
{

View File

@@ -6,6 +6,10 @@
* gcc.target/powerpc/fold-vec-minmax-longlong.c: New.
* gcc.target/powerpc/fold-vec-minmax-short.c: New.
2017-06-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/hfmode_ins_1.c: New test.
2017-06-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c: New test.

View File

@@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Check that we can perform this in a single INS without doing any DUPs. */
#include <arm_neon.h>
/* Eight-lane float16 case: read lane 2 of B and write that element into
   lane 3 of A, returning the updated copy of A.  Both lane indices are
   literal constants, as the lane intrinsics require.  */
float16x8_t
foo (float16x8_t a, float16x8_t b)
{
  float16_t picked = vgetq_lane_f16 (b, 2);

  return vsetq_lane_f16 (picked, a, 3);
}
/* Four-lane float16 case: read lane 2 of B and write that element into
   lane 3 of A, returning the updated copy of A.  Both lane indices are
   literal constants, as the lane intrinsics require.  */
float16x4_t
bar (float16x4_t a, float16x4_t b)
{
  float16_t picked = vget_lane_f16 (b, 2);

  return vset_lane_f16 (picked, a, 3);
}
/* { dg-final { scan-assembler-times "ins\\t" 2 } } */
/* { dg-final { scan-assembler-not "dup\\t" } } */