mirror of git://gcc.gnu.org/git/gcc.git
re PR target/55073 (Wrong Neon code generation at -O2 caused by -fschedule-insns)
PR target/55073 * arm/neon.md (neon_vtrn<mode>_internal): Split into expand and insn patterns. Re-order insn arguments to tie inputs to outputs. (neon_vzip<mode>_internal): Likewise. (neon_vuzp<mode>_internal): Likewise. * gcc.target/arm/pr55073.C: New test. From-SVN: r193943
This commit is contained in:
parent
1d02988986
commit
28131dfe44
|
|
@ -1,3 +1,12 @@
|
|||
2012-11-29 Richard Earnshaw <rearnsha@arm.com>
|
||||
|
||||
PR target/55073
|
||||
* arm/neon.md (neon_vtrn<mode>_internal): Split into expand
|
||||
and insn patterns. Re-order insn arguments to tie inputs to
|
||||
outputs.
|
||||
(neon_vzip<mode>_internal): Likewise.
|
||||
(neon_vuzp<mode>_internal): Likewise.
|
||||
|
||||
2012-11-29 Marc Glisse <marc.glisse@inria.fr>
|
||||
|
||||
PR c++/53094
|
||||
|
|
|
|||
|
|
@ -4411,16 +4411,29 @@
|
|||
[(set_attr "neon_type" "neon_bp_3cycle")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vtrn<mode>_internal"
|
||||
(define_expand "neon_vtrn<mode>_internal"
|
||||
[(parallel
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
|
||||
(match_operand:VDQW 2 "s_register_operand" "")]
|
||||
UNSPEC_VTRN1))
|
||||
(set (match_operand:VDQW 3 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
|
||||
"TARGET_NEON"
|
||||
""
|
||||
)
|
||||
|
||||
;; Note: Different operand numbering to handle tied registers correctly.
|
||||
(define_insn "*neon_vtrn<mode>_insn"
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
|
||||
(match_operand:VDQW 2 "s_register_operand" "w")]
|
||||
(match_operand:VDQW 3 "s_register_operand" "2")]
|
||||
UNSPEC_VTRN1))
|
||||
(set (match_operand:VDQW 3 "s_register_operand" "=2")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 2)]
|
||||
(set (match_operand:VDQW 2 "s_register_operand" "=w")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 3)]
|
||||
UNSPEC_VTRN2))]
|
||||
"TARGET_NEON"
|
||||
"vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
|
||||
"vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
|
||||
[(set (attr "neon_type")
|
||||
(if_then_else (match_test "<Is_d_reg>")
|
||||
(const_string "neon_bp_simple")
|
||||
|
|
@ -4438,16 +4451,29 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "neon_vzip<mode>_internal"
|
||||
(define_expand "neon_vzip<mode>_internal"
|
||||
[(parallel
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
|
||||
(match_operand:VDQW 2 "s_register_operand" "")]
|
||||
UNSPEC_VZIP1))
|
||||
(set (match_operand:VDQW 3 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
|
||||
"TARGET_NEON"
|
||||
""
|
||||
)
|
||||
|
||||
;; Note: Different operand numbering to handle tied registers correctly.
|
||||
(define_insn "*neon_vzip<mode>_insn"
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
|
||||
(match_operand:VDQW 2 "s_register_operand" "w")]
|
||||
(match_operand:VDQW 3 "s_register_operand" "2")]
|
||||
UNSPEC_VZIP1))
|
||||
(set (match_operand:VDQW 3 "s_register_operand" "=2")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 2)]
|
||||
(set (match_operand:VDQW 2 "s_register_operand" "=w")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 3)]
|
||||
UNSPEC_VZIP2))]
|
||||
"TARGET_NEON"
|
||||
"vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
|
||||
"vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
|
||||
[(set (attr "neon_type")
|
||||
(if_then_else (match_test "<Is_d_reg>")
|
||||
(const_string "neon_bp_simple")
|
||||
|
|
@ -4465,16 +4491,29 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "neon_vuzp<mode>_internal"
|
||||
(define_expand "neon_vuzp<mode>_internal"
|
||||
[(parallel
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
|
||||
(match_operand:VDQW 2 "s_register_operand" "")]
|
||||
UNSPEC_VUZP1))
|
||||
(set (match_operand:VDQW 3 "s_register_operand" "")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
|
||||
"TARGET_NEON"
|
||||
""
|
||||
)
|
||||
|
||||
;; Note: Different operand numbering to handle tied registers correctly.
|
||||
(define_insn "*neon_vuzp<mode>_insn"
|
||||
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
|
||||
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
|
||||
(match_operand:VDQW 2 "s_register_operand" "w")]
|
||||
(match_operand:VDQW 3 "s_register_operand" "2")]
|
||||
UNSPEC_VUZP1))
|
||||
(set (match_operand:VDQW 3 "s_register_operand" "=2")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 2)]
|
||||
(set (match_operand:VDQW 2 "s_register_operand" "=w")
|
||||
(unspec:VDQW [(match_dup 1) (match_dup 3)]
|
||||
UNSPEC_VUZP2))]
|
||||
"TARGET_NEON"
|
||||
"vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3"
|
||||
"vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
|
||||
[(set (attr "neon_type")
|
||||
(if_then_else (match_test "<Is_d_reg>")
|
||||
(const_string "neon_bp_simple")
|
||||
|
|
|
|||
|
|
@ -1,3 +1,8 @@
|
|||
2012-11-29 Richard Earnshaw <rearnsha@arm.com>
|
||||
|
||||
PR target/55073
|
||||
* gcc.target/arm/pr55073.C: New test.
|
||||
|
||||
2012-11-29 Marc Glisse <marc.glisse@inria.fr>
|
||||
|
||||
PR c++/53094
|
||||
|
|
|
|||
|
|
@ -0,0 +1,74 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target arm_neon } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-add-options arm_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
struct __attribute__((aligned(16))) _v16u8_ {
|
||||
uint8x16_t val;
|
||||
_v16u8_() { }
|
||||
|
||||
_v16u8_( const uint8x16_t &src) { val = src; }
|
||||
_v16u8_( const int16x8_t &src) { val = vreinterpretq_u8_s16(src); }
|
||||
_v16u8_( const uint32x4_t &src) { val = vreinterpretq_u8_u32(src); }
|
||||
|
||||
operator uint8x16_t () const { return val; }
|
||||
operator int8x16_t () const { return vreinterpretq_s8_u8 (val); }
|
||||
operator int16x8_t () const { return vreinterpretq_s16_u8(val); }
|
||||
operator uint32x4_t () const { return vreinterpretq_u32_u8(val); }
|
||||
operator int32x4_t () const { return vreinterpretq_s32_u8(val); }
|
||||
};
|
||||
typedef struct _v16u8_ v16u8;
|
||||
typedef const v16u8 cv16u8;
|
||||
|
||||
typedef v16u8 v16i8;
|
||||
typedef v16u8 v8i16;
|
||||
typedef v16u8 v4u32;
|
||||
|
||||
inline v16u8 __attribute__((always_inline)) mergelo( const v16u8 & s, const v16u8 & t )
|
||||
{
|
||||
uint8x8x2_t r = vzip_u8( vget_low_u8(s), vget_low_u8(t) );
|
||||
return vcombine_u8( r.val[0], r.val[1] );
|
||||
}
|
||||
|
||||
inline v8i16 __attribute__((always_inline)) unpacklo(const v16i8 & s)
|
||||
{
|
||||
return vmovl_s8( vget_low_s8( s ) );
|
||||
}
|
||||
|
||||
const uint32_t __attribute__((aligned(16))) _InA [4] = { 0xFF020001, 0xFF020001, 0xFF000101, 0xFF000101 } ;
|
||||
const uint32_t __attribute__((aligned(16))) _InB [4] = { 0xFF050002, 0xFF050002, 0xFF000303, 0xFF000203 } ;
|
||||
|
||||
__attribute__((noinline)) v16i8 test_func(void)
|
||||
{
|
||||
v16u8 A = vld1q_u8( (uint8_t*) _InA );
|
||||
v16u8 B = vld1q_u8( (uint8_t*) _InB );
|
||||
v8i16 r = vdupq_n_s16(2);
|
||||
|
||||
v16u8 _0 = mergelo( A, B );
|
||||
v16u8 _1 = mergelo( B, A );
|
||||
|
||||
v16u8 _2 = mergelo( _0, _1 );
|
||||
v16u8 _3 = mergelo( _1, _0 );
|
||||
|
||||
v8i16 _4 = vsubq_s16( unpacklo( _2 ), r );
|
||||
v8i16 _5 = vsubq_s16( unpacklo( _3 ), r );
|
||||
|
||||
v8i16 ret = vaddq_s16( _4, _5 );
|
||||
|
||||
return ( ret );
|
||||
}
|
||||
|
||||
int main (int argc, char **argv)
|
||||
{
|
||||
v16u8 val = test_func();
|
||||
|
||||
if (vgetq_lane_u32( val, 0 ) != 0xffffffff
|
||||
|| vgetq_lane_u32( val, 1 ) != 0xffffffff
|
||||
|| vgetq_lane_u32( val, 2 ) != 0xfffcfffc
|
||||
|| vgetq_lane_u32( val, 3 ) != 0xfffcfffc)
|
||||
abort ();
|
||||
exit (0);
|
||||
}
|
||||
Loading…
Reference in New Issue