rs6000-protos.h (rs6000_split_vec_extract_var): New declaration.

[gcc] 2016-07-28 Michael Meissner <meissner@linux.vnet.ibm.com> * config/rs6000/rs6000-protos.h (rs6000_split_vec_extract_var): New declaration. * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): Add support for vec_extract of vector double or vector long having a variable element number on 64-bit ISA 2.07 systems or newer. * config/rs6000/rs6000.c (rs6000_expand_vector_extract): Likewise. (rs6000_split_vec_extract_var): New function to split a vec_extract built-in function with variable element number. (rtx_is_swappable_p): Variable vec_extracts and shifts are not swappable. * config/rs6000/vsx.md (UNSPEC_VSX_VSLO): New unspec. (UNSPEC_VSX_EXTRACT): Likewise. (vsx_extract_<mode>, VSX_D iterator): Fix constraints to allow direct move instructions to be generated on 64-bit ISA 2.07 systems and newer, and to take advantage of the ISA 3.0 MFVSRLD instruction. (vsx_vslo_<mode>): New insn to do VSLO on V2DFmode and V2DImode arguments for vec_extract variable element. (vsx_extract_<mode>_var, VSX_D iterator): New insn to support vec_extract with variable element on V2DFmode and V2DImode vectors. * config/rs6000/rs6000.h (TARGET_VEXTRACTUB): Remove -mupper-regs-df requirement, since it isn't needed. (TARGET_DIRECT_MOVE_64BIT): New macro to say whether we can do direct moves on 64-bit systems, which allows optimization of vec_extract on 64-bit ISA 2.07 systems and newer. [gcc/testsuite] 2016-07-28 Michael Meissner <meissner@linux.vnet.ibm.com> * gcc.target/powerpc/vec-extract-1.c: New test. From-SVN: r238838
2016-07-28 21:02:06 +00:00 · 2016-07-28 21:02:06 +00:00 · e0d321856b
parent 1a2196e484
commit e0d321856b
8 changed files with 269 additions and 24 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,33 @@
 2016-07-28  Michael Meissner  <meissner@linux.vnet.ibm.com>
 	* config/rs6000/rs6000-protos.h (rs6000_split_vec_extract_var):
 	New declaration.
 	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
 	Add support for vec_extract of vector double or vector long having
 	a variable element number on 64-bit ISA 2.07 systems or newer.
 	* config/rs6000/rs6000.c (rs6000_expand_vector_extract):
 	Likewise.
 	(rs6000_split_vec_extract_var): New function to split a
 	vec_extract built-in function with variable element number.
 	(rtx_is_swappable_p): Variable vec_extracts and shifts are not
 	swappable.
 	* config/rs6000/vsx.md (UNSPEC_VSX_VSLO): New unspec.
 	(UNSPEC_VSX_EXTRACT): Likewise.
 	(vsx_extract_<mode>, VSX_D iterator): Fix constraints to allow
 	direct move instructions to be generated on 64-bit ISA 2.07
 	systems and newer, and to take advantage of the ISA 3.0 MFVSRLD
 	instruction.
 	(vsx_vslo_<mode>): New insn to do VSLO on V2DFmode and V2DImode
 	arguments for vec_extract variable element.
 	(vsx_extract_<mode>_var, VSX_D iterator): New insn to support
 	vec_extract with variable element on V2DFmode and V2DImode
 	vectors.
 	* config/rs6000/rs6000.h (TARGET_VEXTRACTUB): Remove
 	-mupper-regs-df requirement, since it isn't needed.
 	(TARGET_DIRECT_MOVE_64BIT): New macro to say whether we can
 	do direct moves on 64-bit systems, which allows optimization of
 	vec_extract on 64-bit ISA 2.07 systems and newer.
 2016-07-28  Kristina Martsenko  <kristina.martsenko@arm.com>
 2016-07-28  Wilco Dijkstra  <wdijkstr@arm.com>
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@ -5105,29 +5105,61 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 				  arg2);
 	}
-      /* If we can use the VSX xxpermdi instruction, use that for extract.  */
+      /* See if we can optimize vec_extracts with the current VSX instruction
 	 set.  */
      mode = TYPE_MODE (arg1_type);
-      if ((mode == V2DFmode || mode == V2DImode) && VECTOR_MEM_VSX_P (mode)
+      if (VECTOR_MEM_VSX_P (mode))
-	  && TREE_CODE (arg2) == INTEGER_CST
+
 	  && wi::ltu_p (arg2, 2))
 	{
 	  tree call = NULL_TREE;
 	  int nunits = GET_MODE_NUNITS (mode);
-	  if (mode == V2DFmode)
+	  /* If the second argument is an integer constant, if the value is in
-	    call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+	     the expected range, generate the built-in code if we can.  We need
-	  else if (mode == V2DImode)
+	     64-bit and direct move to extract the small integer vectors.  */
-	    call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+	  if (TREE_CODE (arg2) == INTEGER_CST && wi::ltu_p (arg2, nunits))
 	    {
 	      switch (mode)
 		{
 		default:
 		  break;
 		case V1TImode:
 		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI];
 		  break;
 		case V2DFmode:
 		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
 		  break;
 		case V2DImode:
 		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
 		  break;
 		}
 	    }
 	  /* If the second argument is variable, we can optimize it if we are
 	     generating 64-bit code on a machine with direct move.  */
 	  else if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
 	    {
 	      switch (mode)
 		{
 		default:
 		  break;
 		case V2DFmode:
 		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
 		  break;
 		case V2DImode:
 		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
 		  break;
 		}
 	    }
 	  if (call)
 	    return build_call_expr (call, 2, arg1, arg2);
 	}
      else if (mode == V1TImode && VECTOR_MEM_VSX_P (mode)
 	       && TREE_CODE (arg2) == INTEGER_CST
 	       && wi::eq_p (arg2, 0))
 	{
 	  tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI];
 	  return build_call_expr (call, 2, arg1, arg2);
 	}
      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
      arg1_inner_type = TREE_TYPE (arg1_type);
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@ -62,6 +62,7 @@ extern void rs6000_expand_vector_init (rtx, rtx);
 extern void paired_expand_vector_init (rtx, rtx);
 extern void rs6000_expand_vector_set (rtx, rtx, int);
 extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
 extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
 extern bool altivec_expand_vec_perm_const (rtx op[4]);
 extern void altivec_expand_vec_perm_le (rtx op[4]);
 extern bool rs6000_expand_vec_perm_const (rtx op[4]);
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@ -6959,8 +6959,31 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
 	      emit_insn (gen_vsx_extract_v4si (target, vec, elt));
 	      return;
 	    }
-	  else
+	  break;
-	    break;
+	}
    }
  else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
 	   && TARGET_DIRECT_MOVE_64BIT)
    {
      if (GET_MODE (elt) != DImode)
 	{
 	  rtx tmp = gen_reg_rtx (DImode);
 	  convert_move (tmp, elt, 0);
 	  elt = tmp;
 	}
      switch (mode)
 	{
 	case V2DFmode:
 	  emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
 	  return;
 	case V2DImode:
 	  emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
 	  return;
 	default:
 	  gcc_unreachable ();
 	}
    }
@ -6978,6 +7001,99 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
  emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
 }
 /* Split a variable vec_extract operation into the component instructions.  */
 void
 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
 			      rtx tmp_altivec)
 {
  machine_mode mode = GET_MODE (src);
  machine_mode scalar_mode = GET_MODE (dest);
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  int byte_shift = exact_log2 (scalar_size);
  gcc_assert (byte_shift >= 0);
  if (REG_P (src) || SUBREG_P (src))
    {
      int bit_shift = byte_shift + 3;
      rtx element2;
      gcc_assert (REG_P (tmp_gpr) && REG_P (tmp_altivec));
      /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
 	 an XOR, otherwise we need to subtract.  The shift amount is so VSLO
 	 will shift the element into the upper position (adding 3 to convert a
 	 byte shift into a bit shift).  */
      if (scalar_size == 8)
 	{
 	  if (!VECTOR_ELT_ORDER_BIG)
 	    {
 	      emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
 	      element2 = tmp_gpr;
 	    }
 	  else
 	    element2 = element;
 	  /* Generate RLDIC directly to shift left 6 bits and retrieve 1
 	     bit.  */
 	  emit_insn (gen_rtx_SET (tmp_gpr,
 				  gen_rtx_AND (DImode,
 					       gen_rtx_ASHIFT (DImode,
 							       element2,
 							       GEN_INT (6)),
 					       GEN_INT (64))));
 	}
      else
 	{
 	  if (!VECTOR_ELT_ORDER_BIG)
 	    {
 	      rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
 	      emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
 	      emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
 	      element2 = tmp_gpr;
 	    }
 	  else
 	    element2 = element;
 	  emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
 	}
      /* Get the value into the lower byte of the Altivec register where VSLO
 	 expects it.  */
      if (TARGET_P9_VECTOR)
 	emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
      else if (can_create_pseudo_p ())
 	emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
      else
 	{
 	  rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
 	  emit_move_insn (tmp_di, tmp_gpr);
 	  emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
 	}
      /* Do the VSLO to get the value into the final location.  */
      switch (mode)
 	{
 	case V2DFmode:
 	  emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
 	  return;
 	case V2DImode:
 	  emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
 	  return;
 	default:
 	  gcc_unreachable ();
 	}
      return;
    }
  else
    gcc_unreachable ();
 }
 /* Return TRUE if OP is an invalid SUBREG operation on the e500.  */
 bool
@ -38640,6 +38756,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
 	  case UNSPEC_VSX_CVDPSPN:
 	  case UNSPEC_VSX_CVSPDP:
 	  case UNSPEC_VSX_CVSPDPN:
 	  case UNSPEC_VSX_EXTRACT:
 	    return 0;
 	  case UNSPEC_VSPLT_DIRECT:
 	    *special = SH_SPLAT;
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@ -602,7 +602,6 @@ extern int rs6000_vector_align[];
 #define TARGET_DIRECT_MOVE_128	(TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \
 				 && TARGET_POWERPC64)
 #define TARGET_VEXTRACTUB	(TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \
 				 && TARGET_UPPER_REGS_DF \
 				 && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
 /* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
@ -761,6 +760,14 @@ extern int rs6000_vector_align[];
 				 && TARGET_SINGLE_FLOAT			\
 				 && TARGET_DOUBLE_FLOAT)
 /* Macro to say whether we can do optimization where we need to do parts of the
   calculation in 64-bit GPRs and then is transfered to the vector
   registers.  */
 #define TARGET_DIRECT_MOVE_64BIT	(TARGET_DIRECT_MOVE		\
 					 && TARGET_P8_VECTOR		\
 					 && TARGET_POWERPC64		\
 					 && TARGET_UPPER_REGS_DI)
 /* Whether the various reciprocal divide/square root estimate instructions
   exist, and whether we should automatically generate code for the instruction
   by default.  */
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@ -309,6 +309,8 @@
   UNSPEC_VSX_XVCVDPUXDS
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_P9_MEMORY
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
  ])
 ;; VSX moves
@ -2118,16 +2120,13 @@
 ;; register was picked.  Limit the scalar value to FPRs for now.
 (define_insn "vsx_extract_<mode>"
-  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand"
+  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d,    d,     wr, wr")
            "=d,     wm,      wo,    d")
 	(vec_select:<VS_scalar>
-	 (match_operand:VSX_D 1 "gpc_reg_operand"
+	 (match_operand:VSX_D 1 "gpc_reg_operand"      "<VSa>, <VSa>, wm, wo")
            "<VSa>, <VSa>,  <VSa>,  <VSa>")
 	 (parallel
-	  [(match_operand:QI 2 "const_0_to_1_operand"
+	  [(match_operand:QI 2 "const_0_to_1_operand"  "wD,    n,     wD, n")])))]
            "wD,    wD,     wL,     n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
 {
  int element = INTVAL (operands[2]);
@ -2205,6 +2204,34 @@
  [(set_attr "type" "fpstore")
   (set_attr "length" "4")])
 ;; Variable V2DI/V2DF extract shift
 (define_insn "vsx_vslo_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
 	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
 			     (match_operand:V2DI 2 "gpc_reg_operand" "v")]
 			    UNSPEC_VSX_VSLO))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])
 ;; Variable V2DI/V2DF extract
 (define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
 	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v")
 			     (match_operand:DI 2 "gpc_reg_operand" "r")]
 			    UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:V2DI 4 "=&v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
 {
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
 				operands[3], operands[4]);
  DONE;
 })
 ;; Extract a SF element from V4SF
 (define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=f,f")
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,7 @@
 2016-07-28  Michael Meissner  <meissner@linux.vnet.ibm.com>
 	* gcc.target/powerpc/vec-extract-1.c: New test.
 2016-07-28  Steven G. Kargl  <kargl@gcc.gnu.org>
 	PR fortran/71799
--- a/gcc/testsuite/gcc.target/powerpc/vec-extract-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-extract-1.c
@ -0,0 +1,27 @@
 /* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target powerpc_p8vector_ok } */
 /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
 /* { dg-options "-mcpu=power8 -O2 -mupper-regs-df -mupper-regs-di" } */
 #include <altivec.h>
 double
 add_double (vector double a, int n)
 {
  return vec_extract (a, n) + 1.0;
 }
 long
 add_long (vector long a, int n)
 {
  return vec_extract (a, n) + 1;
 }
 /* { dg-final { scan-assembler     "vslo"    } } */
 /* { dg-final { scan-assembler     "mtvsrd"  } } */
 /* { dg-final { scan-assembler     "mfvsrd"  } } */
 /* { dg-final { scan-assembler-not "stxvd2x" } } */
 /* { dg-final { scan-assembler-not "stxvx"   } } */
 /* { dg-final { scan-assembler-not "stxv"    } } */
 /* { dg-final { scan-assembler-not "ldx"     } } */