s390: Optimize fmin/fmax.

On VXE targets, we can directly use the FP min/max instructions instead of calling into libm for fmin/fmax etc. Provide fmin/fmax versions also for vectors even though they cannot be called directly. This will be exploited with a follow-up patch when reductions are introduced. gcc/ChangeLog: * config/s390/s390.md: Update UNSPECs. * config/s390/vector.md (fmax<mode>3): New expander. (fmin<mode>3): New expander. * config/s390/vx-builtins.md (*fmin<mode>): New insn. (vfmin<mode>): Redefined to use new insn. (*fmax<mode>): New insn. (vfmax<mode>): Redefined to use new insn. gcc/testsuite/ChangeLog: * gcc.target/s390/fminmax-1.c: New test. * gcc.target/s390/fminmax-2.c: New test. Signed-off-by: Juergen Christ <jchrist@linux.ibm.com>
2025-06-20 16:08:34 +02:00 · 2025-06-20 16:08:34 +02:00 · c476f554e3
parent 4b9f760c51
commit c476f554e3
5 changed files with 144 additions and 14 deletions
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@ -238,9 +238,6 @@
   UNSPEC_VEC_MSUM
   UNSPEC_VEC_VFMIN
   UNSPEC_VEC_VFMAX
   UNSPEC_VEC_VBLEND
   UNSPEC_VEC_VEVAL
   UNSPEC_VEC_VGEM
@ -253,6 +250,9 @@
   UNSPEC_NNPA_VCFN_V8HI
   UNSPEC_NNPA_VCNF_V8HI
   UNSPEC_FMAX
   UNSPEC_FMIN
 ])
 ;;
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@ -89,6 +89,13 @@
 ; Iterator over V4SF, V2DF, V1TF and TF.  V2DF is always included; the
 ; other three modes are enabled only when TARGET_VXE holds.
 (define_mode_iterator VF_HW [(V4SF "TARGET_VXE") V2DF (V1TF "TARGET_VXE")
 			     (TF "TARGET_VXE")])
 ; FP scalar and vector modes
 ; SF, DF, V1DF and V2DF are always part of the iterator; V1SF, V2SF, V4SF,
 ; V1TF and TF are included only when TARGET_VXE holds.
 (define_mode_iterator VFT_BFP [SF DF
 			      (V1SF "TARGET_VXE") (V2SF "TARGET_VXE") (V4SF "TARGET_VXE")
 			      V1DF V2DF
 			      (V1TF "TARGET_VXE") (TF "TARGET_VXE")])
 ; Iterators grouping the vector modes by total width: V_8 holds the 8-bit
 ; mode, V_16 the 16-bit modes and V_32 the 32-bit modes (integer and SF).
 (define_mode_iterator V_8   [V1QI])
 (define_mode_iterator V_16  [V2QI  V1HI])
 (define_mode_iterator V_32  [V4QI  V2HI V1SI V1SF])
@ -3602,3 +3609,21 @@
 	(umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand")
 				      (match_operand:VIT_HW_VXE3_DT 2 "register_operand")))]
  "TARGET_VX")
 ; fmax
 ; Expander for the "fmax<mode>3" pattern name, instantiated for every mode
 ; of VFT_BFP and enabled only under TARGET_VXE.  It sets operand 0 to an
 ; UNSPEC_FMAX over the two register operands plus a fixed (const_int 4) as
 ; third unspec element.
 ; NOTE(review): the 4 presumably is the instruction's function mask that
 ; selects C fmax() semantics -- confirm against the architecture manual.
 (define_expand "fmax<mode>3"
  [(set (match_operand:VFT_BFP                  0 "register_operand")
 	(unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand")
 	       (match_operand:VFT_BFP           2 "register_operand")
 	       (const_int 4)]
 	      UNSPEC_FMAX))]
  "TARGET_VXE")
 ; fmin
 ; Expander for the "fmin<mode>3" pattern name, instantiated for every mode
 ; of VFT_BFP and enabled only under TARGET_VXE.  It sets operand 0 to an
 ; UNSPEC_FMIN over the two register operands plus a fixed (const_int 4) as
 ; third unspec element.
 ; NOTE(review): the 4 presumably is the instruction's function mask that
 ; selects C fmin() semantics -- confirm against the architecture manual.
 (define_expand "fmin<mode>3"
  [(set (match_operand:VFT_BFP                  0 "register_operand")
 	(unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand")
 	       (match_operand:VFT_BFP           2 "register_operand")
 	       (const_int 4)]
 	      UNSPEC_FMIN))]
  "TARGET_VXE")
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@ -2134,23 +2134,22 @@
  "<vw>fche<sdx>bs\t%v2,%v0,%v1"
  [(set_attr "op_type" "VRR")])
 ; FP minimum instruction pattern, enabled under TARGET_VXE.  Operands 1
 ; and 2 are the register inputs and operand 0 the register result; operand
 ; 3 is a constant mask that is printed (%b3) as the last operand of the
 ; emitted <vw>fmin<sdx>b instruction.
 (define_insn "vfmin<mode>"
-  [(set (match_operand:VF_HW                0 "register_operand"  "=v")
+  [(set (match_operand:VFT_BFP                  0 "register_operand"  "=v")
-	(unspec:VF_HW [(match_operand:VF_HW 1 "register_operand"   "v")
+	(unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand"   "v")
-		       (match_operand:VF_HW 2 "register_operand"   "v")
+		         (match_operand:VFT_BFP 2 "register_operand"   "v")
-		       (match_operand:QI    3 "const_mask_operand" "C")]
+		         (match_operand:QI      3 "const_mask_operand" "C")]
-		      UNSPEC_VEC_VFMIN))]
+		        UNSPEC_FMIN))]
  "TARGET_VXE"
  "<vw>fmin<sdx>b\t%v0,%v1,%v2,%b3"
  [(set_attr "op_type" "VRR")])
 ; FP maximum instruction pattern, enabled under TARGET_VXE.  Operands 1
 ; and 2 are the register inputs and operand 0 the register result; operand
 ; 3 is a constant mask that is printed (%b3) as the last operand of the
 ; emitted <vw>fmax<sdx>b instruction.
 (define_insn "vfmax<mode>"
-  [(set (match_operand:VF_HW                0 "register_operand"  "=v")
+  [(set (match_operand:VFT_BFP                  0 "register_operand"  "=v")
-	(unspec:VF_HW [(match_operand:VF_HW 1 "register_operand"   "v")
+	(unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand"   "v")
-		       (match_operand:VF_HW 2 "register_operand"   "v")
+		         (match_operand:VFT_BFP 2 "register_operand"   "v")
-		       (match_operand:QI    3 "const_mask_operand" "C")]
+		         (match_operand:QI      3 "const_mask_operand" "C")]
-		      UNSPEC_VEC_VFMAX))]
+		        UNSPEC_FMAX))]
  "TARGET_VXE"
  "<vw>fmax<sdx>b\t%v0,%v1,%v2,%b3"
  [(set_attr "op_type" "VRR")])
--- a/gcc/testsuite/gcc.target/s390/fminmax-1.c
+++ b/gcc/testsuite/gcc.target/s390/fminmax-1.c
@ -0,0 +1,77 @@
 /* Check fmin/fmax expanders for scalars on VXE targets.  */
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=z14 -mzarch" } */
 /* { dg-final { check-function-bodies "**" "" } } */
 /*
 ** dofmaxl:
 **	vl	(%v.),0\(%r3\),3
 **	vl	(%v.),0\(%r4\),3
 **	wfmaxxb	(%v.),\1,\2,4
 **	vst	\3,0\(%r2\),3
 **	br	%r14
 */
 /* Long double maximum via __builtin_fmaxl.  The expected-assembly comment
    for this function requires two vector loads, a single wfmaxxb whose last
    operand is 4, and a vector store of the result.  */
 long double
 dofmaxl (long double d1, long double d2)
 {
  return __builtin_fmaxl (d1, d2);
 }
 /*
 ** dofminl:
 **	vl	(%v.),0\(%r3\),3
 **	vl	(%v.),0\(%r4\),3
 **	wfminxb	(%v.),\1,\2,4
 **	vst	\3,0\(%r2\),3
 **	br	%r14
 */
 /* Long double minimum via __builtin_fminl.  The expected-assembly comment
    for this function requires two vector loads, a single wfminxb whose last
    operand is 4, and a vector store of the result.  */
 long double
 dofminl (long double d1, long double d2)
 {
  return __builtin_fminl (d1, d2);
 }
 /*
 ** dofmax:
 **	wfmaxdb	%v0,%v0,%v2,4
 **	br	%r14
 */
 /* Double maximum via __builtin_fmax.  The expected-assembly comment for
    this function requires exactly one wfmaxdb on %v0 and %v2 with 4 as its
    last operand, followed by the return.  */
 double
 dofmax (double d1, double d2)
 {
  return __builtin_fmax (d1, d2);
 }
 /*
 ** dofmin:
 **	wfmindb	%v0,%v0,%v2,4
 **	br	%r14
 */
 /* Double minimum via __builtin_fmin.  The expected-assembly comment for
    this function requires exactly one wfmindb on %v0 and %v2 with 4 as its
    last operand, followed by the return.  */
 double
 dofmin (double d1, double d2)
 {
  return __builtin_fmin (d1, d2);
 }
 /*
 ** dofmaxf:
 **	wfmaxsb	%v0,%v0,%v2,4
 **	br	%r14
 */
 /* Float maximum via __builtin_fmaxf.  The expected-assembly comment for
    this function requires exactly one wfmaxsb on %v0 and %v2 with 4 as its
    last operand, followed by the return.  */
 float
 dofmaxf (float f1, float f2)
 {
  return __builtin_fmaxf (f1, f2);
 }
 /*
 ** dofminf:
 **	wfminsb	%v0,%v0,%v2,4
 **	br	%r14
 */
 /* Float minimum via __builtin_fminf.  The expected-assembly comment for
    this function requires exactly one wfminsb on %v0 and %v2 with 4 as its
    last operand, followed by the return.  */
 float
 dofminf (float f1, float f2)
 {
  return __builtin_fminf (f1, f2);
 }
--- a/gcc/testsuite/gcc.target/s390/fminmax-2.c
+++ b/gcc/testsuite/gcc.target/s390/fminmax-2.c
@ -0,0 +1,29 @@
 /* Check fmin/fmax expanders for scalars on non-VXE targets.  */
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=z13 -mzarch" } */
 /* { dg-final { scan-assembler-times "jg" 4 } } */
 /* Double maximum via __builtin_fmax, built for z13 (described by the file
    header as a non-VXE target).  The file's scan-assembler-times directive
    expects four "jg" occurrences in total.
    NOTE(review): presumably one tail call into libm per function -- confirm.  */
 double
 dofmax (double d1, double d2)
 {
  return __builtin_fmax (d1, d2);
 }
 /* Double minimum via __builtin_fmin, built for z13 (described by the file
    header as a non-VXE target).  The file's scan-assembler-times directive
    expects four "jg" occurrences in total.
    NOTE(review): presumably one tail call into libm per function -- confirm.  */
 double
 dofmin (double d1, double d2)
 {
  return __builtin_fmin (d1, d2);
 }
 /* Float maximum via __builtin_fmaxf, built for z13 (described by the file
    header as a non-VXE target).  The file's scan-assembler-times directive
    expects four "jg" occurrences in total.
    NOTE(review): presumably one tail call into libm per function -- confirm.  */
 float
 dofmaxf (float f1, float f2)
 {
  return __builtin_fmaxf (f1, f2);
 }
 /* Float minimum via __builtin_fminf, built for z13 (described by the file
    header as a non-VXE target).  The file's scan-assembler-times directive
    expects four "jg" occurrences in total.
    NOTE(review): presumably one tail call into libm per function -- confirm.  */
 float
 dofminf (float f1, float f2)
 {
  return __builtin_fminf (f1, f2);
 }