Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp @@ -555,6 +555,18 @@ setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); + // For v2i64, these are only valid with P8Vector. This is corrected after + // the loop. + setOperationAction(ISD::SMAX, VT, Legal); + setOperationAction(ISD::SMIN, VT, Legal); + setOperationAction(ISD::UMAX, VT, Legal); + setOperationAction(ISD::UMIN, VT, Legal); + + if (Subtarget.hasVSX()) { + setOperationAction(ISD::FMAXNUM, VT, Legal); + setOperationAction(ISD::FMINNUM, VT, Legal); + } + // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { setOperationAction(ISD::CTPOP, VT, Legal); @@ -638,6 +650,12 @@ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } } + if (!Subtarget.hasP8Vector()) { + setOperationAction(ISD::SMAX, MVT::v2i64, Expand); + setOperationAction(ISD::SMIN, MVT::v2i64, Expand); + setOperationAction(ISD::UMAX, MVT::v2i64, Expand); + setOperationAction(ISD::UMIN, MVT::v2i64, Expand); + } for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8}) setOperationAction(ISD::ABS, VT, Custom); Index: llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstrAltivec.td @@ -900,6 +900,32 @@ def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>; def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>; +// Max/Min +def : Pat<(v16i8 (umax v16i8:$src1, v16i8:$src2)), + (v16i8 (VMAXUB $src1, $src2))>; +def : Pat<(v16i8 (smax v16i8:$src1, v16i8:$src2)), + (v16i8 (VMAXSB $src1, $src2))>; +def : Pat<(v8i16 (umax v8i16:$src1, v8i16:$src2)), + (v8i16 (VMAXUH $src1, $src2))>; +def : Pat<(v8i16 (smax v8i16:$src1, v8i16:$src2)), + (v8i16 (VMAXSH $src1, $src2))>; +def : Pat<(v4i32 (umax v4i32:$src1, v4i32:$src2)), + (v4i32 (VMAXUW $src1, $src2))>; +def : Pat<(v4i32 (smax v4i32:$src1, v4i32:$src2)), + (v4i32 (VMAXSW $src1, $src2))>; +def : Pat<(v16i8 (umin v16i8:$src1, v16i8:$src2)), + (v16i8 (VMINUB $src1, $src2))>; +def : Pat<(v16i8 (smin v16i8:$src1, v16i8:$src2)), + (v16i8 (VMINSB $src1, $src2))>; +def : Pat<(v8i16 (umin v8i16:$src1, v8i16:$src2)), + (v8i16 (VMINUH $src1, $src2))>; +def : Pat<(v8i16 (smin v8i16:$src1, v8i16:$src2)), + (v8i16 (VMINSH $src1, $src2))>; +def : Pat<(v4i32 (umin v4i32:$src1, v4i32:$src2)), + (v4i32 (VMINUW $src1, $src2))>; +def : Pat<(v4i32 (smin v4i32:$src1, v4i32:$src2)), + (v4i32 (VMINSW $src1, $src2))>; + // Shuffles. // Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x) Index: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td @@ -1188,6 +1188,15 @@ def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC), (XXSEL $vC, $vB, $vA)>; +def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)), + (v4f32 (XVMAXSP $src1, $src2))>; +def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)), + (v4f32 (XVMINSP $src1, $src2))>; +def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)), + (v2f64 (XVMAXDP $src1, $src2))>; +def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)), + (v2f64 (XVMINDP $src1, $src2))>; + let Predicates = [IsLittleEndian] in { def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))), (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; @@ -1522,6 +1531,18 @@ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4), (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>; + def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)), + (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; + def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)), + (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; + def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)), + (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; + def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)), + (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC), + (COPY_TO_REGCLASS $src2, VRRC)))>; } // AddedComplexity = 400 } // HasP8Vector Index: llvm/trunk/test/CodeGen/PowerPC/ctr-minmaxnum.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/ctr-minmaxnum.ll +++ llvm/trunk/test/CodeGen/PowerPC/ctr-minmaxnum.ll @@ -58,13 +58,9 @@ } ; CHECK-LABEL: test1v: -; CHECK: bl fminf -; CHECK-NOT: mtctr -; CHECK: bl fminf -; CHECK-NOT: mtctr -; CHECK: bl fminf -; CHECK-NOT: mtctr -; CHECK: bl fminf +; CHECK: xvminsp +; CHECK-NOT: bl fminf +; CHECK: mtctr ; CHECK-NOT: bl fminf ; CHECK: blr @@ -136,13 +132,10 @@ } ; CHECK-LABEL: test2v: -; CHECK: bl fmax -; CHECK-NOT: mtctr -; CHECK: bl fmax -; CHECK-NOT: mtctr -; CHECK: bl fmax -; CHECK-NOT: mtctr -; CHECK: bl fmax +; CHECK: xvmaxdp +; CHECK: xvmaxdp +; CHECK-NOT: bl fmax +; CHECK: mtctr ; CHECK-NOT: bl fmax ; CHECK: blr Index: llvm/trunk/test/CodeGen/PowerPC/sat-add.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/sat-add.ll +++ llvm/trunk/test/CodeGen/PowerPC/sat-add.ll @@ -382,8 +382,7 @@ ; CHECK-NEXT: lvx 3, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI24_1@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI24_1@toc@l -; CHECK-NEXT: vcmpgtub 4, 3, 2 -; CHECK-NEXT: xxsel 34, 35, 34, 36 +; CHECK-NEXT: vminub 2, 2, 3 ; CHECK-NEXT: lvx 3, 0, 3 ; CHECK-NEXT: vaddubm 2, 2, 3 ; CHECK-NEXT: blr @@ -438,8 +437,7 @@ ; CHECK-NEXT: lvx 3, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI27_1@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI27_1@toc@l -; CHECK-NEXT: vcmpgtuh 4, 3, 2 -; CHECK-NEXT: xxsel 34, 35, 34, 36 +; CHECK-NEXT: vminuh 2, 2, 3 ; CHECK-NEXT: lvx 3, 0, 3 ; CHECK-NEXT: vadduhm 2, 2, 3 ; CHECK-NEXT: blr @@ -494,8 +492,7 @@ ; CHECK-NEXT: lvx 3, 0, 3 ; CHECK-NEXT: addis 3, 2, .LCPI30_1@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI30_1@toc@l -; CHECK-NEXT: vcmpgtuw 4, 3, 2 -; CHECK-NEXT: xxsel 34, 35, 34, 36 +; CHECK-NEXT: vminuw 2, 2, 3 ; CHECK-NEXT: lvx 3, 0, 3 ; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr @@ -552,8 +549,7 @@ ; CHECK-NEXT: addi 3, 3, .LCPI33_1@toc@l ; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: vcmpgtud 4, 3, 2 -; CHECK-NEXT: xxsel 34, 35, 34, 36 +; CHECK-NEXT: vminud 2, 2, 3 ; CHECK-NEXT: xxswapd 35, 0 ; CHECK-NEXT: vaddudm 2, 2, 3 ; CHECK-NEXT: blr @@ -607,8 +603,7 @@ ; CHECK-LABEL: unsigned_sat_variable_v16i8_using_min: ; CHECK: # %bb.0: ; CHECK-NEXT: xxlnor 36, 35, 35 -; CHECK-NEXT: vcmpgtub 5, 4, 2 -; CHECK-NEXT: xxsel 34, 36, 34, 37 +; CHECK-NEXT: vminub 2, 2, 4 ; CHECK-NEXT: vaddubm 2, 2, 3 ; CHECK-NEXT: blr %noty = xor <16 x i8> %y, @@ -652,8 +647,7 @@ ; CHECK-LABEL: unsigned_sat_variable_v8i16_using_min: ; CHECK: # %bb.0: ; CHECK-NEXT: xxlnor 36, 35, 35 -; CHECK-NEXT: vcmpgtuh 5, 4, 2 -; CHECK-NEXT: xxsel 34, 36, 34, 37 +; CHECK-NEXT: vminuh 2, 2, 4 ; CHECK-NEXT: vadduhm 2, 2, 3 ; CHECK-NEXT: blr %noty = xor <8 x i16> %y, @@ -697,8 +691,7 @@ ; CHECK-LABEL: unsigned_sat_variable_v4i32_using_min: ; CHECK: # %bb.0: ; CHECK-NEXT: xxlnor 36, 35, 35 -; CHECK-NEXT: vcmpgtuw 5, 4, 2 -; CHECK-NEXT: xxsel 34, 36, 34, 37 +; CHECK-NEXT: vminuw 2, 2, 4 ; CHECK-NEXT: vadduwm 2, 2, 3 ; CHECK-NEXT: blr %noty = xor <4 x i32> %y, @@ -742,8 +735,7 @@ ; CHECK-LABEL: unsigned_sat_variable_v2i64_using_min: ; CHECK: # %bb.0: ; CHECK-NEXT: xxlnor 36, 35, 35 -; CHECK-NEXT: vcmpgtud 5, 4, 2 -; CHECK-NEXT: xxsel 34, 36, 34, 37 +; CHECK-NEXT: vminud 2, 2, 4 ; CHECK-NEXT: vaddudm 2, 2, 3 ; CHECK-NEXT: blr %noty = xor <2 x i64> %y, Index: llvm/trunk/test/CodeGen/PowerPC/vec-min-max.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/vec-min-max.ll +++ llvm/trunk/test/CodeGen/PowerPC/vec-min-max.ll @@ -0,0 +1,239 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 \ +; RUN: -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr7 \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=NOP8VEC +define <16 x i8> @getsmaxi8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: getsmaxi8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxsb 2, 2, 3 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsmaxi8: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: vmaxsb 2, 2, 3 +; NOP8VEC-NEXT: blr +entry: + %0 = icmp sgt <16 x i8> %a, %b + %1 = select <16 x i1> %0, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %1 +} + +define <8 x i16> @getsmaxi16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: getsmaxi16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxsh 2, 2, 3 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsmaxi16: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: vmaxsh 2, 2, 3 +; NOP8VEC-NEXT: blr +entry: + %0 = icmp sgt <8 x i16> %a, %b + %1 = select <8 x i1> %0, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %1 +} + +define <4 x i32> @getsmaxi32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: getsmaxi32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxsw 2, 2, 3 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsmaxi32: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: vmaxsw 2, 2, 3 +; NOP8VEC-NEXT: blr +entry: + %0 = icmp sgt <4 x i32> %a, %b + %1 = select <4 x i1> %0, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %1 +} + +define <2 x i64> @getsmaxi64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: getsmaxi64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxsd 2, 2, 3 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsmaxi64: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: xxswapd 0, 35 +; NOP8VEC-NEXT: addi 3, 1, -32 +; NOP8VEC-NEXT: addi 4, 1, -48 +; NOP8VEC-NEXT: xxswapd 1, 34 +; NOP8VEC-NEXT: stxvd2x 0, 0, 3 +; NOP8VEC-NEXT: stxvd2x 1, 0, 4 +; NOP8VEC-NEXT: ld 3, -24(1) +; NOP8VEC-NEXT: ld 4, -40(1) +; NOP8VEC-NEXT: cmpd 4, 3 +; NOP8VEC-NEXT: li 3, 0 +; NOP8VEC-NEXT: li 4, -1 +; NOP8VEC-NEXT: isel 5, 4, 3, 1 +; NOP8VEC-NEXT: std 5, -8(1) +; NOP8VEC-NEXT: ld 5, -32(1) +; NOP8VEC-NEXT: ld 6, -48(1) +; NOP8VEC-NEXT: cmpd 6, 5 +; NOP8VEC-NEXT: isel 3, 4, 3, 1 +; NOP8VEC-NEXT: std 3, -16(1) +; NOP8VEC-NEXT: addi 3, 1, -16 +; NOP8VEC-NEXT: lxvd2x 0, 0, 3 +; NOP8VEC-NEXT: xxswapd 36, 0 +; NOP8VEC-NEXT: xxsel 34, 35, 34, 36 +; NOP8VEC-NEXT: blr +entry: + %0 = icmp sgt <2 x i64> %a, %b + %1 = select <2 x i1> %0, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %1 +} + +define <4 x float> @getsmaxf32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: getsmaxf32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxsp 34, 34, 35 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsmaxf32: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: xvmaxsp 34, 34, 35 +; NOP8VEC-NEXT: blr +entry: + %0 = fcmp fast oge <4 x float> %a, %b + %1 = select <4 x i1> %0, <4 x float> %a, <4 x float> %b + ret <4 x float> %1 +} + +define <2 x double> @getsmaxf64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: getsmaxf64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxdp 34, 34, 35 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsmaxf64: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: xvmaxdp 34, 34, 35 +; NOP8VEC-NEXT: blr +entry: + %0 = fcmp fast oge <2 x double> %a, %b + %1 = select <2 x i1> %0, <2 x double> %a, <2 x double> %b + ret <2 x double> %1 +} + +define <16 x i8> @getsmini8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: getsmini8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vminsb 2, 2, 3 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsmini8: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: vminsb 2, 2, 3 +; NOP8VEC-NEXT: blr +entry: + %0 = icmp slt <16 x i8> %a, %b + %1 = select <16 x i1> %0, <16 x i8> %a, <16 x i8> %b + ret <16 x i8> %1 +} + +define <8 x i16> @getsmini16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: getsmini16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vminsh 2, 2, 3 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsmini16: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: vminsh 2, 2, 3 +; NOP8VEC-NEXT: blr +entry: + %0 = icmp slt <8 x i16> %a, %b + %1 = select <8 x i1> %0, <8 x i16> %a, <8 x i16> %b + ret <8 x i16> %1 +} + +define <4 x i32> @getsmini32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: getsmini32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vminsw 2, 2, 3 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsmini32: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: vminsw 2, 2, 3 +; NOP8VEC-NEXT: blr +entry: + %0 = icmp slt <4 x i32> %a, %b + %1 = select <4 x i1> %0, <4 x i32> %a, <4 x i32> %b + ret <4 x i32> %1 +} + +define <2 x i64> @getsmini64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: getsmini64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vminsd 2, 2, 3 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsmini64: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: xxswapd 0, 35 +; NOP8VEC-NEXT: addi 3, 1, -32 +; NOP8VEC-NEXT: addi 4, 1, -48 +; NOP8VEC-NEXT: xxswapd 1, 34 +; NOP8VEC-NEXT: stxvd2x 0, 0, 3 +; NOP8VEC-NEXT: stxvd2x 1, 0, 4 +; NOP8VEC-NEXT: ld 3, -24(1) +; NOP8VEC-NEXT: ld 4, -40(1) +; NOP8VEC-NEXT: cmpd 4, 3 +; NOP8VEC-NEXT: li 3, 0 +; NOP8VEC-NEXT: li 4, -1 +; NOP8VEC-NEXT: isel 5, 4, 3, 0 +; NOP8VEC-NEXT: std 5, -8(1) +; NOP8VEC-NEXT: ld 5, -32(1) +; NOP8VEC-NEXT: ld 6, -48(1) +; NOP8VEC-NEXT: cmpd 6, 5 +; NOP8VEC-NEXT: isel 3, 4, 3, 0 +; NOP8VEC-NEXT: std 3, -16(1) +; NOP8VEC-NEXT: addi 3, 1, -16 +; NOP8VEC-NEXT: lxvd2x 0, 0, 3 +; NOP8VEC-NEXT: xxswapd 36, 0 +; NOP8VEC-NEXT: xxsel 34, 35, 34, 36 +; NOP8VEC-NEXT: blr +entry: + %0 = icmp slt <2 x i64> %a, %b + %1 = select <2 x i1> %0, <2 x i64> %a, <2 x i64> %b + ret <2 x i64> %1 +} + +define <4 x float> @getsminf32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: getsminf32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvminsp 34, 34, 35 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsminf32: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: xvminsp 34, 34, 35 +; NOP8VEC-NEXT: blr +entry: + %0 = fcmp fast ole <4 x float> %a, %b + %1 = select <4 x i1> %0, <4 x float> %a, <4 x float> %b + ret <4 x float> %1 +} + +define <2 x double> @getsminf64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: getsminf64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmindp 34, 34, 35 +; CHECK-NEXT: blr +; +; NOP8VEC-LABEL: getsminf64: +; NOP8VEC: # %bb.0: # %entry +; NOP8VEC-NEXT: xvmindp 34, 34, 35 +; NOP8VEC-NEXT: blr +entry: + %0 = fcmp fast ole <2 x double> %a, %b + %1 = select <2 x i1> %0, <2 x double> %a, <2 x double> %b + ret <2 x double> %1 +} + Index: llvm/trunk/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ llvm/trunk/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -662,7 +662,6 @@ ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr -; %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64( <4 x double> , @@ -1495,7 +1494,6 @@ ; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: xvsqrtdp 35, 0 ; PC64LE9-NEXT: blr -; entry: %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64( <4 x double> @llvm.experimental.constrained.rint.v3f32( <3 x float> , @@ -5709,62 +5706,26 @@ define <2 x double> @constrained_vector_maxnum_v2f64() { ; PC64LE-LABEL: constrained_vector_maxnum_v2f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -64(1) -; PC64LE-NEXT: .cfi_def_cfa_offset 64 -; PC64LE-NEXT: .cfi_offset lr, 16 ; PC64LE-NEXT: addis 3, 2, .LCPI86_0@toc@ha ; PC64LE-NEXT: addis 4, 2, .LCPI86_1@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI86_0@toc@l(3) -; PC64LE-NEXT: lfs 2, .LCPI86_1@toc@l(4) -; PC64LE-NEXT: bl fmax -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: addis 4, 2, .LCPI86_3@toc@ha -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI86_2@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI86_3@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI86_2@toc@l(3) -; PC64LE-NEXT: bl fmax -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 1, 0 -; PC64LE-NEXT: addi 1, 1, 64 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addi 3, 3, .LCPI86_0@toc@l +; PC64LE-NEXT: addi 4, 4, .LCPI86_1@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: lxvd2x 1, 0, 4 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xxswapd 1, 1 +; PC64LE-NEXT: xvmaxdp 34, 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_maxnum_v2f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -48(1) -; PC64LE9-NEXT: .cfi_def_cfa_offset 48 -; PC64LE9-NEXT: .cfi_offset lr, 16 ; PC64LE9-NEXT: addis 3, 2, .LCPI86_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI86_0@toc@l(3) +; PC64LE9-NEXT: addi 3, 3, .LCPI86_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI86_1@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI86_1@toc@l(3) -; PC64LE9-NEXT: bl fmax -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI86_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI86_2@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI86_3@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI86_3@toc@l(3) -; PC64LE9-NEXT: bl fmax -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 1, 0 -; PC64LE9-NEXT: addi 1, 1, 48 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI86_1@toc@l +; PC64LE9-NEXT: lxvx 1, 0, 3 +; PC64LE9-NEXT: xvmaxdp 34, 1, 0 ; PC64LE9-NEXT: blr entry: %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64( @@ -5893,43 +5854,29 @@ ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: .cfi_def_cfa_offset 80 +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: .cfi_def_cfa_offset 32 ; PC64LE-NEXT: .cfi_offset lr, 16 -; PC64LE-NEXT: .cfi_offset v31, -16 -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: addis 4, 2, .LCPI88_1@toc@ha -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI88_0@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI88_1@toc@l(4) +; PC64LE-NEXT: addis 4, 2, .LCPI88_1@toc@ha ; PC64LE-NEXT: lfs 1, .LCPI88_0@toc@l(3) +; PC64LE-NEXT: lfs 2, .LCPI88_1@toc@l(4) ; PC64LE-NEXT: bl fmax ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: addis 4, 2, .LCPI88_3@toc@ha -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI88_2@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI88_3@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI88_2@toc@l(3) -; PC64LE-NEXT: bl fmax -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: addis 4, 2, .LCPI88_5@toc@ha -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI88_4@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI88_5@toc@l(4) -; PC64LE-NEXT: xxmrghd 63, 1, 0 -; PC64LE-NEXT: lfs 1, .LCPI88_4@toc@l(3) -; PC64LE-NEXT: bl fmax -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: addis 4, 2, .LCPI88_3@toc@ha ; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: xxlor 2, 63, 63 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 3, 3, .LCPI88_2@toc@l +; PC64LE-NEXT: addi 4, 4, .LCPI88_3@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: lxvd2x 2, 0, 4 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xxswapd 2, 2 +; PC64LE-NEXT: xvmaxdp 2, 2, 0 +; PC64LE-NEXT: xxswapd 0, 2 +; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE-NEXT: fmr 1, 0 +; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -5938,39 +5885,27 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) -; PC64LE9-NEXT: .cfi_def_cfa_offset 64 +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: .cfi_def_cfa_offset 32 ; PC64LE9-NEXT: .cfi_offset lr, 16 -; PC64LE9-NEXT: .cfi_offset v31, -16 ; PC64LE9-NEXT: addis 3, 2, .LCPI88_0@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI88_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI88_1@toc@ha ; PC64LE9-NEXT: lfs 2, .LCPI88_1@toc@l(3) -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl fmax ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI88_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI88_2@toc@l(3) +; PC64LE9-NEXT: addi 3, 3, .LCPI88_2@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI88_3@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI88_3@toc@l(3) -; PC64LE9-NEXT: bl fmax -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addis 3, 2, .LCPI88_4@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 -; PC64LE9-NEXT: lfs 1, .LCPI88_4@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI88_5@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI88_5@toc@l(3) -; PC64LE9-NEXT: bl fmax -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 3, 3, .LCPI88_3@toc@l ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: lxvx 1, 0, 3 +; PC64LE9-NEXT: xvmaxdp 2, 1, 0 +; PC64LE9-NEXT: xxswapd 1, 2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -5986,108 +5921,42 @@ define <4 x double> @constrained_vector_maxnum_v4f64() { ; PC64LE-LABEL: constrained_vector_maxnum_v4f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: .cfi_def_cfa_offset 80 -; PC64LE-NEXT: .cfi_offset lr, 16 -; PC64LE-NEXT: .cfi_offset v31, -16 -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: addis 4, 2, .LCPI89_1@toc@ha -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI89_0@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI89_1@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI89_0@toc@l(3) -; PC64LE-NEXT: bl fmax -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: addis 4, 2, .LCPI89_3@toc@ha -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI89_2@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI89_3@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI89_2@toc@l(3) -; PC64LE-NEXT: bl fmax -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: addis 4, 2, .LCPI89_5@toc@ha -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI89_4@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI89_5@toc@l(4) -; PC64LE-NEXT: xxmrghd 63, 1, 0 -; PC64LE-NEXT: lfs 1, .LCPI89_4@toc@l(3) -; PC64LE-NEXT: bl fmax -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: addis 4, 2, .LCPI89_7@toc@ha -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI89_6@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI89_7@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI89_6@toc@l(3) -; PC64LE-NEXT: bl fmax -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: vmr 2, 31 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 35, 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addis 4, 2, .LCPI89_1@toc@ha +; PC64LE-NEXT: addis 5, 2, .LCPI89_2@toc@ha +; PC64LE-NEXT: addis 6, 2, .LCPI89_3@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI89_0@toc@l +; PC64LE-NEXT: addi 4, 4, .LCPI89_1@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: lxvd2x 1, 0, 4 +; PC64LE-NEXT: addi 3, 5, .LCPI89_2@toc@l +; PC64LE-NEXT: addi 4, 6, .LCPI89_3@toc@l +; PC64LE-NEXT: lxvd2x 2, 0, 3 +; PC64LE-NEXT: lxvd2x 3, 0, 4 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xxswapd 1, 1 +; PC64LE-NEXT: xxswapd 2, 2 +; PC64LE-NEXT: xxswapd 3, 3 +; PC64LE-NEXT: xvmaxdp 34, 1, 0 +; PC64LE-NEXT: xvmaxdp 35, 3, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_maxnum_v4f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) -; PC64LE9-NEXT: .cfi_def_cfa_offset 64 -; PC64LE9-NEXT: .cfi_offset lr, 16 -; PC64LE9-NEXT: .cfi_offset v31, -16 ; PC64LE9-NEXT: addis 3, 2, .LCPI89_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI89_0@toc@l(3) +; PC64LE9-NEXT: addi 3, 3, .LCPI89_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI89_1@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI89_1@toc@l(3) -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: bl fmax -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 3, 3, .LCPI89_1@toc@l +; PC64LE9-NEXT: lxvx 1, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI89_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI89_2@toc@l(3) +; PC64LE9-NEXT: addi 3, 3, .LCPI89_2@toc@l +; PC64LE9-NEXT: xvmaxdp 34, 1, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI89_3@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI89_3@toc@l(3) -; PC64LE9-NEXT: bl fmax -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addis 3, 2, .LCPI89_4@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 -; PC64LE9-NEXT: lfs 1, .LCPI89_4@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI89_5@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI89_5@toc@l(3) -; PC64LE9-NEXT: bl fmax -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI89_6@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI89_6@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI89_7@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI89_7@toc@l(3) -; PC64LE9-NEXT: bl fmax -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: vmr 2, 31 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 35, 1, 0 -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI89_3@toc@l +; PC64LE9-NEXT: lxvx 1, 0, 3 +; PC64LE9-NEXT: xvmaxdp 35, 1, 0 ; PC64LE9-NEXT: blr entry: %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64( @@ -6140,7 +6009,6 @@ ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr -; entry: %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32( <1 x float> , <1 x float> , @@ -6152,62 +6020,26 @@ define <2 x double> @constrained_vector_minnum_v2f64() { ; PC64LE-LABEL: constrained_vector_minnum_v2f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -64(1) -; PC64LE-NEXT: .cfi_def_cfa_offset 64 -; PC64LE-NEXT: .cfi_offset lr, 16 ; PC64LE-NEXT: addis 3, 2, .LCPI91_0@toc@ha ; PC64LE-NEXT: addis 4, 2, .LCPI91_1@toc@ha -; PC64LE-NEXT: lfs 1, .LCPI91_0@toc@l(3) -; PC64LE-NEXT: lfs 2, .LCPI91_1@toc@l(4) -; PC64LE-NEXT: bl fmin -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: addis 4, 2, .LCPI91_3@toc@ha -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI91_2@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI91_3@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI91_2@toc@l(3) -; PC64LE-NEXT: bl fmin -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 34, 1, 0 -; PC64LE-NEXT: addi 1, 1, 64 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addi 3, 3, .LCPI91_0@toc@l +; PC64LE-NEXT: addi 4, 4, .LCPI91_1@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: lxvd2x 1, 0, 4 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xxswapd 1, 1 +; PC64LE-NEXT: xvmindp 34, 1, 0 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_minnum_v2f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -48(1) -; PC64LE9-NEXT: .cfi_def_cfa_offset 48 -; PC64LE9-NEXT: .cfi_offset lr, 16 ; PC64LE9-NEXT: addis 3, 2, .LCPI91_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI91_0@toc@l(3) +; PC64LE9-NEXT: addi 3, 3, .LCPI91_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI91_1@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI91_1@toc@l(3) -; PC64LE9-NEXT: bl fmin -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI91_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI91_2@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI91_3@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI91_3@toc@l(3) -; PC64LE9-NEXT: bl fmin -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 34, 1, 0 -; PC64LE9-NEXT: addi 1, 1, 48 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI91_1@toc@l +; PC64LE9-NEXT: lxvx 1, 0, 3 +; PC64LE9-NEXT: xvmindp 34, 1, 0 ; PC64LE9-NEXT: blr entry: %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64( @@ -6336,43 +6168,29 @@ ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: .cfi_def_cfa_offset 80 +; PC64LE-NEXT: stdu 1, -32(1) +; PC64LE-NEXT: .cfi_def_cfa_offset 32 ; PC64LE-NEXT: .cfi_offset lr, 16 -; PC64LE-NEXT: .cfi_offset v31, -16 -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: addis 4, 2, .LCPI93_1@toc@ha -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI93_0@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI93_1@toc@l(4) +; PC64LE-NEXT: addis 4, 2, .LCPI93_1@toc@ha ; PC64LE-NEXT: lfs 1, .LCPI93_0@toc@l(3) +; PC64LE-NEXT: lfs 2, .LCPI93_1@toc@l(4) ; PC64LE-NEXT: bl fmin ; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: addis 4, 2, .LCPI93_3@toc@ha -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI93_2@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI93_3@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI93_2@toc@l(3) -; PC64LE-NEXT: bl fmin -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: addis 4, 2, .LCPI93_5@toc@ha -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI93_4@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI93_5@toc@l(4) -; PC64LE-NEXT: xxmrghd 63, 1, 0 -; PC64LE-NEXT: lfs 1, .LCPI93_4@toc@l(3) -; PC64LE-NEXT: bl fmin -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: addis 4, 2, .LCPI93_3@toc@ha ; PC64LE-NEXT: fmr 3, 1 -; PC64LE-NEXT: xxlor 1, 63, 63 -; PC64LE-NEXT: xxlor 2, 63, 63 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addi 1, 1, 80 +; PC64LE-NEXT: addi 3, 3, .LCPI93_2@toc@l +; PC64LE-NEXT: addi 4, 4, .LCPI93_3@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: lxvd2x 2, 0, 4 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xxswapd 2, 2 +; PC64LE-NEXT: xvmindp 2, 2, 0 +; PC64LE-NEXT: xxswapd 0, 2 +; PC64LE-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE-NEXT: fmr 1, 0 +; PC64LE-NEXT: addi 1, 1, 32 ; PC64LE-NEXT: ld 0, 16(1) ; PC64LE-NEXT: mtlr 0 ; PC64LE-NEXT: blr @@ -6381,39 +6199,27 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) -; PC64LE9-NEXT: .cfi_def_cfa_offset 64 +; PC64LE9-NEXT: stdu 1, -32(1) +; PC64LE9-NEXT: .cfi_def_cfa_offset 32 ; PC64LE9-NEXT: .cfi_offset lr, 16 -; PC64LE9-NEXT: .cfi_offset v31, -16 ; PC64LE9-NEXT: addis 3, 2, .LCPI93_0@toc@ha ; PC64LE9-NEXT: lfs 1, .LCPI93_0@toc@l(3) ; PC64LE9-NEXT: addis 3, 2, .LCPI93_1@toc@ha ; PC64LE9-NEXT: lfs 2, .LCPI93_1@toc@l(3) -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl fmin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: addis 3, 2, .LCPI93_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI93_2@toc@l(3) +; PC64LE9-NEXT: addi 3, 3, .LCPI93_2@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI93_3@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI93_3@toc@l(3) -; PC64LE9-NEXT: bl fmin -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addis 3, 2, .LCPI93_4@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 -; PC64LE9-NEXT: lfs 1, .LCPI93_4@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI93_5@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI93_5@toc@l(3) -; PC64LE9-NEXT: bl fmin -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 3, 3, .LCPI93_3@toc@l ; PC64LE9-NEXT: fmr 3, 1 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: xscpsgndp 2, 63, 63 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addi 1, 1, 64 +; PC64LE9-NEXT: lxvx 1, 0, 3 +; PC64LE9-NEXT: xvmindp 2, 1, 0 +; PC64LE9-NEXT: xxswapd 1, 2 +; PC64LE9-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; PC64LE9-NEXT: # kill: def $f2 killed $f2 killed $vsl2 +; PC64LE9-NEXT: addi 1, 1, 32 ; PC64LE9-NEXT: ld 0, 16(1) ; PC64LE9-NEXT: mtlr 0 ; PC64LE9-NEXT: blr @@ -6429,108 +6235,42 @@ define <4 x double> @constrained_vector_minnum_v4f64() { ; PC64LE-LABEL: constrained_vector_minnum_v4f64: ; PC64LE: # %bb.0: # %entry -; PC64LE-NEXT: mflr 0 -; PC64LE-NEXT: std 0, 16(1) -; PC64LE-NEXT: stdu 1, -80(1) -; PC64LE-NEXT: .cfi_def_cfa_offset 80 -; PC64LE-NEXT: .cfi_offset lr, 16 -; PC64LE-NEXT: .cfi_offset v31, -16 -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: addis 4, 2, .LCPI94_1@toc@ha -; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: addis 3, 2, .LCPI94_0@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI94_1@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI94_0@toc@l(3) -; PC64LE-NEXT: bl fmin -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: addis 4, 2, .LCPI94_3@toc@ha -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI94_2@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI94_3@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI94_2@toc@l(3) -; PC64LE-NEXT: bl fmin -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: addis 4, 2, .LCPI94_5@toc@ha -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: addis 3, 2, .LCPI94_4@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI94_5@toc@l(4) -; PC64LE-NEXT: xxmrghd 63, 1, 0 -; PC64LE-NEXT: lfs 1, .LCPI94_4@toc@l(3) -; PC64LE-NEXT: bl fmin -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: addis 4, 2, .LCPI94_7@toc@ha -; PC64LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: addis 3, 2, .LCPI94_6@toc@ha -; PC64LE-NEXT: lfs 2, .LCPI94_7@toc@l(4) -; PC64LE-NEXT: lfs 1, .LCPI94_6@toc@l(3) -; PC64LE-NEXT: bl fmin -; PC64LE-NEXT: nop -; PC64LE-NEXT: li 3, 48 -; PC64LE-NEXT: vmr 2, 31 -; PC64LE-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: li 3, 64 -; PC64LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload -; PC64LE-NEXT: xxmrghd 35, 1, 0 -; PC64LE-NEXT: addi 1, 1, 80 -; PC64LE-NEXT: ld 0, 16(1) -; PC64LE-NEXT: mtlr 0 +; PC64LE-NEXT: addis 4, 2, .LCPI94_1@toc@ha +; PC64LE-NEXT: addis 5, 2, .LCPI94_2@toc@ha +; PC64LE-NEXT: addis 6, 2, .LCPI94_3@toc@ha +; PC64LE-NEXT: addi 3, 3, .LCPI94_0@toc@l +; PC64LE-NEXT: addi 4, 4, .LCPI94_1@toc@l +; PC64LE-NEXT: lxvd2x 0, 0, 3 +; PC64LE-NEXT: lxvd2x 1, 0, 4 +; PC64LE-NEXT: addi 3, 5, .LCPI94_2@toc@l +; PC64LE-NEXT: addi 4, 6, .LCPI94_3@toc@l +; PC64LE-NEXT: lxvd2x 2, 0, 3 +; PC64LE-NEXT: lxvd2x 3, 0, 4 +; PC64LE-NEXT: xxswapd 0, 0 +; PC64LE-NEXT: xxswapd 1, 1 +; PC64LE-NEXT: xxswapd 2, 2 +; PC64LE-NEXT: xxswapd 3, 3 +; PC64LE-NEXT: xvmindp 34, 1, 0 +; PC64LE-NEXT: xvmindp 35, 3, 2 ; PC64LE-NEXT: blr ; ; PC64LE9-LABEL: constrained_vector_minnum_v4f64: ; PC64LE9: # %bb.0: # %entry -; PC64LE9-NEXT: mflr 0 -; PC64LE9-NEXT: std 0, 16(1) -; PC64LE9-NEXT: stdu 1, -64(1) -; PC64LE9-NEXT: .cfi_def_cfa_offset 64 -; PC64LE9-NEXT: .cfi_offset lr, 16 -; PC64LE9-NEXT: .cfi_offset v31, -16 ; PC64LE9-NEXT: addis 3, 2, .LCPI94_0@toc@ha -; PC64LE9-NEXT: lfs 1, .LCPI94_0@toc@l(3) +; PC64LE9-NEXT: addi 3, 3, .LCPI94_0@toc@l +; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI94_1@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI94_1@toc@l(3) -; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: bl fmin -; PC64LE9-NEXT: nop +; PC64LE9-NEXT: addi 3, 3, .LCPI94_1@toc@l +; PC64LE9-NEXT: lxvx 1, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI94_2@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI94_2@toc@l(3) +; PC64LE9-NEXT: addi 3, 3, .LCPI94_2@toc@l +; PC64LE9-NEXT: xvmindp 34, 1, 0 +; PC64LE9-NEXT: lxvx 0, 0, 3 ; PC64LE9-NEXT: addis 3, 2, .LCPI94_3@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI94_3@toc@l(3) -; PC64LE9-NEXT: bl fmin -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: addis 3, 2, .LCPI94_4@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 63, 1, 0 -; PC64LE9-NEXT: lfs 1, .LCPI94_4@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI94_5@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI94_5@toc@l(3) -; PC64LE9-NEXT: bl fmin -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: addis 3, 2, .LCPI94_6@toc@ha -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: stxv 1, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: lfs 1, .LCPI94_6@toc@l(3) -; PC64LE9-NEXT: addis 3, 2, .LCPI94_7@toc@ha -; PC64LE9-NEXT: lfs 2, .LCPI94_7@toc@l(3) -; PC64LE9-NEXT: bl fmin -; PC64LE9-NEXT: nop -; PC64LE9-NEXT: lxv 0, 32(1) # 16-byte Folded Reload -; PC64LE9-NEXT: vmr 2, 31 -; PC64LE9-NEXT: lxv 63, 48(1) # 16-byte Folded Reload -; PC64LE9-NEXT: # kill: def $f1 killed $f1 def $vsl1 -; PC64LE9-NEXT: xxmrghd 35, 1, 0 -; PC64LE9-NEXT: addi 1, 1, 64 -; PC64LE9-NEXT: ld 0, 16(1) -; PC64LE9-NEXT: mtlr 0 +; PC64LE9-NEXT: addi 3, 3, .LCPI94_3@toc@l +; PC64LE9-NEXT: lxvx 1, 0, 3 +; PC64LE9-NEXT: xvmindp 35, 1, 0 ; PC64LE9-NEXT: blr entry: %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(