diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -218,9 +218,16 @@
   // Now process the remaining operands.
   for (unsigned i = 1; i < NumOpers; ++i) {
     SDValue Oper = N->getOperand(i);
+    EVT OpVT = Oper.getValueType();
 
-    if (Oper.getValueType().isVector())
+    // The result needs scalarizing, but it's not a given that the source does.
+    if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector)
       Oper = GetScalarizedVector(Oper);
+    else if (OpVT.isVector()) {
+      EVT VT = OpVT.getVectorElementType();
+      Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT,
+                         Oper, DAG.getVectorIdxConstant(0, dl));
+    }
 
     Opers[i] = Oper;
   }
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
--- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll
@@ -5,6 +5,9 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux \
 ; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr | FileCheck \
 ; RUN:   --check-prefix=P9 %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux \
+; RUN:   -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mattr=-vsx | \
+; RUN:   FileCheck --check-prefix=NOVSX %s
 
 ; FIXME: Constrained fpext would fail if VSX feature disabled. Add no-vsx
 
@@ -49,6 +52,11 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    frip f1, f1
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: ceil_f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frip f1, f1
+; NOVSX-NEXT:    blr
   %res = call float @llvm.experimental.constrained.ceil.f32(
                         float %f1,
                         metadata !"fpexcept.strict")
@@ -65,6 +73,11 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xsrdpip f1, f1
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: ceil_f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frip f1, f1
+; NOVSX-NEXT:    blr
   %res = call double @llvm.experimental.constrained.ceil.f64(
                         double %f1,
                         metadata !"fpexcept.strict")
@@ -81,6 +94,26 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xvrspip v2, v2
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: ceil_v4f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi r3, r1, -32
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    lfs f0, -20(r1)
+; NOVSX-NEXT:    frip f0, f0
+; NOVSX-NEXT:    stfs f0, -4(r1)
+; NOVSX-NEXT:    lfs f0, -24(r1)
+; NOVSX-NEXT:    frip f0, f0
+; NOVSX-NEXT:    stfs f0, -8(r1)
+; NOVSX-NEXT:    lfs f0, -28(r1)
+; NOVSX-NEXT:    frip f0, f0
+; NOVSX-NEXT:    stfs f0, -12(r1)
+; NOVSX-NEXT:    lfs f0, -32(r1)
+; NOVSX-NEXT:    frip f0, f0
+; NOVSX-NEXT:    stfs f0, -16(r1)
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    blr
   %res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(
                         <4 x float> %vf1,
                         metadata !"fpexcept.strict")
@@ -97,6 +130,12 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xvrdpip v2, v2
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: ceil_v2f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frip f2, f2
+; NOVSX-NEXT:    frip f1, f1
+; NOVSX-NEXT:    blr
   %res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
                         <2 x double> %vf1,
                         metadata !"fpexcept.strict")
@@ -113,6 +152,11 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    frim f1, f1
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: floor_f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frim f1, f1
+; NOVSX-NEXT:    blr
   %res = call float @llvm.experimental.constrained.floor.f32(
                         float %f1,
                         metadata !"fpexcept.strict")
@@ -129,6 +173,11 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xsrdpim f1, f1
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: floor_f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frim f1, f1
+; NOVSX-NEXT:    blr
   %res = call double @llvm.experimental.constrained.floor.f64(
                         double %f1,
                         metadata !"fpexcept.strict")
@@ -145,6 +194,26 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xvrspim v2, v2
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: floor_v4f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi r3, r1, -32
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    lfs f0, -20(r1)
+; NOVSX-NEXT:    frim f0, f0
+; NOVSX-NEXT:    stfs f0, -4(r1)
+; NOVSX-NEXT:    lfs f0, -24(r1)
+; NOVSX-NEXT:    frim f0, f0
+; NOVSX-NEXT:    stfs f0, -8(r1)
+; NOVSX-NEXT:    lfs f0, -28(r1)
+; NOVSX-NEXT:    frim f0, f0
+; NOVSX-NEXT:    stfs f0, -12(r1)
+; NOVSX-NEXT:    lfs f0, -32(r1)
+; NOVSX-NEXT:    frim f0, f0
+; NOVSX-NEXT:    stfs f0, -16(r1)
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    blr
   %res = call <4 x float> @llvm.experimental.constrained.floor.v4f32(
                         <4 x float> %vf1,
                         metadata !"fpexcept.strict")
@@ -161,6 +230,12 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xvrdpim v2, v2
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: floor_v2f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frim f2, f2
+; NOVSX-NEXT:    frim f1, f1
+; NOVSX-NEXT:    blr
   %res = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
                         <2 x double> %vf1,
                         metadata !"fpexcept.strict")
@@ -177,6 +252,20 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xsrdpic f1, f1
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: nearbyint_f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    mflr r0
+; NOVSX-NEXT:    std r0, 16(r1)
+; NOVSX-NEXT:    stdu r1, -112(r1)
+; NOVSX-NEXT:    .cfi_def_cfa_offset 112
+; NOVSX-NEXT:    .cfi_offset lr, 16
+; NOVSX-NEXT:    bl nearbyint
+; NOVSX-NEXT:    nop
+; NOVSX-NEXT:    addi r1, r1, 112
+; NOVSX-NEXT:    ld r0, 16(r1)
+; NOVSX-NEXT:    mtlr r0
+; NOVSX-NEXT:    blr
   %res = call double @llvm.experimental.constrained.nearbyint.f64(
                         double %f1,
                         metadata !"round.dynamic",
@@ -194,6 +283,38 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xvrspic v2, v2
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: nearbyint_v4f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    mflr r0
+; NOVSX-NEXT:    std r0, 16(r1)
+; NOVSX-NEXT:    stdu r1, -144(r1)
+; NOVSX-NEXT:    .cfi_def_cfa_offset 144
+; NOVSX-NEXT:    .cfi_offset lr, 16
+; NOVSX-NEXT:    addi r3, r1, 112
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    lfs f1, 124(r1)
+; NOVSX-NEXT:    bl nearbyintf
+; NOVSX-NEXT:    nop
+; NOVSX-NEXT:    stfs f1, 140(r1)
+; NOVSX-NEXT:    lfs f1, 120(r1)
+; NOVSX-NEXT:    bl nearbyintf
+; NOVSX-NEXT:    nop
+; NOVSX-NEXT:    stfs f1, 136(r1)
+; NOVSX-NEXT:    lfs f1, 116(r1)
+; NOVSX-NEXT:    bl nearbyintf
+; NOVSX-NEXT:    nop
+; NOVSX-NEXT:    stfs f1, 132(r1)
+; NOVSX-NEXT:    lfs f1, 112(r1)
+; NOVSX-NEXT:    bl nearbyintf
+; NOVSX-NEXT:    nop
+; NOVSX-NEXT:    addi r3, r1, 128
+; NOVSX-NEXT:    stfs f1, 128(r1)
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    addi r1, r1, 144
+; NOVSX-NEXT:    ld r0, 16(r1)
+; NOVSX-NEXT:    mtlr r0
+; NOVSX-NEXT:    blr
   %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(
                         <4 x float> %vf1,
                         metadata !"round.dynamic",
@@ -211,6 +332,33 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xvrdpic v2, v2
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: nearbyint_v2f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    mflr r0
+; NOVSX-NEXT:    std r0, 16(r1)
+; NOVSX-NEXT:    stdu r1, -128(r1)
+; NOVSX-NEXT:    .cfi_def_cfa_offset 128
+; NOVSX-NEXT:    .cfi_offset lr, 16
+; NOVSX-NEXT:    .cfi_offset f30, -16
+; NOVSX-NEXT:    .cfi_offset f31, -8
+; NOVSX-NEXT:    stfd f31, 120(r1) # 8-byte Folded Spill
+; NOVSX-NEXT:    fmr f31, f1
+; NOVSX-NEXT:    fmr f1, f2
+; NOVSX-NEXT:    stfd f30, 112(r1) # 8-byte Folded Spill
+; NOVSX-NEXT:    bl nearbyint
+; NOVSX-NEXT:    nop
+; NOVSX-NEXT:    fmr f30, f1
+; NOVSX-NEXT:    fmr f1, f31
+; NOVSX-NEXT:    bl nearbyint
+; NOVSX-NEXT:    nop
+; NOVSX-NEXT:    fmr f2, f30
+; NOVSX-NEXT:    lfd f31, 120(r1) # 8-byte Folded Reload
+; NOVSX-NEXT:    lfd f30, 112(r1) # 8-byte Folded Reload
+; NOVSX-NEXT:    addi r1, r1, 128
+; NOVSX-NEXT:    ld r0, 16(r1)
+; NOVSX-NEXT:    mtlr r0
+; NOVSX-NEXT:    blr
   %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
                         <2 x double> %vf1,
                         metadata !"round.dynamic",
@@ -245,6 +393,16 @@
 ; P9-NEXT:    xxmrghd v3, vs1, vs2
 ; P9-NEXT:    xxlor v2, vs0, vs0
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: fpext_v4f64_v4f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    lfs f1, -16(r1)
+; NOVSX-NEXT:    lfs f2, -12(r1)
+; NOVSX-NEXT:    lfs f3, -8(r1)
+; NOVSX-NEXT:    lfs f4, -4(r1)
+; NOVSX-NEXT:    blr
   %res = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
                         <4 x float> %vf1,
                         metadata !"fpexcept.strict")
@@ -268,6 +426,14 @@
 ; P9-NEXT:    xscvspdpn f1, vs1
 ; P9-NEXT:    xxmrghd v2, vs1, vs0
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: fpext_v2f64_v2f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    lfs f1, -16(r1)
+; NOVSX-NEXT:    lfs f2, -12(r1)
+; NOVSX-NEXT:    blr
   %res = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
                         <2 x float> %vf1,
                         metadata !"fpexcept.strict")
@@ -284,6 +450,11 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xsrsp f1, f1
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: fptrunc_f32_f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frsp f1, f1
+; NOVSX-NEXT:    blr
   %res = call float @llvm.experimental.constrained.fptrunc.f32.f64(
                         double %f1,
                         metadata !"round.dynamic",
@@ -309,6 +480,20 @@
 ; P9-NEXT:    xvcvdpsp v2, vs0
 ; P9-NEXT:    vmrgew v2, v2, v4
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: fptrunc_v4f32_v4f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frsp f0, f4
+; NOVSX-NEXT:    frsp f3, f3
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    frsp f2, f2
+; NOVSX-NEXT:    frsp f1, f1
+; NOVSX-NEXT:    stfs f0, -4(r1)
+; NOVSX-NEXT:    stfs f3, -8(r1)
+; NOVSX-NEXT:    stfs f2, -12(r1)
+; NOVSX-NEXT:    stfs f1, -16(r1)
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    blr
   %res = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
                         <4 x double> %vf1,
                         metadata !"round.dynamic",
@@ -338,6 +523,19 @@
 ; P9-NEXT:    xxsldwi v2, vs0, vs0, 1
 ; P9-NEXT:    vmrglw v2, v3, v2
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: fptrunc_v2f32_v2f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frsp f0, f2
+; NOVSX-NEXT:    frsp f1, f1
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    addi r4, r1, -32
+; NOVSX-NEXT:    stfs f0, -16(r1)
+; NOVSX-NEXT:    stfs f1, -32(r1)
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    lvx v3, 0, r4
+; NOVSX-NEXT:    vmrghw v2, v3, v2
+; NOVSX-NEXT:    blr
   %res = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
                         <2 x double> %vf1,
                         metadata !"round.dynamic",
@@ -355,6 +553,11 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    frin f1, f1
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: round_f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frin f1, f1
+; NOVSX-NEXT:    blr
   %res = call float @llvm.experimental.constrained.round.f32(
                         float %f1,
                         metadata !"fpexcept.strict")
@@ -371,6 +574,11 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xsrdpi f1, f1
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: round_f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frin f1, f1
+; NOVSX-NEXT:    blr
   %res = call double @llvm.experimental.constrained.round.f64(
                         double %f1,
                         metadata !"fpexcept.strict")
@@ -387,6 +595,26 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xvrspi v2, v2
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: round_v4f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi r3, r1, -32
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    lfs f0, -20(r1)
+; NOVSX-NEXT:    frin f0, f0
+; NOVSX-NEXT:    stfs f0, -4(r1)
+; NOVSX-NEXT:    lfs f0, -24(r1)
+; NOVSX-NEXT:    frin f0, f0
+; NOVSX-NEXT:    stfs f0, -8(r1)
+; NOVSX-NEXT:    lfs f0, -28(r1)
+; NOVSX-NEXT:    frin f0, f0
+; NOVSX-NEXT:    stfs f0, -12(r1)
+; NOVSX-NEXT:    lfs f0, -32(r1)
+; NOVSX-NEXT:    frin f0, f0
+; NOVSX-NEXT:    stfs f0, -16(r1)
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    blr
   %res = call <4 x float> @llvm.experimental.constrained.round.v4f32(
                         <4 x float> %vf1,
                         metadata !"fpexcept.strict")
@@ -403,6 +631,12 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xvrdpi v2, v2
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: round_v2f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    frin f2, f2
+; NOVSX-NEXT:    frin f1, f1
+; NOVSX-NEXT:    blr
   %res = call <2 x double> @llvm.experimental.constrained.round.v2f64(
                         <2 x double> %vf1,
                         metadata !"fpexcept.strict")
@@ -419,6 +653,11 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    friz f1, f1
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: trunc_f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    friz f1, f1
+; NOVSX-NEXT:    blr
   %res = call float @llvm.experimental.constrained.trunc.f32(
                         float %f1,
                         metadata !"fpexcept.strict")
@@ -435,6 +674,11 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xsrdpiz f1, f1
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: trunc_f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    friz f1, f1
+; NOVSX-NEXT:    blr
   %res = call double @llvm.experimental.constrained.trunc.f64(
                         double %f1,
                         metadata !"fpexcept.strict")
@@ -451,6 +695,26 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xvrspiz v2, v2
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: trunc_v4f32:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    addi r3, r1, -32
+; NOVSX-NEXT:    stvx v2, 0, r3
+; NOVSX-NEXT:    addi r3, r1, -16
+; NOVSX-NEXT:    lfs f0, -20(r1)
+; NOVSX-NEXT:    friz f0, f0
+; NOVSX-NEXT:    stfs f0, -4(r1)
+; NOVSX-NEXT:    lfs f0, -24(r1)
+; NOVSX-NEXT:    friz f0, f0
+; NOVSX-NEXT:    stfs f0, -8(r1)
+; NOVSX-NEXT:    lfs f0, -28(r1)
+; NOVSX-NEXT:    friz f0, f0
+; NOVSX-NEXT:    stfs f0, -12(r1)
+; NOVSX-NEXT:    lfs f0, -32(r1)
+; NOVSX-NEXT:    friz f0, f0
+; NOVSX-NEXT:    stfs f0, -16(r1)
+; NOVSX-NEXT:    lvx v2, 0, r3
+; NOVSX-NEXT:    blr
   %res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(
                         <4 x float> %vf1,
                         metadata !"fpexcept.strict")
@@ -467,6 +731,12 @@
 ; P9:       # %bb.0:
 ; P9-NEXT:    xvrdpiz v2, v2
 ; P9-NEXT:    blr
+;
+; NOVSX-LABEL: trunc_v2f64:
+; NOVSX:       # %bb.0:
+; NOVSX-NEXT:    friz f2, f2
+; NOVSX-NEXT:    friz f1, f1
+; NOVSX-NEXT:    blr
   %res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
                         <2 x double> %vf1,
                         metadata !"fpexcept.strict")
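
For context, the legalizer change above matters when a strict-FP node's result vector must be scalarized during type legalization while one of its vector operands legalizes some other way (for example by widening or splitting), so GetScalarizedVector cannot be used and element 0 is extracted instead. Below is a minimal hypothetical sketch in the style of the test above; the <1 x ...> instantiation, the function name, and the inline strictfp attribute are illustrative assumptions rather than part of the patch, and whether the new EXTRACT_VECTOR_ELT path is actually taken depends on how a given target legalizes the operand type.

; Hypothetical sketch, not part of the patch: a strict fptrunc whose
; <1 x float> result is scalarized. On a target that does not scalarize the
; <1 x double> operand, the operand is now handled via EXTRACT_VECTOR_ELT
; instead of asserting in GetScalarizedVector.
define <1 x float> @fptrunc_v1f32_v1f64(<1 x double> %v) strictfp {
  %res = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
                        <1 x double> %v,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict")
  ret <1 x float> %res
}

declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)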