Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1414,6 +1414,26 @@ default: break; + case Intrinsic::powi: + if (auto *RHSC = dyn_cast(Args[1])) { + bool ShouldOptForSize = I->getParent()->getParent()->hasOptSize(); + if (getTLI()->isBeneficialToExpandPowI(RHSC->getSExtValue(), + ShouldOptForSize)) { + // The cost is modeled on the expansion performed by ExpandPowI in + // SelectionDAGBuilder. + APInt Exponent = RHSC->getValue().abs(); + unsigned ActiveBits = Exponent.getActiveBits(); + unsigned PopCount = Exponent.countPopulation(); + InstructionCost Cost = (ActiveBits + PopCount - 2) * + thisT()->getArithmeticInstrCost( + Instruction::FMul, RetTy, CostKind); + if (RHSC->getSExtValue() < 0) + Cost += thisT()->getArithmeticInstrCost(Instruction::FDiv, RetTy, + CostKind); + return Cost; + } + } + break; case Intrinsic::cttz: // FIXME: If necessary, this should go in target-specific overrides. if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz()) Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -2196,6 +2196,18 @@ return false; } + /// Return true if it is beneficial to expand an @llvm.powi.* intrinsic. + /// If not optimizing for size, expanding @llvm.powi.* intrinsics is always + /// considered beneficial. + /// If optimizing for size, expansion is only considered beneficial for up to + /// 5 multiplies and a divide (if the exponent is negative). 
+ bool isBeneficialToExpandPowI(int Exponent, bool OptForSize) const { + if (Exponent < 0) + Exponent = -Exponent; + return !OptForSize || + (countPopulation((unsigned int)Exponent) + Log2_32(Exponent) < 7); + } + //===--------------------------------------------------------------------===// // TargetLowering Configuration Methods - These methods should be invoked by // the derived class constructor to configure this object for the target. Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5346,38 +5346,36 @@ /// ExpandPowI - Expand a llvm.powi intrinsic. static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { - // If RHS is a constant, we can expand this out to a multiplication tree, - // otherwise we end up lowering to a call to __powidf2 (for example). When - // optimizing for size, we only want to do this if the expansion would produce - // a small number of multiplies, otherwise we do the full expansion. + // If RHS is a constant, we can expand this out to a multiplication tree if + // it's beneficial on the target, otherwise we end up lowering to a call to + // __powidf2 (for example). if (ConstantSDNode *RHSC = dyn_cast(RHS)) { - // Get the exponent as a positive value. unsigned Val = RHSC->getSExtValue(); - if ((int)Val < 0) Val = -Val; // powi(x, 0) -> 1.0 if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); - bool OptForSize = DAG.shouldOptForSize(); - if (!OptForSize || - // If optimizing for size, don't insert too many multiplies. - // This inserts up to 5 multiplies. - countPopulation(Val) + Log2_32(Val) < 7) { + if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI( + Val, DAG.shouldOptForSize())) { + // Get the exponent as a positive value. 
+ if ((int)Val < 0) + Val = -Val; // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, // powi(x,15) generates one more multiply than it should), but this has // the benefit of being both really simple and much better than a libcall. - SDValue Res; // Logically starts equal to 1.0 + SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; // TODO: Intrinsics should have fast-math-flags that propagate to these // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) - Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); + Res = + DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare); else - Res = CurSquare; // 1.0*CurSquare. + Res = CurSquare; // 1.0*CurSquare. } CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), Index: llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -225,12 +225,12 @@ declare @llvm.experimental.vector.reverse.nxv4i1() declare @llvm.experimental.vector.reverse.nxv2i1() -define void @unsupported_fp_ops( %vec) { +define void @unsupported_fp_ops( %vec, i32 %extraarg) { ; CHECK-LABEL: 'unsupported_fp_ops' ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %sin = call @llvm.sin.nxv4f32( %vec) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %cos = call @llvm.cos.nxv4f32( %vec) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %pow = call @llvm.pow.nxv4f32( %vec, %vec) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 %extraarg) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %exp = call @llvm.exp.nxv4f32( %vec) ; CHECK-NEXT: Cost Model: 
Invalid cost for instruction: %exp2 = call @llvm.exp2.nxv4f32( %vec) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %log = call @llvm.log.nxv4f32( %vec) @@ -242,7 +242,7 @@ %sin = call @llvm.sin.nxv4f32( %vec) %cos = call @llvm.cos.nxv4f32( %vec) %pow = call @llvm.pow.nxv4f32( %vec, %vec) - %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) + %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 %extraarg) %exp = call @llvm.exp.nxv4f32( %vec) %exp2 = call @llvm.exp2.nxv4f32( %vec) %log = call @llvm.log.nxv4f32( %vec) @@ -251,6 +251,15 @@ ret void } +define void @powi( %vec) { +; CHECK-LABEL: 'powi' +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) + ret void +} + declare @llvm.sin.nxv4f32() declare @llvm.cos.nxv4f32() declare @llvm.pow.nxv4f32(, ) Index: llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll =================================================================== --- llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll +++ llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -passes='print' 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s -define void @unsupported_fp_ops( %vec) { +define void @unsupported_fp_ops( %vec, i32 %extraarg) { ; CHECK-LABEL: 'unsupported_fp_ops' ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %sin = call @llvm.sin.nxv4f32( %vec) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %cos = call @llvm.cos.nxv4f32( %vec) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %pow = call @llvm.pow.nxv4f32( %vec, %vec) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: 
%powi = call @llvm.powi.nxv4f32.i32( %vec, i32 %extraarg) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %exp = call @llvm.exp.nxv4f32( %vec) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %exp2 = call @llvm.exp2.nxv4f32( %vec) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %log = call @llvm.log.nxv4f32( %vec) @@ -20,7 +20,7 @@ %sin = call @llvm.sin.nxv4f32( %vec) %cos = call @llvm.cos.nxv4f32( %vec) %pow = call @llvm.pow.nxv4f32( %vec, %vec) - %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) + %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 %extraarg) %exp = call @llvm.exp.nxv4f32( %vec) %exp2 = call @llvm.exp2.nxv4f32( %vec) %log = call @llvm.log.nxv4f32( %vec) @@ -31,6 +31,15 @@ ret void } +define void @powi( %vec) { +; CHECK-LABEL: 'powi' +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %powi = call @llvm.powi.nxv4f32.i32( %vec, i32 42) + ret void +} + define void @fshr( %a, %b, %c) { ; CHECK-LABEL: 'fshr' ; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call @llvm.fshr.nxv1i32( %a, %b, %c) Index: llvm/test/Analysis/CostModel/X86/powi.ll =================================================================== --- llvm/test/Analysis/CostModel/X86/powi.ll +++ llvm/test/Analysis/CostModel/X86/powi.ll @@ -74,55 +74,55 @@ define i32 @powi_3() { ; SSE-LABEL: 'powi_3' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = 
call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 3) -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 3) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 3) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 3) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 3) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 3) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 3) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 3) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 3) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 3) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 3) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX1-LABEL: 'powi_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX2-LABEL: 'powi_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 3) +; AVX2-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX512-LABEL: 'powi_3' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 3) 
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; %F32 = call float @llvm.powi.f32(float poison, i32 3) @@ -142,55 +142,55 @@ define i32 @powi_n3() { ; SSE-LABEL: 'powi_n3' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 -3) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 -3) -; SSE-NEXT: Cost Model: Found an estimated 
cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 -3) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 -3) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 -3) -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 -3) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 -3) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 -3) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 -3) -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 -3) +; SSE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 -3) +; SSE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 -3) +; SSE-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 -3) +; SSE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 -3) +; SSE-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 -3) +; SSE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 -3) +; SSE-NEXT: 
Cost Model: Found an estimated cost of 73 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 -3) +; SSE-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 -3) +; SSE-NEXT: Cost Model: Found an estimated cost of 292 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 -3) +; SSE-NEXT: Cost Model: Found an estimated cost of 584 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 -3) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX1-LABEL: 'powi_n3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 -3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 -3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 -3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 -3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 -3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 -3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 -3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 -3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, 
i32 -3) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 -3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 -3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 -3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 -3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 -3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 -3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 -3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 -3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 -3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 -3) +; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 -3) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX2-LABEL: 'powi_n3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 -3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x 
float> poison, i32 -3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 -3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 -3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 -3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 -3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 -3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 -3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 -3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 -3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 -3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 -3) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 -3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 -3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 -3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4F64 = call 
<4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 -3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 -3) +; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 -3) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX512-LABEL: 'powi_n3' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 -3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 -3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 -3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 -3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 -3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 -3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 -3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 -3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 -3) -; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 -3) +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 5 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 -3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 -3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 -3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 -3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 -3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 -3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 -3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 -3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 -3) +; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 -3) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; %F32 = call float @llvm.powi.f32(float poison, i32 -3) @@ -210,25 +210,25 @@ define i32 @powi_6() { ; SSE-LABEL: 'powi_6' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 6) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 6) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: 
%V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 6) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 6) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 6) -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 6) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 6) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 6) -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 6) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 6) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 6) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 6) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 6) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 6) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 6) +; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 6) +; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 6) +; SSE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 6) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX1-LABEL: 'powi_6' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 6) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 6) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 6) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 6) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 6) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 6) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 6) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 6) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: 
%V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 6) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 6) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 6) ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 6) @@ -236,29 +236,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX2-LABEL: 'powi_6' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 6) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 6) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 6) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 6) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 6) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 6) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 6) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call 
<8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 6) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 6) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 6) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 6) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 6) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 6) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 6) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 6) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 6) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX512-LABEL: 'powi_6' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 6) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 6) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 6) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call 
<4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 6) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 6) -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 6) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 6) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 6) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 6) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 6) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 6) -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 6) -; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 6) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 6) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 6) +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 6 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 6) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; %F32 = call float @llvm.powi.f32(float poison, i32 6) @@ -278,55 +278,55 @@ define i32 @powi_16() { ; SSE-LABEL: 'powi_16' -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 16) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 16) -; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 16) -; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 16) -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 16) -; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) -; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 16) -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 16) -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 16) -; SSE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 16) +; SSE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 16) ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX1-LABEL: 'powi_16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 16) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 16) -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 16) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 16) -; AVX1-NEXT: Cost Model: Found an estimated cost of 
28 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 16) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 16) -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 16) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 16) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 16) +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 16) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX2-LABEL: 'powi_16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 16) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 
16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 16) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; ; AVX512-LABEL: 'powi_16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 16) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 16) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 16) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 
x float> poison, i32 16) -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 16) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 16) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 16) -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 16) -; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 6) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 
for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> poison, i32 16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = call <16 x double> @llvm.powi.v16f64.i32(<16 x double> poison, i32 16) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 poison ; %F32 = call float @llvm.powi.f32(float poison, i32 16) Index: llvm/test/Transforms/SLPVectorizer/X86/powi-regression.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/X86/powi-regression.ll +++ llvm/test/Transforms/SLPVectorizer/X86/powi-regression.ll @@ -6,13 +6,8 @@ define <2 x double> @PR53887_v2f64(<2 x double> noundef %x) { ; CHECK-LABEL: @PR53887_v2f64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x double> [[X:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT]], i32 6) -; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x double> undef, double [[TMP0]], i64 0 -; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <2 x double> [[X]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.powi.f64.i32(double [[VECEXT1]], i32 6) -; CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <2 x double> [[VECINIT]], double [[TMP1]], i64 1 -; CHECK-NEXT: ret <2 x double> [[VECINIT3]] +; CHECK-NEXT: [[TMP0:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[X:%.*]], i32 6) +; CHECK-NEXT: ret <2 x double> [[TMP0]] ; entry: %vecext = extractelement <2 x double> %x, i64 0 @@ -27,20 +22,8 @@ define <4 x double> @PR53887_v4f64(<4 x double> noundef %x) { ; CHECK-LABEL: @PR53887_v4f64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x double> [[X:%.*]], i64 0 -; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x double> [[X]], i64 1 
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[VECEXT]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[VECEXT1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[TMP1]], i32 6) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[VECEXT4:%.*]] = extractelement <4 x double> [[X]], i64 2 -; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x double> [[X]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[VECEXT4]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[VECEXT7]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = call fast <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[TMP5]], i32 6) -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[VECINIT91:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> [[TMP7]], <4 x i32> -; CHECK-NEXT: ret <4 x double> [[VECINIT91]] +; CHECK-NEXT: [[TMP0:%.*]] = call fast <4 x double> @llvm.powi.v4f64.i32(<4 x double> [[X:%.*]], i32 6) +; CHECK-NEXT: ret <4 x double> [[TMP0]] ; entry: %vecext = extractelement <4 x double> %x, i64 0 Index: llvm/test/Transforms/SLPVectorizer/X86/powi.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/X86/powi.ll +++ llvm/test/Transforms/SLPVectorizer/X86/powi.ll @@ -1,18 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE -; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64-v2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX1 -; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64-v3 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX2 -; RUN: opt < %s 
-mtriple=x86_64-linux-gnu -mcpu=x86-64-v4 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX512 +; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64 -basic-aa -slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64-v2 -basic-aa -slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64-v3 -basic-aa -slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64-v4 -basic-aa -slp-vectorizer -S | FileCheck %s define <2 x double> @buildvector_powi_2f64_6(<2 x double> %a) { ; CHECK-LABEL: @buildvector_powi_2f64_6( -; CHECK-NEXT: [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0 -; CHECK-NEXT: [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1 -; CHECK-NEXT: [[C0:%.*]] = call double @llvm.powi.f64.i32(double [[A0]], i32 6) -; CHECK-NEXT: [[C1:%.*]] = call double @llvm.powi.f64.i32(double [[A1]], i32 6) -; CHECK-NEXT: [[R0:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0 -; CHECK-NEXT: [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[C1]], i32 1 -; CHECK-NEXT: ret <2 x double> [[R1]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[A:%.*]], i32 6) +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %a0 = extractelement <2 x double> %a, i32 0 %a1 = extractelement <2 x double> %a, i32 1 @@ -43,69 +38,9 @@ } define <4 x float> @buildvector_powi_4f32_3(<4 x float> %a) { -; SSE-LABEL: @buildvector_powi_4f32_3( -; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 -; SSE-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 -; SSE-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 -; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0 -; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[A1]], i32 1 -; SSE-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.powi.v2f32.i32(<2 x 
float> [[TMP2]], i32 3) -; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 -; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A3]], i32 1 -; SSE-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[TMP5]], i32 3) -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <4 x i32> -; SSE-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> -; SSE-NEXT: ret <4 x float> [[R31]] -; -; AVX1-LABEL: @buildvector_powi_4f32_3( -; AVX1-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 -; AVX1-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 -; AVX1-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 -; AVX1-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 -; AVX1-NEXT: [[C0:%.*]] = call float @llvm.powi.f32.i32(float [[A0]], i32 3) -; AVX1-NEXT: [[C1:%.*]] = call float @llvm.powi.f32.i32(float [[A1]], i32 3) -; AVX1-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 -; AVX1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[A3]], i32 1 -; AVX1-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[TMP2]], i32 3) -; AVX1-NEXT: [[R0:%.*]] = insertelement <4 x float> poison, float [[C0]], i32 0 -; AVX1-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[C1]], i32 1 -; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> -; AVX1-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[R1]], <4 x float> [[TMP4]], <4 x i32> -; AVX1-NEXT: ret <4 x float> [[R31]] -; -; AVX2-LABEL: @buildvector_powi_4f32_3( -; AVX2-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 -; AVX2-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 -; AVX2-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 -; AVX2-NEXT: 
[[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 -; AVX2-NEXT: [[C0:%.*]] = call float @llvm.powi.f32.i32(float [[A0]], i32 3) -; AVX2-NEXT: [[C1:%.*]] = call float @llvm.powi.f32.i32(float [[A1]], i32 3) -; AVX2-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 -; AVX2-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[A3]], i32 1 -; AVX2-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[TMP2]], i32 3) -; AVX2-NEXT: [[R0:%.*]] = insertelement <4 x float> poison, float [[C0]], i32 0 -; AVX2-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[C1]], i32 1 -; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> -; AVX2-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[R1]], <4 x float> [[TMP4]], <4 x i32> -; AVX2-NEXT: ret <4 x float> [[R31]] -; -; AVX512-LABEL: @buildvector_powi_4f32_3( -; AVX512-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 -; AVX512-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1 -; AVX512-NEXT: [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2 -; AVX512-NEXT: [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3 -; AVX512-NEXT: [[C0:%.*]] = call float @llvm.powi.f32.i32(float [[A0]], i32 3) -; AVX512-NEXT: [[C1:%.*]] = call float @llvm.powi.f32.i32(float [[A1]], i32 3) -; AVX512-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 -; AVX512-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[A3]], i32 1 -; AVX512-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[TMP2]], i32 3) -; AVX512-NEXT: [[R0:%.*]] = insertelement <4 x float> poison, float [[C0]], i32 0 -; AVX512-NEXT: [[R1:%.*]] = insertelement <4 x float> [[R0]], float [[C1]], i32 1 -; AVX512-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> -; AVX512-NEXT: [[R31:%.*]] = shufflevector <4 x float> [[R1]], <4 x float> [[TMP4]], <4 x i32> -; AVX512-NEXT: ret <4 
x float> [[R31]] +; CHECK-LABEL: @buildvector_powi_4f32_3( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[A:%.*]], i32 3) +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %a0 = extractelement <4 x float> %a, i32 0 %a1 = extractelement <4 x float> %a, i32 1 @@ -127,45 +62,9 @@ ; define <4 x double> @buildvector_powi_4f64_16(<4 x double> %a) { -; SSE-LABEL: @buildvector_powi_4f64_16( -; SSE-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1 -; SSE-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2 -; SSE-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3 -; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 -; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A1]], i32 1 -; SSE-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[TMP2]], i32 16) -; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0 -; SSE-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A3]], i32 1 -; SSE-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[TMP5]], i32 16) -; SSE-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; SSE-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> -; SSE-NEXT: [[R31:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> -; SSE-NEXT: ret <4 x double> [[R31]] -; -; AVX1-LABEL: @buildvector_powi_4f64_16( -; AVX1-NEXT: [[A0:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0 -; AVX1-NEXT: [[A1:%.*]] = extractelement <4 x double> [[A]], i32 1 -; AVX1-NEXT: [[A2:%.*]] = extractelement <4 x double> [[A]], i32 2 -; AVX1-NEXT: [[A3:%.*]] = extractelement <4 x double> [[A]], i32 3 -; AVX1-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0 -; AVX1-NEXT: [[TMP2:%.*]] = insertelement <2 
x double> [[TMP1]], double [[A1]], i32 1 -; AVX1-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[TMP2]], i32 16) -; AVX1-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0 -; AVX1-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A3]], i32 1 -; AVX1-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[TMP5]], i32 16) -; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> -; AVX1-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> -; AVX1-NEXT: [[R31:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> -; AVX1-NEXT: ret <4 x double> [[R31]] -; -; AVX2-LABEL: @buildvector_powi_4f64_16( -; AVX2-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> [[A:%.*]], i32 16) -; AVX2-NEXT: ret <4 x double> [[TMP1]] -; -; AVX512-LABEL: @buildvector_powi_4f64_16( -; AVX512-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> [[A:%.*]], i32 16) -; AVX512-NEXT: ret <4 x double> [[TMP1]] +; CHECK-LABEL: @buildvector_powi_4f64_16( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> [[A:%.*]], i32 16) +; CHECK-NEXT: ret <4 x double> [[TMP1]] ; %a0 = extractelement <4 x double> %a, i32 0 %a1 = extractelement <4 x double> %a, i32 1 @@ -183,66 +82,9 @@ } define <8 x float> @buildvector_powi_8f32_4(<8 x float> %a) { -; SSE-LABEL: @buildvector_powi_8f32_4( -; SSE-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1 -; SSE-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2 -; SSE-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 -; SSE-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4 -; SSE-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5 -; SSE-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6 -; SSE-NEXT: 
[[A7:%.*]] = extractelement <8 x float> [[A]], i32 7 -; SSE-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A0]], i32 0 -; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[A1]], i32 1 -; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[A2]], i32 2 -; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[A3]], i32 3 -; SSE-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP4]], i32 4) -; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x float> poison, float [[A4]], i32 0 -; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[A5]], i32 1 -; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[A6]], i32 2 -; SSE-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[A7]], i32 3 -; SSE-NEXT: [[TMP10:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[TMP9]], i32 4) -; SSE-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[TMP11]], <8 x float> [[TMP12]], <8 x i32> -; SSE-NEXT: ret <8 x float> [[R71]] -; -; AVX1-LABEL: @buildvector_powi_8f32_4( -; AVX1-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i32 0 -; AVX1-NEXT: [[A1:%.*]] = extractelement <8 x float> [[A]], i32 1 -; AVX1-NEXT: [[A2:%.*]] = extractelement <8 x float> [[A]], i32 2 -; AVX1-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i32 3 -; AVX1-NEXT: [[A4:%.*]] = extractelement <8 x float> [[A]], i32 4 -; AVX1-NEXT: [[A5:%.*]] = extractelement <8 x float> [[A]], i32 5 -; AVX1-NEXT: [[A6:%.*]] = extractelement <8 x float> [[A]], i32 6 -; AVX1-NEXT: [[A7:%.*]] = extractelement <8 x float> [[A]], i32 7 -; AVX1-NEXT: [[C0:%.*]] = call float @llvm.powi.f32.i32(float [[A0]], i32 4) -; AVX1-NEXT: [[C1:%.*]] = call float @llvm.powi.f32.i32(float [[A1]], i32 4) -; AVX1-NEXT: 
[[TMP1:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 -; AVX1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[A3]], i32 1 -; AVX1-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[TMP2]], i32 4) -; AVX1-NEXT: [[C4:%.*]] = call float @llvm.powi.f32.i32(float [[A4]], i32 4) -; AVX1-NEXT: [[C5:%.*]] = call float @llvm.powi.f32.i32(float [[A5]], i32 4) -; AVX1-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A6]], i32 0 -; AVX1-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A7]], i32 1 -; AVX1-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[TMP5]], i32 4) -; AVX1-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[C0]], i32 0 -; AVX1-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1 -; AVX1-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <8 x i32> -; AVX1-NEXT: [[R32:%.*]] = shufflevector <8 x float> [[R1]], <8 x float> [[TMP7]], <8 x i32> -; AVX1-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R32]], float [[C4]], i32 4 -; AVX1-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5 -; AVX1-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <8 x i32> -; AVX1-NEXT: [[R71:%.*]] = shufflevector <8 x float> [[R5]], <8 x float> [[TMP8]], <8 x i32> -; AVX1-NEXT: ret <8 x float> [[R71]] -; -; AVX2-LABEL: @buildvector_powi_8f32_4( -; AVX2-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> [[A:%.*]], i32 4) -; AVX2-NEXT: ret <8 x float> [[TMP1]] -; -; AVX512-LABEL: @buildvector_powi_8f32_4( -; AVX512-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> [[A:%.*]], i32 4) -; AVX512-NEXT: ret <8 x float> [[TMP1]] +; CHECK-LABEL: @buildvector_powi_8f32_4( +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> [[A:%.*]], i32 4) +; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %a0 = extractelement <8 x float> %a, i32 
0 %a1 = extractelement <8 x float> %a, i32 1 @@ -276,61 +118,9 @@ ; define <8 x double> @buildvector_powi_8f64_5(<8 x double> %a) { -; SSE-LABEL: @buildvector_powi_8f64_5( -; SSE-NEXT: [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1 -; SSE-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2 -; SSE-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3 -; SSE-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4 -; SSE-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5 -; SSE-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6 -; SSE-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7 -; SSE-NEXT: [[TMP1:%.*]] = insertelement <4 x double> poison, double [[A0]], i32 0 -; SSE-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[A1]], i32 1 -; SSE-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[A2]], i32 2 -; SSE-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[A3]], i32 3 -; SSE-NEXT: [[TMP5:%.*]] = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> [[TMP4]], i32 5) -; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x double> poison, double [[A4]], i32 0 -; SSE-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double [[A5]], i32 1 -; SSE-NEXT: [[TMP8:%.*]] = insertelement <4 x double> [[TMP7]], double [[A6]], i32 2 -; SSE-NEXT: [[TMP9:%.*]] = insertelement <4 x double> [[TMP8]], double [[A7]], i32 3 -; SSE-NEXT: [[TMP10:%.*]] = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> [[TMP9]], i32 5) -; SSE-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> poison, <8 x i32> -; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> poison, <8 x i32> -; SSE-NEXT: [[R71:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> [[TMP12]], <8 x i32> -; SSE-NEXT: ret <8 x double> [[R71]] -; -; AVX1-LABEL: @buildvector_powi_8f64_5( -; AVX1-NEXT: [[A0:%.*]] = 
extractelement <8 x double> [[A:%.*]], i32 0 -; AVX1-NEXT: [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1 -; AVX1-NEXT: [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2 -; AVX1-NEXT: [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3 -; AVX1-NEXT: [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4 -; AVX1-NEXT: [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5 -; AVX1-NEXT: [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6 -; AVX1-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7 -; AVX1-NEXT: [[TMP1:%.*]] = insertelement <4 x double> poison, double [[A0]], i32 0 -; AVX1-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[A1]], i32 1 -; AVX1-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[A2]], i32 2 -; AVX1-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[A3]], i32 3 -; AVX1-NEXT: [[TMP5:%.*]] = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> [[TMP4]], i32 5) -; AVX1-NEXT: [[TMP6:%.*]] = insertelement <4 x double> poison, double [[A4]], i32 0 -; AVX1-NEXT: [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double [[A5]], i32 1 -; AVX1-NEXT: [[TMP8:%.*]] = insertelement <4 x double> [[TMP7]], double [[A6]], i32 2 -; AVX1-NEXT: [[TMP9:%.*]] = insertelement <4 x double> [[TMP8]], double [[A7]], i32 3 -; AVX1-NEXT: [[TMP10:%.*]] = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> [[TMP9]], i32 5) -; AVX1-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> poison, <8 x i32> -; AVX1-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> poison, <8 x i32> -; AVX1-NEXT: [[R71:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> [[TMP12]], <8 x i32> -; AVX1-NEXT: ret <8 x double> [[R71]] -; -; AVX2-LABEL: @buildvector_powi_8f64_5( -; AVX2-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> [[A:%.*]], i32 5) -; AVX2-NEXT: ret <8 x double> [[TMP1]] -; -; AVX512-LABEL: @buildvector_powi_8f64_5( -; AVX512-NEXT: 
[[TMP1:%.*]] = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> [[A:%.*]], i32 5) -; AVX512-NEXT: ret <8 x double> [[TMP1]] +; CHECK-LABEL: @buildvector_powi_8f64_5( +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> [[A:%.*]], i32 5) +; CHECK-NEXT: ret <8 x double> [[TMP1]] ; %a0 = extractelement <8 x double> %a, i32 0 %a1 = extractelement <8 x double> %a, i32 1 @@ -415,108 +205,9 @@ } define <16 x float> @buildvector_powi_16f32_n13(<16 x float> %a) { -; SSE-LABEL: @buildvector_powi_16f32_n13( -; SSE-NEXT: [[A0:%.*]] = extractelement <16 x float> [[A:%.*]], i32 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <16 x float> [[A]], i32 1 -; SSE-NEXT: [[A2:%.*]] = extractelement <16 x float> [[A]], i32 2 -; SSE-NEXT: [[A3:%.*]] = extractelement <16 x float> [[A]], i32 3 -; SSE-NEXT: [[A4:%.*]] = extractelement <16 x float> [[A]], i32 4 -; SSE-NEXT: [[A5:%.*]] = extractelement <16 x float> [[A]], i32 5 -; SSE-NEXT: [[A6:%.*]] = extractelement <16 x float> [[A]], i32 6 -; SSE-NEXT: [[A7:%.*]] = extractelement <16 x float> [[A]], i32 7 -; SSE-NEXT: [[A8:%.*]] = extractelement <16 x float> [[A]], i32 8 -; SSE-NEXT: [[A9:%.*]] = extractelement <16 x float> [[A]], i32 9 -; SSE-NEXT: [[A10:%.*]] = extractelement <16 x float> [[A]], i32 10 -; SSE-NEXT: [[A11:%.*]] = extractelement <16 x float> [[A]], i32 11 -; SSE-NEXT: [[A12:%.*]] = extractelement <16 x float> [[A]], i32 12 -; SSE-NEXT: [[A13:%.*]] = extractelement <16 x float> [[A]], i32 13 -; SSE-NEXT: [[A14:%.*]] = extractelement <16 x float> [[A]], i32 14 -; SSE-NEXT: [[A15:%.*]] = extractelement <16 x float> [[A]], i32 15 -; SSE-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[A0]], i32 0 -; SSE-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[A1]], i32 1 -; SSE-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[A2]], i32 2 -; SSE-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[A3]], i32 3 -; SSE-NEXT: [[TMP5:%.*]] = insertelement 
<8 x float> [[TMP4]], float [[A4]], i32 4 -; SSE-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[A5]], i32 5 -; SSE-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[A6]], i32 6 -; SSE-NEXT: [[TMP8:%.*]] = insertelement <8 x float> [[TMP7]], float [[A7]], i32 7 -; SSE-NEXT: [[TMP9:%.*]] = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> [[TMP8]], i32 -13) -; SSE-NEXT: [[TMP10:%.*]] = insertelement <8 x float> poison, float [[A8]], i32 0 -; SSE-NEXT: [[TMP11:%.*]] = insertelement <8 x float> [[TMP10]], float [[A9]], i32 1 -; SSE-NEXT: [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[A10]], i32 2 -; SSE-NEXT: [[TMP13:%.*]] = insertelement <8 x float> [[TMP12]], float [[A11]], i32 3 -; SSE-NEXT: [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[A12]], i32 4 -; SSE-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[A13]], i32 5 -; SSE-NEXT: [[TMP16:%.*]] = insertelement <8 x float> [[TMP15]], float [[A14]], i32 6 -; SSE-NEXT: [[TMP17:%.*]] = insertelement <8 x float> [[TMP16]], float [[A15]], i32 7 -; SSE-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> [[TMP17]], i32 -13) -; SSE-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <16 x i32> -; SSE-NEXT: [[TMP20:%.*]] = shufflevector <8 x float> [[TMP18]], <8 x float> poison, <16 x i32> -; SSE-NEXT: [[R151:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> -; SSE-NEXT: ret <16 x float> [[R151]] -; -; AVX1-LABEL: @buildvector_powi_16f32_n13( -; AVX1-NEXT: [[A0:%.*]] = extractelement <16 x float> [[A:%.*]], i32 0 -; AVX1-NEXT: [[A1:%.*]] = extractelement <16 x float> [[A]], i32 1 -; AVX1-NEXT: [[A2:%.*]] = extractelement <16 x float> [[A]], i32 2 -; AVX1-NEXT: [[A3:%.*]] = extractelement <16 x float> [[A]], i32 3 -; AVX1-NEXT: [[A4:%.*]] = extractelement <16 x float> [[A]], i32 4 -; AVX1-NEXT: [[A5:%.*]] = extractelement <16 x float> [[A]], i32 5 -; AVX1-NEXT: [[A6:%.*]] = 
extractelement <16 x float> [[A]], i32 6 -; AVX1-NEXT: [[A7:%.*]] = extractelement <16 x float> [[A]], i32 7 -; AVX1-NEXT: [[A8:%.*]] = extractelement <16 x float> [[A]], i32 8 -; AVX1-NEXT: [[A9:%.*]] = extractelement <16 x float> [[A]], i32 9 -; AVX1-NEXT: [[A10:%.*]] = extractelement <16 x float> [[A]], i32 10 -; AVX1-NEXT: [[A11:%.*]] = extractelement <16 x float> [[A]], i32 11 -; AVX1-NEXT: [[A12:%.*]] = extractelement <16 x float> [[A]], i32 12 -; AVX1-NEXT: [[A13:%.*]] = extractelement <16 x float> [[A]], i32 13 -; AVX1-NEXT: [[A14:%.*]] = extractelement <16 x float> [[A]], i32 14 -; AVX1-NEXT: [[A15:%.*]] = extractelement <16 x float> [[A]], i32 15 -; AVX1-NEXT: [[C0:%.*]] = call float @llvm.powi.f32.i32(float [[A0]], i32 -13) -; AVX1-NEXT: [[C1:%.*]] = call float @llvm.powi.f32.i32(float [[A1]], i32 -13) -; AVX1-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0 -; AVX1-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[A3]], i32 1 -; AVX1-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[TMP2]], i32 -13) -; AVX1-NEXT: [[C4:%.*]] = call float @llvm.powi.f32.i32(float [[A4]], i32 -13) -; AVX1-NEXT: [[C5:%.*]] = call float @llvm.powi.f32.i32(float [[A5]], i32 -13) -; AVX1-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[A6]], i32 0 -; AVX1-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A7]], i32 1 -; AVX1-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[TMP5]], i32 -13) -; AVX1-NEXT: [[C8:%.*]] = call float @llvm.powi.f32.i32(float [[A8]], i32 -13) -; AVX1-NEXT: [[C9:%.*]] = call float @llvm.powi.f32.i32(float [[A9]], i32 -13) -; AVX1-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[A10]], i32 0 -; AVX1-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A11]], i32 1 -; AVX1-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[TMP8]], i32 -13) -; AVX1-NEXT: [[C12:%.*]] = call float 
@llvm.powi.f32.i32(float [[A12]], i32 -13) -; AVX1-NEXT: [[C13:%.*]] = call float @llvm.powi.f32.i32(float [[A13]], i32 -13) -; AVX1-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[A14]], i32 0 -; AVX1-NEXT: [[TMP11:%.*]] = insertelement <2 x float> [[TMP10]], float [[A15]], i32 1 -; AVX1-NEXT: [[TMP12:%.*]] = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> [[TMP11]], i32 -13) -; AVX1-NEXT: [[R0:%.*]] = insertelement <16 x float> poison, float [[C0]], i32 0 -; AVX1-NEXT: [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1 -; AVX1-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <16 x i32> -; AVX1-NEXT: [[R34:%.*]] = shufflevector <16 x float> [[R1]], <16 x float> [[TMP13]], <16 x i32> -; AVX1-NEXT: [[R4:%.*]] = insertelement <16 x float> [[R34]], float [[C4]], i32 4 -; AVX1-NEXT: [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5 -; AVX1-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <16 x i32> -; AVX1-NEXT: [[R73:%.*]] = shufflevector <16 x float> [[R5]], <16 x float> [[TMP14]], <16 x i32> -; AVX1-NEXT: [[R8:%.*]] = insertelement <16 x float> [[R73]], float [[C8]], i32 8 -; AVX1-NEXT: [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9 -; AVX1-NEXT: [[TMP15:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <16 x i32> -; AVX1-NEXT: [[R112:%.*]] = shufflevector <16 x float> [[R9]], <16 x float> [[TMP15]], <16 x i32> -; AVX1-NEXT: [[R12:%.*]] = insertelement <16 x float> [[R112]], float [[C12]], i32 12 -; AVX1-NEXT: [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[C13]], i32 13 -; AVX1-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP12]], <2 x float> poison, <16 x i32> -; AVX1-NEXT: [[R151:%.*]] = shufflevector <16 x float> [[R13]], <16 x float> [[TMP16]], <16 x i32> -; AVX1-NEXT: ret <16 x float> [[R151]] -; -; AVX2-LABEL: @buildvector_powi_16f32_n13( -; AVX2-NEXT: [[TMP1:%.*]] = call <16 x float> 
@llvm.powi.v16f32.i32(<16 x float> [[A:%.*]], i32 -13) -; AVX2-NEXT: ret <16 x float> [[TMP1]] -; -; AVX512-LABEL: @buildvector_powi_16f32_n13( -; AVX512-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> [[A:%.*]], i32 -13) -; AVX512-NEXT: ret <16 x float> [[TMP1]] +; CHECK-LABEL: @buildvector_powi_16f32_n13( +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> [[A:%.*]], i32 -13) +; CHECK-NEXT: ret <16 x float> [[TMP1]] ; %a0 = extractelement <16 x float> %a, i32 0 %a1 = extractelement <16 x float> %a, i32 1