NOTE(review): this patch was recovered from a copy whose line breaks and
angle-bracketed tokens had been stripped. The SmallVector template arguments
(including the old inline size in the ExpandStrictFPOp hunk) and the constant
vector operand in the powi test are reconstructions; confirm them against the
original review before applying.

Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -131,6 +131,7 @@
   SDValue ExpandCTLZ(SDValue Op);
   SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
   SDValue ExpandStrictFPOp(SDValue Op);
+  SDValue ExpandStrictFPPOWI(SDValue Op);
 
   /// Implements vector promotion.
   ///
@@ -738,6 +739,8 @@
   case ISD::STRICT_FRINT:
   case ISD::STRICT_FNEARBYINT:
     return ExpandStrictFPOp(Op);
+  case ISD::STRICT_FPOWI:
+    return ExpandStrictFPPOWI(Op);
   default:
     return DAG.UnrollVectorOp(Op.getNode());
   }
@@ -1134,8 +1137,8 @@
   SDValue Chain = Op.getOperand(0);
   SDLoc dl(Op);
 
-  SmallVector<SDValue, 8> OpValues;
-  SmallVector<SDValue, 8> OpChains;
+  SmallVector<SDValue, 32> OpValues;
+  SmallVector<SDValue, 32> OpChains;
   for (unsigned i = 0; i < NumElems; ++i) {
     SmallVector<SDValue, 4> Opers;
     SDValue Idx = DAG.getConstant(i, dl,
@@ -1166,6 +1169,52 @@
   return NewChain;
 }
 
+/// Unroll a vector STRICT_FPOWI node into one scalar node per element.
+///
+/// Operand 0 is the chain, operand 1 the vector of bases, and operand 2 the
+/// power, which is passed unchanged to every scalar node. Both results of
+/// \p Op are registered as legalized and the merged chain is returned.
+SDValue VectorLegalizer::ExpandStrictFPPOWI(SDValue Op) {
+  EVT VT = Op.getValueType();
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElems = VT.getVectorNumElements();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT ValueVTs[] = {EltVT, MVT::Other};
+  SDValue Chain = Op.getOperand(0);
+  SDValue BaseVec = Op.getOperand(1);
+  SDValue Power = Op.getOperand(2);
+  SDLoc dl(Op);
+
+  SmallVector<SDValue, 32> OpValues;
+  SmallVector<SDValue, 32> OpChains;
+  for (unsigned i = 0; i < NumElems; ++i) {
+    SmallVector<SDValue, 4> Opers;
+    SDValue Idx = DAG.getConstant(i, dl,
+                                  TLI.getVectorIdxTy(DAG.getDataLayout()));
+    SDValue Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                               EltVT, BaseVec, Idx);
+
+    // Each scalar node hangs off the incoming chain; the per-element output
+    // chains are merged by the TokenFactor built after the loop.
+    Opers.push_back(Chain);
+    Opers.push_back(Base);
+    Opers.push_back(Power);
+
+    SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
+
+    OpValues.push_back(ScalarOp.getValue(0));
+    OpChains.push_back(ScalarOp.getValue(1));
+  }
+
+  SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
+  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+
+  AddLegalizedOperand(Op.getValue(0), Result);
+  AddLegalizedOperand(Op.getValue(1), NewChain);
+
+  return NewChain;
+}
+
 SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
   EVT VT = Op.getValueType();
   unsigned NumElems = VT.getVectorNumElements();
Index: test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
===================================================================
--- test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -100,6 +100,32 @@
   ret <2 x double> %pow
 }
 
+define <2 x double> @constrained_vector_powi() {
+; CHECK-LABEL: constrained_vector_powi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    callq __powidf2
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movl $3, %edi
+; CHECK-NEXT:    callq __powidf2
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
+             <2 x double> <double 42.1, double 42.2>,
+             i32 3,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict")
+  ret <2 x double> %powi
+}
+
 define <2 x double> @constrained_vector_sin() {
 ; CHECK-LABEL: constrained_vector_sin:
 ; CHECK:       # %bb.0: # %entry
@@ -315,6 +341,7 @@
 declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)