Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -130,7 +130,8 @@ SDValue ExpandBITREVERSE(SDValue Op); SDValue ExpandCTLZ(SDValue Op); SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); - + SDValue ExpandStrictFPOp(SDValue Op); + /// Implements vector promotion. /// /// This is essentially just bitcasting the operands to a different type and @@ -288,10 +289,30 @@ if (!HasVectorValue) return TranslateLegalizeResults(Op, Result); - EVT QueryType; + TargetLowering::LegalizeAction Action = TargetLowering::Legal; switch (Op.getOpcode()) { default: return TranslateLegalizeResults(Op, Result); + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + // These pseudo-ops get legalized as if they were their non-strict + // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT + // is also legal, but if ISD::FSQRT requires expansion then so does + // ISD::STRICT_FSQRT. 
+    Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
+                                            Node->getValueType(0));
+    break;
   case ISD::ADD:
   case ISD::SUB:
   case ISD::MUL:
@@ -368,26 +389,30 @@
   case ISD::SMUL_LOHI:
   case ISD::UMUL_LOHI:
   case ISD::FCANONICALIZE:
-    QueryType = Node->getValueType(0);
+    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
     break;
   case ISD::FP_ROUND_INREG:
-    QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    Action = TLI.getOperationAction(Node->getOpcode(),
+               cast<VTSDNode>(Node->getOperand(1))->getVT());
     break;
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
-    QueryType = Node->getOperand(0).getValueType();
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(0).getValueType());
     break;
   case ISD::MSCATTER:
-    QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
+    Action = TLI.getOperationAction(Node->getOpcode(),
+               cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
     break;
   case ISD::MSTORE:
-    QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType();
+    Action = TLI.getOperationAction(Node->getOpcode(),
+               cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
     break;
   }
 
   LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
 
-  switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+  switch (Action) {
   default: llvm_unreachable("This action is not supported yet!");
   case TargetLowering::Promote:
     Result = Promote(Op);
@@ -700,6 +725,20 @@
     return ExpandCTLZ(Op);
   case ISD::CTTZ_ZERO_UNDEF:
     return ExpandCTTZ_ZERO_UNDEF(Op);
+  case ISD::STRICT_FSQRT:
+  case ISD::STRICT_FMA:
+  case ISD::STRICT_FPOW:
+  case ISD::STRICT_FPOWI:
+  case ISD::STRICT_FSIN:
+  case ISD::STRICT_FCOS:
+  case ISD::STRICT_FEXP:
+  case ISD::STRICT_FEXP2:
+  case ISD::STRICT_FLOG:
+  case ISD::STRICT_FLOG10:
+  case ISD::STRICT_FLOG2:
+  case ISD::STRICT_FRINT:
+  case ISD::STRICT_FNEARBYINT:
+    return ExpandStrictFPOp(Op);
   default:
     return DAG.UnrollVectorOp(Op.getNode());
   }
@@ -1086,6 +1125,69 @@
   return DAG.UnrollVectorOp(Op.getNode());
 }
 
+/// Expand a vector strict (constrained) floating-point node by unrolling it
+/// into one scalar strict node per vector element.
+///
+/// Operand 0 of \p Op is the incoming chain; every scalar node is given that
+/// same chain. Vector operands contribute the element at the current index,
+/// while non-vector operands (such as the integer exponent of STRICT_FPOWI)
+/// are passed to each scalar node unchanged. The scalar values are gathered
+/// with a BUILD_VECTOR and the scalar output chains with a TokenFactor; both
+/// replacements are recorded through AddLegalizedOperand.
+///
+/// \returns the replacement for whichever result of the node \p Op refers to:
+/// the rebuilt vector for result 0, the merged chain otherwise.
+SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
+  EVT VT = Op.getNode()->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElems = VT.getVectorNumElements();
+  unsigned NumOpers = Op.getNumOperands();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT ValueVTs[] = {EltVT, MVT::Other};
+  SDValue Chain = Op.getOperand(0);
+  SDLoc dl(Op);
+
+  SmallVector<SDValue, 32> OpValues;
+  SmallVector<SDValue, 32> OpChains;
+  for (unsigned i = 0; i < NumElems; ++i) {
+    SmallVector<SDValue, 4> Opers;
+    SDValue Idx = DAG.getConstant(i, dl,
+                                  TLI.getVectorIdxTy(DAG.getDataLayout()));
+
+    // The Chain is the first operand.
+    Opers.push_back(Chain);
+
+    // Now process the remaining operands.
+    for (unsigned j = 1; j < NumOpers; ++j) {
+      SDValue Oper = Op.getOperand(j);
+      EVT OperVT = Oper.getValueType();
+
+      // Only vector operands are split per element; scalar operands are
+      // shared by every unrolled node.
+      if (OperVT.isVector())
+        Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                           OperVT.getVectorElementType(), Oper, Idx);
+
+      Opers.push_back(Oper);
+    }
+
+    SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
+
+    OpValues.push_back(ScalarOp.getValue(0));
+    OpChains.push_back(ScalarOp.getValue(1));
+  }
+
+  SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
+  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+
+  AddLegalizedOperand(Op.getValue(0), Result);
+  AddLegalizedOperand(Op.getValue(1), NewChain);
+
+  // Hand back the replacement for the result number that Op refers to, so a
+  // caller substituting the returned value for Op gets a matching value.
+  return Op.getResNo() ? NewChain : Result;
+}
+
 SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
   EVT VT = Op.getValueType();
   unsigned NumElems = VT.getVectorNumElements();
Index: test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
===================================================================
--- test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -0,0 +1,200 @@
+; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
+
+; CHECK-LABEL: constrained_vector_fdiv
+; CHECK: divpd
+define <2 x double> @constrained_vector_fdiv() {
+entry:
+  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
+           <2 x double> ,
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %div
+}
+
+; CHECK-LABEL: constrained_vector_fmul
+; CHECK: mulpd
+define
<2 x double> @constrained_vector_fmul(<2 x double> %a) { +entry: + %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64( + <2 x double> , + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %mul +} + +; CHECK-LABEL: constrained_vector_fadd +; CHECK: addpd +define <2 x double> @constrained_vector_fadd() { +entry: + %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64( + <2 x double> , + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %add +} + +; CHECK-LABEL: constrained_vector_fsub +; CHECK: subpd +define <2 x double> @constrained_vector_fsub() { +entry: + %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( + <2 x double> , + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %sub +} + +; CHECK-LABEL: constrained_vector_sqrt +; CHECK: sqrtpd +define <2 x double> @constrained_vector_sqrt() { +entry: + %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %sqrt +} + +; CHECK-LABEL: constrained_vector_pow +; CHECK: pow +; CHECK: movlhps +define <2 x double> @constrained_vector_pow() { +entry: + %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64( + <2 x double> , + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %pow +} + +; CHECK-LABEL: constrained_vector_sin +; CHECK: sin +; CHECK: movlhps +define <2 x double> @constrained_vector_sin() { +entry: + %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %sin +} + +; CHECK-LABEL: constrained_vector_cos +; CHECK: cos +; CHECK: movlhps +define <2 x double> @constrained_vector_cos() { +entry: + %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64( + <2 x double> 
, + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %cos +} + +; CHECK-LABEL: constrained_vector_exp +; CHECK: exp +; CHECK: movlhps +define <2 x double> @constrained_vector_exp() { +entry: + %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %exp +} + +; CHECK-LABEL: constrained_vector_exp2 +; CHECK: exp2 +; CHECK: movlhps +define <2 x double> @constrained_vector_exp2() { +entry: + %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %exp2 +} + +; CHECK-LABEL: constrained_vector_log +; CHECK: log +; CHECK: movlhps +define <2 x double> @constrained_vector_log() { +entry: + %log = call <2 x double> @llvm.experimental.constrained.log.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %log +} + +; CHECK-LABEL: constrained_vector_log10 +; CHECK: log10 +; CHECK: movlhps +define <2 x double> @constrained_vector_log10() { +entry: + %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %log10 +} + +; CHECK-LABEL: constrained_vector_log2 +; CHECK: log2 +; CHECK: movlhps +define <2 x double> @constrained_vector_log2() { +entry: + %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %log2 +} + +; CHECK-LABEL: constrained_vector_rint +; CHECK: rint +; CHECK: movlhps +define <2 x double> @constrained_vector_rint() { +entry: + %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %rint +} + +; unknown. 
+; CHECK-LABEL: constrained_vector_nearbyint +; CHECK: nearbyint +; CHECK: movlhps +define <2 x double> @constrained_vector_nearbyint() { +entry: + %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %nearby +} + + +declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)