Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -130,7 +130,8 @@
   SDValue ExpandBITREVERSE(SDValue Op);
   SDValue ExpandCTLZ(SDValue Op);
   SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
-
+  SDValue ExpandStrictFPOp(SDValue Op);
+
   /// Implements vector promotion.
   ///
   /// This is essentially just bitcasting the operands to a different type and
@@ -288,10 +289,30 @@
   if (!HasVectorValue)
     return TranslateLegalizeResults(Op, Result);

-  EVT QueryType;
+  TargetLowering::LegalizeAction Action = TargetLowering::Legal;
   switch (Op.getOpcode()) {
   default:
     return TranslateLegalizeResults(Op, Result);
+  case ISD::STRICT_FSQRT:
+  case ISD::STRICT_FMA:
+  case ISD::STRICT_FPOW:
+  case ISD::STRICT_FPOWI:
+  case ISD::STRICT_FSIN:
+  case ISD::STRICT_FCOS:
+  case ISD::STRICT_FEXP:
+  case ISD::STRICT_FEXP2:
+  case ISD::STRICT_FLOG:
+  case ISD::STRICT_FLOG10:
+  case ISD::STRICT_FLOG2:
+  case ISD::STRICT_FRINT:
+  case ISD::STRICT_FNEARBYINT:
+    // These pseudo-ops get legalized as if they were their non-strict
+    // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
+    // is also legal, but if ISD::FSQRT requires expansion then so does
+    // ISD::STRICT_FSQRT.
+    Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
+                                            Node->getValueType(0));
+    break;
   case ISD::ADD:
   case ISD::SUB:
   case ISD::MUL:
@@ -368,26 +389,30 @@
   case ISD::SMUL_LOHI:
   case ISD::UMUL_LOHI:
   case ISD::FCANONICALIZE:
-    QueryType = Node->getValueType(0);
+    Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
     break;
   case ISD::FP_ROUND_INREG:
-    QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    Action = TLI.getOperationAction(Node->getOpcode(),
+               cast<VTSDNode>(Node->getOperand(1))->getVT());
     break;
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
-    QueryType = Node->getOperand(0).getValueType();
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(0).getValueType());
     break;
   case ISD::MSCATTER:
-    QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
+    Action = TLI.getOperationAction(Node->getOpcode(),
+               cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
     break;
   case ISD::MSTORE:
-    QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType();
+    Action = TLI.getOperationAction(Node->getOpcode(),
+               cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
     break;
   }

   LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));

-  switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+  switch (Action) {
   default: llvm_unreachable("This action is not supported yet!");
   case TargetLowering::Promote:
     Result = Promote(Op);
@@ -700,6 +725,19 @@
     return ExpandCTLZ(Op);
   case ISD::CTTZ_ZERO_UNDEF:
     return ExpandCTTZ_ZERO_UNDEF(Op);
+  case ISD::STRICT_FSQRT:
+  case ISD::STRICT_FMA:
+  case ISD::STRICT_FPOW:
+  case ISD::STRICT_FSIN:
+  case ISD::STRICT_FCOS:
+  case ISD::STRICT_FEXP:
+  case ISD::STRICT_FEXP2:
+  case ISD::STRICT_FLOG:
+  case ISD::STRICT_FLOG10:
+  case ISD::STRICT_FLOG2:
+  case ISD::STRICT_FRINT:
+  case ISD::STRICT_FNEARBYINT:
+    return ExpandStrictFPOp(Op);
   default:
     return DAG.UnrollVectorOp(Op.getNode());
   }
@@ -1086,6 +1124,48 @@
   return DAG.UnrollVectorOp(Op.getNode());
 }

+SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
+  EVT VT = Op.getValueType();
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElems = VT.getVectorNumElements();
+  unsigned NumOpers = Op.getNumOperands();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT ValueVTs[] = {EltVT, MVT::Other};
+  SDValue Chain = Op.getOperand(0);
+  SDLoc dl(Op);
+
+  SmallVector<SDValue, 32> OpValues;
+  SmallVector<SDValue, 32> OpChains;
+  for (unsigned i = 0; i < NumElems; ++i) {
+    SmallVector<SDValue, 4> Opers;
+    SDValue Idx = DAG.getConstant(i, dl,
+                                  TLI.getVectorIdxTy(DAG.getDataLayout()));
+
+    // The Chain is the first operand.
+    Opers.push_back(Chain);
+
+    // Now process the remaining operands.
+    for (unsigned j = 1; j < NumOpers; ++j) {
+      SDValue Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                 EltVT, Op.getOperand(j), Idx);
+      Opers.push_back(Oper);
+    }
+
+    SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
+
+    OpValues.push_back(ScalarOp.getValue(0));
+    OpChains.push_back(ScalarOp.getValue(1));
+  }
+
+  SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
+  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+
+  AddLegalizedOperand(Op.getValue(0), Result);
+  AddLegalizedOperand(Op.getValue(1), NewChain);
+
+  return NewChain;
+}
+
 SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
   EVT VT = Op.getValueType();
   unsigned NumElems = VT.getVectorNumElements();
Index: llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -0,0 +1,326 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
+
+define <2 x double> @constrained_vector_fdiv() {
+; CHECK-LABEL: constrained_vector_fdiv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.000000e+00,2.000000e+00]
+; CHECK-NEXT:    divpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
+           <2 x double> ,
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %div
+}
+
+define <2 x double> @constrained_vector_fmul(<2 x double> %a) {
+; CHECK-LABEL: constrained_vector_fmul:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
+; CHECK-NEXT:    mulpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
+           <2 x double> ,
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %mul
+}
+
+define <2 x double> @constrained_vector_fadd() {
+; CHECK-LABEL: constrained_vector_fadd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [1.797693e+308,1.797693e+308]
+; CHECK-NEXT:    addpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
+           <2 x double> ,
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %add
+}
+
+define <2 x double> @constrained_vector_fsub() {
+; CHECK-LABEL: constrained_vector_fsub:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movapd {{.*#+}} xmm0 = [-1.797693e+308,-1.797693e+308]
+; CHECK-NEXT:    subpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
+           <2 x double> ,
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %sub
+}
+
+define <2 x double> @constrained_vector_sqrt() {
+; CHECK-LABEL: constrained_vector_sqrt:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sqrtpd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+entry:
+  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+            <2 x double> ,
+            metadata !"round.dynamic",
+            metadata !"fpexcept.strict")
+  ret <2 x double> %sqrt
+}
+
+define <2 x double> @constrained_vector_pow() {
+; CHECK-LABEL: constrained_vector_pow:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    callq pow
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    callq pow
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
+           <2 x double> ,
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %pow
+}
+
+define <2 x double> @constrained_vector_sin() {
+; CHECK-LABEL: constrained_vector_sin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq sin
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq sin
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %sin
+}
+
+define <2 x double> @constrained_vector_cos() {
+; CHECK-LABEL: constrained_vector_cos:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq cos
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq cos
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %cos
+}
+
+define <2 x double> @constrained_vector_exp() {
+; CHECK-LABEL: constrained_vector_exp:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq exp
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq exp
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %exp
+}
+
+define <2 x double> @constrained_vector_exp2() {
+; CHECK-LABEL: constrained_vector_exp2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq exp2
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq exp2
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
+            <2 x double> ,
+            metadata !"round.dynamic",
+            metadata !"fpexcept.strict")
+  ret <2 x double> %exp2
+}
+
+define <2 x double> @constrained_vector_log() {
+; CHECK-LABEL: constrained_vector_log:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq log
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq log
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %log
+}
+
+define <2 x double> @constrained_vector_log10() {
+; CHECK-LABEL: constrained_vector_log10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq log10
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq log10
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
+             <2 x double> ,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict")
+  ret <2 x double> %log10
+}
+
+define <2 x double> @constrained_vector_log2() {
+; CHECK-LABEL: constrained_vector_log2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq log2
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq log2
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
+            <2 x double> ,
+            metadata !"round.dynamic",
+            metadata !"fpexcept.strict")
+  ret <2 x double> %log2
+}
+
+define <2 x double> @constrained_vector_rint() {
+; CHECK-LABEL: constrained_vector_rint:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq rint
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq rint
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
+            <2 x double> ,
+            metadata !"round.dynamic",
metadata !"fpexcept.strict") + ret <2 x double> %rint +} + +define <2 x double> @constrained_vector_nearbyint() { +; CHECK-LABEL: constrained_vector_nearbyint: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: callq nearbyint +; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: callq nearbyint +; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload +; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( + <2 x double> , + metadata !"round.dynamic", + metadata !"fpexcept.strict") + ret <2 x double> %nearby +} + + +declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)