Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -931,60 +931,125 @@
     // and the node legalization. As such this pass basically does "really
     // late" legalization of these inline with the X86 isel pass.
     // FIXME: This should only happen when not compiled with -O0.
-    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
-      continue;
+    switch (N->getOpcode()) {
+    default: continue;
+    case ISD::FP_ROUND:
+    case ISD::FP_EXTEND:
+    {
+      MVT SrcVT = N->getOperand(0).getSimpleValueType();
+      MVT DstVT = N->getSimpleValueType(0);
 
-    MVT SrcVT = N->getOperand(0).getSimpleValueType();
-    MVT DstVT = N->getSimpleValueType(0);
+      // If any of the sources are vectors, no fp stack involved.
+      if (SrcVT.isVector() || DstVT.isVector())
+        continue;
 
-    // If any of the sources are vectors, no fp stack involved.
-    if (SrcVT.isVector() || DstVT.isVector())
-      continue;
+      // If the source and destination are SSE registers, then this is a legal
+      // conversion that should not be lowered.
+      const X86TargetLowering *X86Lowering =
+          static_cast<const X86TargetLowering *>(TLI);
+      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
+      if (SrcIsSSE && DstIsSSE)
+        continue;
 
-    // If the source and destination are SSE registers, then this is a legal
-    // conversion that should not be lowered.
-    const X86TargetLowering *X86Lowering =
-        static_cast<const X86TargetLowering *>(TLI);
-    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
-    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
-    if (SrcIsSSE && DstIsSSE)
-      continue;
+      if (!SrcIsSSE && !DstIsSSE) {
+        // If this is an FPStack extension, it is a noop.
+        if (N->getOpcode() == ISD::FP_EXTEND)
+          continue;
+        // If this is a value-preserving FPStack truncation, it is a noop.
+        if (N->getConstantOperandVal(1))
+          continue;
+      }
 
-    if (!SrcIsSSE && !DstIsSSE) {
-      // If this is an FPStack extension, it is a noop.
-      if (N->getOpcode() == ISD::FP_EXTEND)
+      // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+      // FPStack has extload and truncstore. SSE can fold direct loads into other
+      // operations. Based on this, decide what we want to do.
+      MVT MemVT;
+      if (N->getOpcode() == ISD::FP_ROUND)
+        MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+      else
+        MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+      SDLoc dl(N);
+
+      // FIXME: optimize the case where the src/dest is a load or store?
+
+      SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
+                                            MemTmp, MachinePointerInfo(), MemVT);
+      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+                                          MachinePointerInfo(), MemVT);
+
+      // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+      // extload we created. This will cause general havok on the dag because
+      // anything below the conversion could be folded into other existing nodes.
+      // To avoid invalidating 'I', back it up to the convert node.
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+      break;
+    }
+
+    // The sequence of events for lowering STRICT_FP versions of these nodes
+    // requires dealing with the chain differently, as there is already a
+    // preexisting chain.
+    case ISD::STRICT_FP_ROUND:
+    case ISD::STRICT_FP_EXTEND:
+    {
+      MVT SrcVT = N->getOperand(1).getSimpleValueType();
+      MVT DstVT = N->getSimpleValueType(0);
+
+      // If any of the sources are vectors, no fp stack involved.
+      if (SrcVT.isVector() || DstVT.isVector())
         continue;
-      // If this is a value-preserving FPStack truncation, it is a noop.
-      if (N->getConstantOperandVal(1))
+
+      // If the source and destination are SSE registers, then this is a legal
+      // conversion that should not be lowered.
+      const X86TargetLowering *X86Lowering =
+          static_cast<const X86TargetLowering *>(TLI);
+      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
+      if (SrcIsSSE && DstIsSSE)
         continue;
-    }
 
-    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
-    // FPStack has extload and truncstore. SSE can fold direct loads into other
-    // operations. Based on this, decide what we want to do.
-    MVT MemVT;
-    if (N->getOpcode() == ISD::FP_ROUND)
-      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
-    else
-      MemVT = SrcIsSSE ? SrcVT : DstVT;
+      if (!SrcIsSSE && !DstIsSSE) {
+        // If this is an FPStack extension, it is a noop.
+        if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
+          continue;
+        // If this is a value-preserving FPStack truncation, it is a noop.
+        if (N->getConstantOperandVal(2))
+          continue;
+      }
 
-    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
-    SDLoc dl(N);
+      // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+      // FPStack has extload and truncstore. SSE can fold direct loads into other
+      // operations. Based on this, decide what we want to do.
+      MVT MemVT;
+      if (N->getOpcode() == ISD::STRICT_FP_ROUND)
+        MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+      else
+        MemVT = SrcIsSSE ? SrcVT : DstVT;
 
-    // FIXME: optimize the case where the src/dest is a load or store?
-    SDValue Store =
-        CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
-                              MemTmp, MachinePointerInfo(), MemVT);
-    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
-                                        MachinePointerInfo(), MemVT);
+      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+      SDLoc dl(N);
 
-    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
-    // extload we created. This will cause general havok on the dag because
-    // anything below the conversion could be folded into other existing nodes.
-    // To avoid invalidating 'I', back it up to the convert node.
-    --I;
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+      // FIXME: optimize the case where the src/dest is a load or store?
+      // Since the operation is StrictFP, use the preexisting chain.
+      SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl, N->getOperand(1),
+                                            MemTmp, MachinePointerInfo(), MemVT);
+      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+                                          MachinePointerInfo(), MemVT);
+
+      // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+      // extload we created. This will cause general havok on the dag because
+      // anything below the conversion could be folded into other existing nodes.
+      // To avoid invalidating 'I', back it up to the convert node.
+      --I;
+      CurDAG->ReplaceAllUsesWith(N, Result.getNode());
+      break;
+    }
+    }
 
     // Now that we did that, the node is dead. Increment the iterator to the
     // next node to process, then delete N.
     ++I;
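Both the FP_ROUND/FP_EXTEND and STRICT_FP_ROUND/STRICT_FP_EXTEND cases above
share the same early-outs, so a conversion whose source and destination types
both live in SSE registers is never rewritten through a stack temporary. A
minimal sketch of that path (illustrative only, not one of this patch's tests;
the function name is made up):

; f32 -> f64 keeps both types in XMM registers on x86-64, so the
; SrcIsSSE && DstIsSSE check leaves the node for normal instruction
; selection; no fstp/fld through a stack slot should appear.
define double @sse_to_sse_fpext(float %x) {
entry:
  %ext = call double @llvm.experimental.constrained.fpext.f64.f32(
      float %x,
      metadata !"fpexcept.strict")
  ret double %ext
}

declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)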
Index: test/CodeGen/X86/constrained-fp80-trunc-ext.ll
===================================================================
--- test/CodeGen/X86/constrained-fp80-trunc-ext.ll
+++ test/CodeGen/X86/constrained-fp80-trunc-ext.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=x86_64-gnu-linux < %s | FileCheck %s
+
+define x86_fp80 @constrained_fpext_f32_as_fp80(float %mem) {
+; CHECK-LABEL: constrained_fpext_f32_as_fp80:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    flds -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+entry:
+  %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(
+      float %mem,
+      metadata !"fpexcept.strict")
+  ret x86_fp80 %ext
+}
+
+define float @constrained_fptrunc_f80_to_f32(x86_fp80 %reg) {
+; CHECK-LABEL: constrained_fptrunc_f80_to_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq
+  %trunc = call float @llvm.experimental.constrained.fptrunc.f32.f80(
+      x86_fp80 %reg,
+      metadata !"round.dynamic",
+      metadata !"fpexcept.strict")
+  ret float %trunc
+}
+
+define x86_fp80 @constrained_fpext_f64_to_f80(double %mem) {
+; CHECK-LABEL: constrained_fpext_f64_to_f80:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+entry:
+  %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(
+      double %mem,
+      metadata !"fpexcept.strict")
+  ret x86_fp80 %ext
+}
+
+define double @constrained_fptrunc_f80_to_f64(x86_fp80 %reg) {
+; CHECK-LABEL: constrained_fptrunc_f80_to_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    retq
+  %trunc = call double @llvm.experimental.constrained.fptrunc.f64.f80(
+      x86_fp80 %reg,
+      metadata !"round.dynamic",
+      metadata !"fpexcept.strict")
+  ret double %trunc
+}
+
+declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float, metadata)
+declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(double, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.f80(x86_fp80, metadata, metadata)
+declare double @llvm.experimental.constrained.fptrunc.f64.f80(x86_fp80, metadata, metadata)
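A note on why the strict cases thread the truncstore onto the node's incoming
chain instead of the entry node: STRICT_FP_ROUND and STRICT_FP_EXTEND may read
or raise floating-point status, so their relative order must survive the
store/reload rewrite, and ReplaceAllUsesWith(N, Result.getNode()) rewires the
chain result as well as the value result. A minimal sketch of two conversions
whose order should be preserved (illustrative only, not part of this patch's
tests; the function name is made up):

; Each truncation is lowered through its own stack slot; the store of
; %t1 and the second truncation stay ordered on the chain because each
; call may raise an FP exception.
define float @two_ordered_truncs(x86_fp80 %a, x86_fp80 %b, float* %out) {
  %t1 = call float @llvm.experimental.constrained.fptrunc.f32.f80(
      x86_fp80 %a,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict")
  store float %t1, float* %out
  %t2 = call float @llvm.experimental.constrained.fptrunc.f32.f80(
      x86_fp80 %b,
      metadata !"round.dynamic",
      metadata !"fpexcept.strict")
  ret float %t2
}

declare float @llvm.experimental.constrained.fptrunc.f32.f80(x86_fp80, metadata, metadata)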