diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp --- a/llvm/lib/CodeGen/LiveRangeShrink.cpp +++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -156,7 +156,8 @@ // If MI has side effects, it should become a barrier for code motion. // IOM is rebuild from the next instruction to prevent later // instructions from being moved before this MI. - if (MI.hasUnmodeledSideEffects() && Next != MBB.end()) { + if (MI.hasUnmodeledSideEffects() && !MI.isPseudoProbe() && + Next != MBB.end()) { BuildInstOrderMap(Next, IOM); SawStore = false; } diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1462,7 +1462,8 @@ } bool MachineInstr::isLoadFoldBarrier() const { - return mayStore() || isCall() || hasUnmodeledSideEffects(); + return mayStore() || isCall() || + (hasUnmodeledSideEffects() && !isPseudoProbe()); } /// allDefsAreDead - Return true if all the defs of this instruction are dead. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9682,8 +9682,9 @@ // We will look through cast uses, so ignore them completely. if (I.isCast()) continue; - // Ignore debug info intrinsics, they don't escape or store to allocas. - if (isa(I)) + // Ignore debug info and pseudo op intrinsics, they don't escape or store + // to allocas. + if (I.isDebugOrPseudoInst()) continue; // This is an unknown instruction. Assume it escapes or writes to all // static alloca operands. diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -193,7 +193,7 @@ // Ignore intrinsics that do not become real instructions. 
// TODO: Narrow this to intrinsics that have store-like effects. const auto *CI = cast(I); - if (!isa(CI) && !CI->isLifetimeStartOrEnd()) + if (!CI->isDebugOrPseudoInst() && !CI->isLifetimeStartOrEnd()) return true; break; } diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -804,6 +804,9 @@ // Debug instructions cannot be counted against the limit. if (OtherMI.isDebugInstr()) continue; + // Pseudo probe instructions cannot be counted against the limit. + if (OtherMI.isPseudoProbe()) + continue; if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. return false; ++NumVisited; @@ -977,6 +980,9 @@ // Debug instructions cannot be counted against the limit. if (OtherMI.isDebugInstr()) continue; + // Pseudo probe instructions cannot be counted against the limit. + if (OtherMI.isPseudoProbe()) + continue; if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. return false; ++NumVisited; diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -149,6 +149,13 @@ if (isNoModRef(MRI)) continue; + // A pseudo probe call shouldn't change any function attribute since it + // doesn't translate to a real instruction. It comes with a memory access + // tag to prevent itself being removed by optimizations and not block + // other instructions being optimized. + if (isa(I)) + continue; + if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { // The call could access any memory. If that includes writes, note it. 
if (isModSet(MRI)) diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -592,7 +592,7 @@ BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end(); for (++BBI; BBI != E; ++BBI) - if (BBI->mayWriteToMemory()) + if (BBI->mayWriteToMemory() && !isa(BBI)) return false; // Check for non-address taken alloca. If not address-taken already, it isn't diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3878,9 +3878,10 @@ } } - // Skip processing debug intrinsics in InstCombine. Processing these call instructions - // consumes non-trivial amount of time and provides no value for the optimization. - if (!isa(Inst)) { + // Skip processing debug and pseudo intrinsics in InstCombine. Processing + // these call instructions consumes non-trivial amount of time and + // provides no value for the optimization. 
+ if (!Inst->isDebugOrPseudoInst()) { InstrsForInstCombineWorklist.push_back(Inst); SeenAliasScopes.analyse(Inst); } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll @@ -0,0 +1,66 @@ +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +%struct.nonbonded = type { [2 x %struct.CompAtom*], [2 x %struct.CompAtomExt*], [2 x %struct.CompAtom*], [2 x %class.Vector*], [2 x %class.Vector*], [2 x i32], %class.Vector, double*, double*, %class.ComputeNonbondedWorkArrays*, %class.Pairlists*, i32, i32, double, double, i32, i32, i32, i32 } +%struct.CompAtomExt = type { i32 } +%struct.CompAtom = type { %class.Vector, float, i16, i8, i8 } +%class.Vector = type { double, double, double } +%class.ComputeNonbondedWorkArrays = type { %class.ResizeArray, %class.ResizeArray.0, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray, %class.ResizeArray.2, %class.ResizeArray.2 } +%class.ResizeArray.0 = type { i32 (...)**, %class.ResizeArrayRaw.1* } +%class.ResizeArrayRaw.1 = type <{ double*, i8*, i32, i32, i32, float, i32, [4 x i8] }> +%class.ResizeArray = type { i32 (...)**, %class.ResizeArrayRaw* } +%class.ResizeArrayRaw = type <{ i16*, i8*, i32, i32, i32, float, i32, [4 x i8] }> +%class.ResizeArray.2 = type { i32 (...)**, %class.ResizeArrayRaw.3* } +%class.ResizeArrayRaw.3 = type <{ %class.Vector*, i8*, i32, i32, i32, float, i32, [4 x i8] }> +%class.Pairlists = type { i16*, i32, i32 } + +;; Check the minPart4 and minPart assignments are merged. 
+; CHECK-COUNT-1: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 +; CHECK-NOT: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 + +define dso_local void @_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 { +entry: + %savePairlists3 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 11 + %0 = load i32, i32* %savePairlists3, align 8 + %usePairlists4 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 12 + %1 = load i32, i32* %usePairlists4, align 4 + %tobool54.not = icmp eq i32 %0, 0 + br i1 %tobool54.not, label %lor.lhs.false55, label %if.end109 + +lor.lhs.false55: ; preds = %entry + %tobool56.not = icmp eq i32 %1, 0 + br i1 %tobool56.not, label %if.end109, label %if.end109.thread + +if.end109.thread: ; preds = %lor.lhs.false55 + %minPart4 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 + %2 = load i32, i32* %minPart4, align 4 + call void @llvm.pseudoprobe(i64 -6172701105289426098, i64 2, i32 0, i64 -1) + br label %if.then138 + +if.end109: ; preds = %lor.lhs.false55, %entry + %minPart = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16 + %3 = load i32, i32* %minPart, align 4 + call void @llvm.pseudoprobe(i64 -6172701105289426098, i64 3, i32 0, i64 -1) + %tobool116.not = icmp eq i32 %1, 0 + br i1 %tobool116.not, label %if.then117, label %if.then138 + +if.then117: ; preds = %if.end109 + ret void + +if.then138: ; preds = %if.end109.thread, %if.end109 + %4 = phi i32 [ %2, %if.end109.thread ], [ %3, %if.end109 ] + %tobool139.not = icmp eq i32 %4, 0 + br i1 %tobool139.not, label %if.else147, label %if.then140 + +if.then140: ; preds = %if.then138 + ret void + +if.else147: ; preds = %if.then138 + ret void +} + +declare dso_local void @_ZN9Pairlists8addIndexEv() align 2 + +; Function Attrs: 
inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +attributes #0 = { inaccessiblememonly nounwind willreturn } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-instsched.ll @@ -0,0 +1,33 @@ +; PR1075 +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin -pseudo-probe-for-profiling -O3 | FileCheck %s + +define float @foo(float %x) #0 { + %tmp1 = fmul float %x, 3.000000e+00 + %tmp3 = fmul float %x, 5.000000e+00 + %tmp5 = fmul float %x, 7.000000e+00 + %tmp7 = fmul float %x, 1.100000e+01 + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1) + %tmp10 = fadd float %tmp1, %tmp3 + %tmp12 = fadd float %tmp10, %tmp5 + %tmp14 = fadd float %tmp12, %tmp7 + ret float %tmp14 +; CHECK: mulss +; CHECK: mulss +; CHECK: addss +; CHECK: mulss +; CHECK: addss +; CHECK: mulss +; CHECK: addss +; CHECK: ret +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1 + +attributes #0 = { nounwind } +attributes #1 = { inaccessiblememonly nounwind willreturn } + +!llvm.pseudo_probe_desc = !{!0} + +!0 = !{i64 6699318081062747564, i64 4294967295, !"foo", null} + diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-peep.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=x86_64-- -stop-after=peephole-opt -o - %s | FileCheck %s + +define internal i32 @arc_compare() { +entry: + %0 = load i64, i64* undef, align 8 + br i1 undef, label %return, label %if.end + +if.end: ; preds = %entry +; Check a register copy has been sunk into the compare instruction. 
+; CHECK: %[[#REG:]]:gr64 = IMPLICIT_DEF +; CHECK-NOT: %[[#]]:gr64 = MOV64rm %[[#REG]] +; CHECK: PSEUDO_PROBE 5116412291814990879, 3, 0, 0 +; CHECK: CMP64mr %[[#REG]], 1 + call void @llvm.pseudoprobe(i64 5116412291814990879, i64 3, i32 0, i64 -1) + %cmp4 = icmp slt i64 %0, undef + br i1 %cmp4, label %return, label %if.end6 + +if.end6: ; preds = %if.end + call void @llvm.pseudoprobe(i64 5116412291814990879, i64 5, i32 0, i64 -1) + br label %return + +return: ; preds = %if.end6, %if.end, %entry + ret i32 undef +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +attributes #0 = { inaccessiblememonly nounwind willreturn } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-twoaddr.ll @@ -0,0 +1,37 @@ +; RUN: llc -stop-after=twoaddressinstruction -mtriple=x86_64-- -o - %s | FileCheck %s + + +define dso_local double @twoaddressinstruction() local_unnamed_addr { +for.end: + %0 = load i64, i64* undef, align 8 + br label %for.body14.preheader + +for.body14.preheader: ; preds = %for.end + br i1 undef, label %for.cond25.preheader.loopexit.unr-lcssa, label %for.body14.preheader.new + +for.body14.preheader.new: ; preds = %for.body14.preheader + %unroll_iter136 = and i64 %0, -4 + br label %for.body14 + +for.cond25.preheader.loopexit.unr-lcssa: ; preds = %for.body14, %for.body14.preheader + %indvars.iv127.unr = phi i64 [ 1, %for.body14.preheader ], [ %indvars.iv.next128.3, %for.body14 ] + ret double undef + +for.body14: ; preds = %for.body14, %for.body14.preheader.new + %indvars.iv127 = phi i64 [ 1, %for.body14.preheader.new ], [ %indvars.iv.next128.3, %for.body14 ] + %niter137 = phi i64 [ %unroll_iter136, %for.body14.preheader.new ], [ %niter137.nsub.3, %for.body14 ] + %indvars.iv.next128.3 = add nuw nsw i64 %indvars.iv127, 4 +; CHECK: 
PSEUDO_PROBE -6878943695821059507, 9, 0, 0 + call void @llvm.pseudoprobe(i64 -6878943695821059507, i64 9, i32 0, i64 -1) +;; Check an opeq form of instruction is created. +; CHECK: %[[#REG:]]:gr64_nosp = COPY killed %[[#]] +; CHECK: %[[#REG]]:gr64_nosp = nuw ADD64ri8 %[[#REG]], 4, implicit-def dead $eflags + %niter137.nsub.3 = add i64 %niter137, -4 + %niter137.ncmp.3 = icmp eq i64 %niter137.nsub.3, 0 + br i1 %niter137.ncmp.3, label %for.cond25.preheader.loopexit.unr-lcssa, label %for.body14 +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +attributes #0 = { inaccessiblememonly nounwind willreturn } \ No newline at end of file