diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1639,6 +1639,9 @@ /// false, but it shouldn't matter what it returns anyway. bool hasArmWideBranch(bool Thumb) const; + /// \returns whether the target is BPF. + bool isBPFTarget() const; + /// @} private: @@ -2003,6 +2006,7 @@ virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; virtual bool hasArmWideBranch(bool Thumb) const = 0; + virtual bool isBPFTarget() const = 0; }; template <typename T> @@ -2696,6 +2700,10 @@ bool hasArmWideBranch(bool Thumb) const override { return Impl.hasArmWideBranch(Thumb); } + + bool isBPFTarget() const override { + return Impl.isBPFTarget(); + } }; template <typename T> diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -880,6 +880,7 @@ } bool hasArmWideBranch(bool) const { return false; } + bool isBPFTarget() const { return false; } protected: // Obtain the minimum required size to hold the value (without the sign) diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -169,10 +169,10 @@ /// \p AllowSpeculation is whether values should be hoisted even if they are not /// guaranteed to execute in the loop, but are safe to speculatively execute. 
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, - AssumptionCache *, TargetLibraryInfo *, Loop *, - MemorySSAUpdater &, ScalarEvolution *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool, - bool AllowSpeculation); + AssumptionCache *, TargetLibraryInfo *, TargetTransformInfo *, + Loop *, MemorySSAUpdater &, ScalarEvolution *, + ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, + OptimizationRemarkEmitter *, bool, bool AllowSpeculation); /// Return true if the induction variable \p IV in a Loop whose latch is /// \p LatchBlock would become dead if the exit test \p Cond were removed. diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1187,6 +1187,10 @@ return TTIImpl->hasArmWideBranch(Thumb); } +bool TargetTransformInfo::isBPFTarget() const { + return TTIImpl->isBPFTarget(); +} + bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { return TTIImpl->shouldExpandReduction(II); } diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -77,6 +77,7 @@ return Options; } + bool isBPFTarget() const { return true; } }; } // end namespace llvm diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -454,9 +454,9 @@ MSSAU, &SafetyInfo, Flags, ORE); Flags.setIsSink(false); if (Preheader) - Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L, - MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode, - LicmAllowSpeculation); + Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, + TTI, L, MSSAU, SE, &SafetyInfo, Flags, ORE, + LoopNestMode, 
LicmAllowSpeculation); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. @@ -859,9 +859,9 @@ /// bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, - TargetLibraryInfo *TLI, Loop *CurLoop, - MemorySSAUpdater &MSSAU, ScalarEvolution *SE, - ICFLoopSafetyInfo *SafetyInfo, + TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + Loop *CurLoop, MemorySSAUpdater &MSSAU, + ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE, bool LoopNestMode, bool AllowSpeculation) { @@ -987,7 +987,7 @@ // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them // into (x < min(INV1, INV2)), and hoisting the invariant part of this // expression out of the loop. - if (hoistMinMax(I, *CurLoop, *SafetyInfo, MSSAU)) { + if (!TTI->isBPFTarget() && hoistMinMax(I, *CurLoop, *SafetyInfo, MSSAU)) { ++NumMinMaxHoisted; Changed = true; continue; diff --git a/llvm/test/CodeGen/BPF/licm-maxmin.ll b/llvm/test/CodeGen/BPF/licm-maxmin.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/licm-maxmin.ll @@ -0,0 +1,86 @@ +; RUN: opt -O2 -mtriple=bpf-pc-linux -mcpu=v3 -S < %s | FileCheck %s +; source: +; unsigned foo(unsigned); +; unsigned g; +; void bar(unsigned u) { +; unsigned i; +; for (i = 0; i < 5 && i < u; i++) +; g += foo(i); +; } +; Compilation flag: +; clang -target bpf -O2 -Xclang -disable-llvm-passes -S -emit-llvm t.c -o t.ll + +@g = dso_local global i32 0, align 4 + +; Function Attrs: nounwind +define dso_local void @bar(i32 noundef %u) #0 { +entry: + %u.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %u, ptr %u.addr, align 4, !tbaa !3 + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #3 + store i32 0, ptr %i, align 4, !tbaa !3 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !tbaa !3 + %cmp = icmp ult i32 %0, 5 + br 
i1 %cmp, label %land.rhs, label %land.end + +land.rhs: ; preds = %for.cond + %1 = load i32, ptr %i, align 4, !tbaa !3 + %2 = load i32, ptr %u.addr, align 4, !tbaa !3 + %cmp1 = icmp ult i32 %1, %2 + br label %land.end + +land.end: ; preds = %land.rhs, %for.cond + %3 = phi i1 [ false, %for.cond ], [ %cmp1, %land.rhs ] + br i1 %3, label %for.body, label %for.end + +for.body: ; preds = %land.end + %4 = load i32, ptr %i, align 4, !tbaa !3 + %call = call i32 @foo(i32 noundef %4) + %5 = load i32, ptr @g, align 4, !tbaa !3 + %add = add i32 %5, %call + store i32 %add, ptr @g, align 4, !tbaa !3 + br label %for.inc + +for.inc: ; preds = %for.body + %6 = load i32, ptr %i, align 4, !tbaa !3 + %inc = add i32 %6, 1 + store i32 %inc, ptr %i, align 4, !tbaa !3 + br label %for.cond, !llvm.loop !7 + +for.end: ; preds = %land.end + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #3 + ret void +} + +; CHECK: define dso_local void @bar(i32 noundef [[U:%.*]]) +; CHECK-NOT: tail call i32 @llvm.umin.i32(i32 [[U]], i32 5) + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +declare dso_local i32 @foo(i32 noundef) #2 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v3" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v3" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git 
b337735390659a7aa79bcefd1bebb89d7f278194)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = distinct !{!7, !8} +!8 = !{!"llvm.loop.mustprogress"}