diff --git a/llvm/include/llvm/Transforms/Scalar/LICM.h b/llvm/include/llvm/Transforms/Scalar/LICM.h
--- a/llvm/include/llvm/Transforms/Scalar/LICM.h
+++ b/llvm/include/llvm/Transforms/Scalar/LICM.h
@@ -42,6 +42,8 @@
 class Loop;
 class LoopNest;
 
+/// Gates the MinMaxHoisting transform in LICM; defined in LICM.cpp.
+extern cl::opt<bool> EnableMinMaxHoisting;
 extern cl::opt<unsigned> SetLicmMssaOptCap;
 extern cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap;
 
diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
--- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
+++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
@@ -18,6 +18,7 @@
 #include "BPFTargetMachine.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Transforms/Scalar/LICM.h"
 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
 
 namespace llvm {
@@ -35,7 +36,17 @@
 public:
   explicit BPFTTIImpl(const BPFTargetMachine *TM, const Function &F)
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
-        TLI(ST->getTargetLowering()) {}
+        TLI(ST->getTargetLowering()) {
+    // Disable LICM MinMaxHoisting optimization. This optimization may cause
+    // verification failure for linux kernel 6.3 or earlier versions.
+    //
+    // NOTE(review): EnableMinMaxHoisting is a process-wide option, so this
+    // write stays in effect for every LICM run that follows, including runs
+    // on non-BPF functions in the same process. An option given explicitly
+    // on the command line is respected and not overridden.
+    if (EnableMinMaxHoisting.getNumOccurrences() == 0)
+      EnableMinMaxHoisting = false;
+  }
 
   int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) {
     if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -45,6 +45,7 @@
   BPFInfo
   IPO
   Scalar
+  ScalarOpts
   SelectionDAG
   Support
   Target
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -124,6 +124,12 @@
     cl::desc("Max num uses visited for identifying load "
              "invariance in loop using invariant start (default = 8)"));
 
+/// Gates the MinMaxHoisting transform in LICM. Enabled by default; declared
+/// in LICM.h so that targets can turn it off.
+cl::opt<bool> llvm::EnableMinMaxHoisting(
+    "enable-licm-minmax-hoisting", cl::Hidden, cl::init(true),
+    cl::desc("Enable MinMaxHoisting optimization in LICM pass"));
+
 // Experimental option to allow imprecision in LICM in pathological cases, in
 // exchange for faster compile. This is to be removed if MemorySSA starts to
 // address the same issue. LICM calls MemorySSAWalker's
@@ -987,7 +993,8 @@
       // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
       // into (x < min(INV1, INV2)), and hoisting the invariant part of this
       // expression out of the loop.
-      if (hoistMinMax(I, *CurLoop, *SafetyInfo, MSSAU)) {
+      if (EnableMinMaxHoisting &&
+          hoistMinMax(I, *CurLoop, *SafetyInfo, MSSAU)) {
         ++NumMinMaxHoisted;
         Changed = true;
         continue;
diff --git a/llvm/test/CodeGen/BPF/licm-minmax-hoisted.ll b/llvm/test/CodeGen/BPF/licm-minmax-hoisted.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/licm-minmax-hoisted.ll
@@ -0,0 +1,88 @@
+; RUN: opt -O2 -mtriple=bpf-pc-linux -mcpu=v3 -S < %s | FileCheck %s
+; source:
+;   unsigned foo(unsigned);
+;   unsigned g;
+;   void bar(unsigned u) {
+;     unsigned i;
+;     for (i = 0; i < 5 && i < u; i++)
+;       g += foo(i);
+;   }
+; Compilation flag:
+;   clang -target bpf -O2 -Xclang -disable-llvm-passes -S -emit-llvm t.c -o t.ll
+
+@g = dso_local global i32 0, align 4
+
+; Function Attrs: nounwind
+define dso_local void @bar(i32 noundef %u) #0 {
+entry:
+  %u.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 %u, ptr %u.addr, align 4, !tbaa !3
+  call void @llvm.lifetime.start.p0(i64 4, ptr %i) #3
+  store i32 0, ptr %i, align 4, !tbaa !3
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, ptr %i, align 4, !tbaa !3
+  %cmp = icmp ult i32 %0, 5
+  br i1 %cmp, label %land.rhs, label %land.end
+
+land.rhs:                                         ; preds = %for.cond
+  %1 = load i32, ptr %i, align 4, !tbaa !3
+  %2 = load i32, ptr %u.addr, align 4, !tbaa !3
+  %cmp1 = icmp ult i32 %1, %2
+  br label %land.end
+
+land.end:                                         ; preds = %land.rhs, %for.cond
+  %3 = phi i1 [ false, %for.cond ], [ %cmp1, %land.rhs ]
+  br i1 %3, label %for.body, label %for.end
+
+for.body:                                         ; preds = %land.end
+  %4 = load i32, ptr %i, align 4, !tbaa !3
+  %call = call i32 @foo(i32 noundef %4)
+  %5 = load i32, ptr @g, align 4, !tbaa !3
+  %add = add i32 %5, %call
+  store i32 %add, ptr @g, align 4, !tbaa !3
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %6 = load i32, ptr %i, align 4, !tbaa !3
+  %inc = add i32 %6, 1
+  store i32 %inc, ptr %i, align 4, !tbaa !3
+  br label %for.cond, !llvm.loop !7
+
+for.end:                                          ; preds = %land.end
+  call void @llvm.lifetime.end.p0(i64 4, ptr %i) #3
+  ret void
+}
+
+; LICM MinMaxHoisting would fold the two loop bounds into umin(%u, 5). It is
+; disabled for BPF, so no such call may appear in the optimized function.
+; CHECK: define dso_local void @bar(i32 noundef [[U:%.*]])
+; CHECK-NOT: call i32 @llvm.umin.i32(i32 [[U]], i32 5)
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+declare dso_local i32 @foo(i32 noundef) #2
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v3" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="v3" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git b337735390659a7aa79bcefd1bebb89d7f278194)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = distinct !{!7, !8}
+!8 = !{!"llvm.loop.mustprogress"}