diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -21,6 +21,7 @@
 
 FunctionPass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
 FunctionPass *createBPFPreserveDIType();
+FunctionPass *createBPFIRPeephole();
 FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
 FunctionPass *createBPFMISimplifyPatchablePass();
 FunctionPass *createBPFMIPeepholePass();
@@ -33,6 +34,7 @@
 
 void initializeBPFAbstractMemberAccessLegacyPassPass(PassRegistry &);
 void initializeBPFPreserveDITypePass(PassRegistry&);
+void initializeBPFIRPeepholePass(PassRegistry&);
 void initializeBPFMISimplifyPatchablePass(PassRegistry&);
 void initializeBPFMIPeepholePass(PassRegistry&);
 void initializeBPFMIPeepholeTruncElimPass(PassRegistry&);
@@ -57,6 +59,13 @@
   static bool isRequired() { return true; }
 };
 
+class BPFIRPeepholePass : public PassInfoMixin<BPFIRPeepholePass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  static bool isRequired() { return true; }
+};
+
 class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> {
 public:
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
diff --git a/llvm/lib/Target/BPF/BPFIRPeephole.cpp b/llvm/lib/Target/BPF/BPFIRPeephole.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFIRPeephole.cpp
@@ -0,0 +1,128 @@
+//===------------ BPFIRPeephole.cpp - IR Peephole Transformation ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// IR level peephole optimization, specifically removing @llvm.stacksave() and
+// @llvm.stackrestore().
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+
+#define DEBUG_TYPE "bpf-ir-peephole"
+
+using namespace llvm;
+
+namespace {
+
+/// Remove @llvm.stacksave()/@llvm.stackrestore() calls from \p F, together
+/// with the store/load that spill and reload the saved stack pointer.
+///
+/// \returns true if any instruction was erased, so that both pass managers
+/// are told the IR was modified.
+static bool BPFIRPeepholeImpl(Function &F) {
+  LLVM_DEBUG(dbgs() << "******** BPF IR Peephole ********\n");
+
+  bool Changed = false;
+  // Erasing the instruction being visited would invalidate the range-for
+  // iterator, so its removal is deferred to the start of the next iteration.
+  Instruction *ToErase = nullptr;
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      // The following code pattern is handled:
+      //     %3 = call i8* @llvm.stacksave()
+      //     store i8* %3, i8** %saved_stack, align 8
+      //     ...
+      //     %4 = load i8*, i8** %saved_stack, align 8
+      //     call void @llvm.stackrestore(i8* %4)
+      //     ...
+      // The goal is to remove the above four instructions,
+      // so we won't have instructions with r11 (stack pointer)
+      // if eventually there is no variable length stack allocation.
+      // InstCombine also tries to remove the above instructions,
+      // if it is proven safe (constant alloca etc.), but depending
+      // on code pattern, it may still miss some.
+      //
+      // With unconditionally removing these instructions, if alloca is
+      // constant, we are okay then. Otherwise, SelectionDag will complain
+      // since BPF does not support dynamic allocation yet.
+      if (ToErase) {
+        ToErase->eraseFromParent();
+        ToErase = nullptr;
+      }
+
+      if (auto *Call = dyn_cast<CallInst>(&I)) {
+        if (auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand())) {
+          if (!GV->getName().equals("llvm.stacksave"))
+            continue;
+          if (!Call->hasOneUser())
+            continue;
+          auto *Inst = cast<Instruction>(*Call->user_begin());
+          LLVM_DEBUG(dbgs() << "Remove:"; I.dump());
+          LLVM_DEBUG(dbgs() << "Remove:"; Inst->dump(); dbgs() << '\n');
+          Changed = true;
+          Inst->eraseFromParent();
+          ToErase = &I;
+        }
+        continue;
+      }
+
+      if (auto *LD = dyn_cast<LoadInst>(&I)) {
+        if (!LD->hasOneUser())
+          continue;
+        auto *Call = dyn_cast<CallInst>(*LD->user_begin());
+        if (!Call)
+          continue;
+        auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
+        if (!GV)
+          continue;
+        if (!GV->getName().equals("llvm.stackrestore"))
+          continue;
+        LLVM_DEBUG(dbgs() << "Remove:"; I.dump());
+        LLVM_DEBUG(dbgs() << "Remove:"; Call->dump(); dbgs() << '\n');
+        Changed = true;
+        Call->eraseFromParent();
+        ToErase = &I;
+      }
+    }
+  }
+
+  return Changed;
+}
+
+/// Legacy pass manager wrapper around BPFIRPeepholeImpl().
+class BPFIRPeephole final : public FunctionPass {
+  bool runOnFunction(Function &F) override;
+
+public:
+  static char ID;
+  BPFIRPeephole() : FunctionPass(ID) {}
+};
+} // End anonymous namespace
+
+char BPFIRPeephole::ID = 0;
+INITIALIZE_PASS(BPFIRPeephole, DEBUG_TYPE, "BPF IR Peephole", false, false)
+
+FunctionPass *llvm::createBPFIRPeephole() { return new BPFIRPeephole(); }
+
+bool BPFIRPeephole::runOnFunction(Function &F) { return BPFIRPeepholeImpl(F); }
+
+/// New pass manager entry point: all analyses are invalidated when the IR was
+/// changed and preserved otherwise.
+PreservedAnalyses BPFIRPeepholePass::run(Function &F,
+                                         FunctionAnalysisManager &AM) {
+  return BPFIRPeepholeImpl(F) ? PreservedAnalyses::none()
+                              : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -43,6 +43,7 @@
   PassRegistry &PR = *PassRegistry::getPassRegistry();
   initializeBPFAbstractMemberAccessLegacyPassPass(PR);
   initializeBPFPreserveDITypePass(PR);
+  initializeBPFIRPeepholePass(PR);
   initializeBPFAdjustOptPass(PR);
   initializeBPFCheckAndAdjustIRPass(PR);
   initializeBPFMIPeepholePass(PR);
@@ -107,6 +108,7 @@
       [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
         PM.add(createBPFAbstractMemberAccess(this));
         PM.add(createBPFPreserveDIType());
+        PM.add(createBPFIRPeephole());
       });
 
   Builder.addExtension(
@@ -128,6 +130,7 @@
         FunctionPassManager FPM;
         FPM.addPass(BPFAbstractMemberAccessPass(this));
         FPM.addPass(BPFPreserveDITypePass());
+        FPM.addPass(BPFIRPeepholePass());
         MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
       });
   PB.registerPeepholeEPCallback([=](FunctionPassManager &FPM,
diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt
--- a/llvm/lib/Target/BPF/CMakeLists.txt
+++ b/llvm/lib/Target/BPF/CMakeLists.txt
@@ -21,6 +21,7 @@
   BPFCheckAndAdjustIR.cpp
   BPFFrameLowering.cpp
   BPFInstrInfo.cpp
+  BPFIRPeephole.cpp
   BPFISelDAGToDAG.cpp
   BPFISelLowering.cpp
   BPFMCInstLower.cpp
diff --git a/llvm/test/CodeGen/BPF/vla.ll b/llvm/test/CodeGen/BPF/vla.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/vla.ll
@@ -0,0 +1,115 @@
+; RUN: opt --bpf-ir-peephole -mtriple=bpf-pc-linux -S %s | FileCheck %s
+; Source:
+;   #define AA 40
+;   struct t {
+;     char a[20];
+;   };
+;   void foo(void *);
+;
+;   int test1() {
+;     const int a = 8;
+;     char tmp[AA + sizeof(struct t) + a];
+;     foo(tmp);
+;     return 0;
+;   }
+;
+;   int test2(int b) {
+;     const int a = 8;
+;     char tmp[a + b];
+;     foo(tmp);
+;     return 0;
+;   }
+; Compilation flag:
+;   clang -target bpf -O2 -S -emit-llvm t.c -Xclang -disable-llvm-passes
+
+source_filename = "t.c"
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "bpf"
+
+; Function Attrs: nounwind
+define dso_local i32 @test1() #0 {
+entry:
+  %a = alloca i32, align 4
+  %saved_stack = alloca i8*, align 8
+  %0 = bitcast i32* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4
+  store i32 8, i32* %a, align 4, !tbaa !3
+  %1 = call i8* @llvm.stacksave()
+  store i8* %1, i8** %saved_stack, align 8
+  %vla = alloca i8, i64 68, align 1
+  call void @foo(i8* %vla)
+  %2 = load i8*, i8** %saved_stack, align 8
+  call void @llvm.stackrestore(i8* %2)
+  %3 = bitcast i32* %a to i8*
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %3) #4
+  ret i32 0
+}
+
+; CHECK: define dso_local i32 @test1
+; CHECK-NOT: %[[#]] = call i8* @llvm.stacksave()
+; CHECK-NOT: store i8* %[[#]], i8** %saved_stack, align 8
+; CHECK-NOT: %[[#]] = load i8*, i8** %saved_stack, align 8
+; CHECK-NOT: call void @llvm.stackrestore(i8* %[[#]])
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nofree nosync nounwind willreturn
+declare i8* @llvm.stacksave() #2
+
+declare dso_local void @foo(i8*) #3
+
+; Function Attrs: nofree nosync nounwind willreturn
+declare void @llvm.stackrestore(i8*) #2
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
+
+; Function Attrs: nounwind
+define dso_local i32 @test2(i32 %b) #0 {
+entry:
+  %b.addr = alloca i32, align 4
+  %a = alloca i32, align 4
+  %saved_stack = alloca i8*, align 8
+  %__vla_expr0 = alloca i64, align 8
+  store i32 %b, i32* %b.addr, align 4, !tbaa !3
+  %0 = bitcast i32* %a to i8*
+  call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4
+  store i32 8, i32* %a, align 4, !tbaa !3
+  %1 = load i32, i32* %b.addr, align 4, !tbaa !3
+  %add = add nsw i32 8, %1
+  %2 = zext i32 %add to i64
+  %3 = call i8* @llvm.stacksave()
+  store i8* %3, i8** %saved_stack, align 8
+  %vla = alloca i8, i64 %2, align 1
+  store i64 %2, i64* %__vla_expr0, align 8
+  call void @foo(i8* %vla)
+  %4 = load i8*, i8** %saved_stack, align 8
+  call void @llvm.stackrestore(i8* %4)
+  %5 = bitcast i32* %a to i8*
+  call void @llvm.lifetime.end.p0i8(i64 4, i8* %5) #4
+  ret i32 0
+}
+
+; CHECK: define dso_local i32 @test2
+; CHECK-NOT: %[[#]] = call i8* @llvm.stacksave()
+; CHECK-NOT: store i8* %[[#]], i8** %saved_stack, align 8
+; CHECK-NOT: %[[#]] = load i8*, i8** %saved_stack, align 8
+; CHECK-NOT: call void @llvm.stackrestore(i8* %[[#]])
+
+attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { argmemonly nofree nosync nounwind willreturn }
+attributes #2 = { nofree nosync nounwind willreturn }
+attributes #3 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #4 = { nounwind }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git 64c5d5c671fb5b5f25c464652a4eec2cf743af0d)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}