diff --git a/clang/test/CodeGen/bpf-hoist-getelementptr-load-1.c b/clang/test/CodeGen/bpf-hoist-getelementptr-load-1.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/bpf-hoist-getelementptr-load-1.c @@ -0,0 +1,121 @@ +// REQUIRES: bpf-registered-target +// RUN: %clang -mllvm --print-after -mllvm codegenprepare \ +// RUN: -target bpf -S -g -O2 -o /dev/null %s 2>&1 \ +// RUN: | FileCheck %s + +// This test is related to the issue described in the following thread: +// +// https://lore.kernel.org/bpf/CAA-VZPmxh8o8EBcJ=m-DH4ytcxDFmo0JKsm1p1gf40kS0CE3NQ@mail.gmail.com/T/#m4b9ce2ce73b34f34172328f975235fc6f19841b6 +// +// See lib/Target/BPF/BPFHoistGetElementPtrLoadsPass.cpp for additional details. +// +// Here is the detailed description of the issue. The initial +// (simplified) IR for function _getsockopt looks as follows: +// +// define dso_local i32 @_getsockopt(ptr noundef %ctx) +// ... +// sw.bb: +// %1 = load ptr, ptr %ctx ;; +// %family = getelementptr inbounds %struct.bpf_sock, ptr %1 ;; access to ctx->sk->family +// %2 = load i32, ptr %family ;; (a) +// %call = call i32 @f(i32 noundef %2) +// br label %sw.epilog +// +// sw.bb1: +// %optlen = getelementptr inbounds %struct.bpf_sockopt, ptr %ctx ;; access to ctx->optlen +// %3 = load i32, ptr %optlen ;; (b) +// %call2 = call i32 @f(i32 noundef %3) +// br label %sw.epilog +// +// sw.epilog: +// ... +// +// W/o additional code motion machine code for field accesses would +// look as follows: +// +// ... +// $r1 = LDW $r1, 4 ;; for ctx->sk->family +// ... +// $r1 = LDW $r1, 12 ;; for ctx->optlen +// +// However, SimplifyCFGPass may rewrite the above IR separating +// getelementptr and load instructions as shown below: +// +// ... +// sw.bb: +// %1 = load ptr, ptr %ctx +// %family = getelementptr inbounds %struct.bpf_sock, ptr %1 +// br label %sw.epilog.sink.split +// +// sw.bb1: +// %optlen = getelementptr inbounds %struct.bpf_sockopt, ptr %ctx +// br label %sw.epilog.sink.split +// +// sw.epilog.sink.split: +// %optlen.sink = phi ptr [ %optlen, %sw.bb1 ], [ %family, %sw.bb ] +// %2 = load i32, ptr %optlen.sink ;; (c) +// %call2 = call fastcc i32 @f(i32 noundef %2) +// br label %sw.epilog +// +// Note that load instructions (a) and (b) are replaced by a single +// load instruction (c) that gets it's value from a PHI node. The two +// calls to @f are also replaced by a single call that uses result of +// (c). This is done by a code sinking part of the +// SimplifyCFGPass. This leads to the following machine code: +// +// bb.2.sw.bb: +// $r1 = LDD $r1, 0 +// $r1 = ADD_ri $r1, 4 +// JMP %bb.4 +// +// bb.3.sw.bb1: +// $r1 = ADD_ri $r1, 12 +// +// bb.4.sw.epilog.sink.split: +// $r1 = LDW $r1, 0 +// JAL @f +// +// Such access to the struct bpf_sockopt fields is not allowed by BPF +// kernel verifier (BPFHoistGetElementPtrLoadsPass.cpp has details on +// this). +// +// This test case verifies that BPFHoistGetElementPtrLoadsPass hoists +// the load instructions back to getelementptr to avoid the pattern +// forbidden by verifier. + +struct bpf_sock { + int bound_dev_if; + int family; +}; + +struct bpf_sockopt { + struct bpf_sock *sk; + int level; + int optlen; +}; + +__attribute__((noinline)) +static int f(int x) { + return x + 1; +} + +int _getsockopt(struct bpf_sockopt *ctx) +{ + unsigned g = 0; + switch (ctx->level) { +// CHECK: %level = getelementptr inbounds %struct.bpf_sockopt, ptr %ctx +// CHECK: [[r0:%[0-9]+]] = load i32, ptr %level + case 10: + g = f(ctx->sk->family); + break; +// CHECK: [[r2:%[0-9]+]] = load ptr, ptr %ctx +// CHECK: %family = getelementptr inbounds %struct.bpf_sock, ptr [[r2]] +// CHECK: [[r3:%[0-9]+]] = load i32, ptr %family + case 20: + g = f(ctx->optlen); + break; +// CHECK: %optlen = getelementptr inbounds %struct.bpf_sockopt, ptr %ctx +// CHECK: [[r5:%[0-9]+]] = load i32, ptr %optlen + } + return g % 2; +} diff --git a/clang/test/CodeGen/bpf-hoist-getelementptr-load-2.c b/clang/test/CodeGen/bpf-hoist-getelementptr-load-2.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/bpf-hoist-getelementptr-load-2.c @@ -0,0 +1,53 @@ +// REQUIRES: bpf-registered-target +// RUN: %clang -mllvm --print-after -mllvm codegenprepare \ +// RUN: -target bpf -S -g -O2 -o /dev/null %s 2>&1 \ +// RUN: | FileCheck %s + +// This test is related to the issue described in the following thread: +// +// https://lore.kernel.org/bpf/CAA-VZPmxh8o8EBcJ=m-DH4ytcxDFmo0JKsm1p1gf40kS0CE3NQ@mail.gmail.com/T/#m4b9ce2ce73b34f34172328f975235fc6f19841b6 +// +// Verifies that loads that follow getelementptr are not sunk to +// another BB and this does not interfere with the +// __attribute__((preserve_access_index)). +// +// For additional details refer to: +// - lib/Target/BPF/BPFHoistGetElementPtrLoadsPass.cpp +// - lib/Target/BPF/BPFAbstractMemberAccess.cpp +// - clang/test/CodeGen/bpf-hoist-getelementptr-load-1.c + +#define __reloc__ __attribute__((preserve_access_index)) + +struct bpf_sock_ops { + int op; + int bpf_sock_ops_cb_flags; +} __reloc__; + +__attribute__((noinline)) +static int f(int x) { + return x + 1; +} + +int g; + +__attribute__((section("sockops"))) +int h(struct bpf_sock_ops *i) { + switch (i->op) { +// CHECK: [[r0:%[0-9]+]] = load i64, ptr @"llvm.bpf_sock_ops:0:0$0:0" +// CHECK: [[r1:%[0-9]+]] = getelementptr i8, ptr %i, i64 [[r0]] +// CHECK: [[r2:%[0-9]+]] = load i32, ptr [[r1]] + case 10: + g = f(i->bpf_sock_ops_cb_flags); + break; +// CHECK [[r4:%[0-9]+]] = load i64, ptr @"llvm.bpf_sock_ops:0:4$0:1" +// CHECK [[r5:%[0-9]+]] = getelementptr i8, ptr %i, i64 [[r4]] +// CHECK [[r6:%[0-9]+]] = load i32, ptr [[r5]] + case 20: + g = f(i->bpf_sock_ops_cb_flags); + break; +// CHECK [[r8:%[0-9]+]] = load i64, ptr @"llvm.bpf_sock_ops:0:4$0:1" +// CHECK [[r9:%[0-9]+]] = getelementptr i8, ptr %i, i64 [[r8]] +// CHECK [[r10:%[0-9]+]] = load i32, ptr [[r9]] + } + return 0; +} diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h --- a/llvm/lib/Target/BPF/BPF.h +++ b/llvm/lib/Target/BPF/BPF.h @@ -30,6 +30,7 @@ FunctionPass *createBPFMIPeepholeTruncElimPass(); FunctionPass *createBPFMIPreEmitPeepholePass(); FunctionPass *createBPFMIPreEmitCheckingPass(); +FunctionPass *createBPFHoistGetElementPtrLoadsPass(); void initializeBPFAdjustOptPass(PassRegistry&); void initializeBPFCheckAndAdjustIRPass(PassRegistry&); @@ -42,6 +43,7 @@ void initializeBPFMIPeepholeTruncElimPass(PassRegistry&); void initializeBPFMIPreEmitPeepholePass(PassRegistry&); void initializeBPFMIPreEmitCheckingPass(PassRegistry&); +void initializeBPFHoistGetElementPtrLoadsLegacyPassPass(PassRegistry &); class BPFAbstractMemberAccessPass : public PassInfoMixin { @@ -72,6 +74,7 @@ public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; + } // namespace llvm #endif diff --git a/llvm/lib/Target/BPF/BPFHoistGetElementPtrLoadsPass.cpp b/llvm/lib/Target/BPF/BPFHoistGetElementPtrLoadsPass.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/BPF/BPFHoistGetElementPtrLoadsPass.cpp @@ -0,0 +1,400 @@ +//===------ BPFHoistGetElementPtrLoadsPass.cpp ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// BPF verifier limits access patterns allowed for certain data +// types. E.g. struct __sk_buff and struct bpf_sock_ops. For these +// types only BASE + static-offset memory loads are allowed. +// +// This is so because offsets of the fields of these structures do not +// match real offsets in the running kernel. During BPF program +// load/verification loads and stores to the fields of these types are +// rewritten so that offsets match real offsets. For this rewrite to +// happen static offsets have to be encoded in the instructions. +// +// See kernel/bpf/verifier.c:convert_ctx_access function in the Linux +// kernel source tree for details. +// +// During instruction selection phase the following sequence of +// instructions: +// +// %x = getelementptr %ptr, %offset +// %y = load %x +// +// Is translated as a single load instruction with embedded offset, +// e.g. LDW %ptr, %offset, which matches access pattern necessary for +// the restricted set of types described above (when %offset is static). +// +// However, several optimization passes might sink load instruction or +// hoist getelementptr instruction so that the instructions are no +// longer in sequence. Examples of such passes are SimplifyCFGPass and +// InstCombinePass. +// +// The following test cases illustrate this issue: +// - clang/test/CodeGen/bpf-hoist-getelementptr-load-1.c +// - clang/test/CodeGen/bpf-hoist-getelementptr-load-2.c +// +// The BPFHoistGetElementPtrLoadsPass undoes sinking of the load / store +// instructions relative to the getelementptr for a specific pattern. +// +// The recognized pattern looks as follows: +// +// +--------------------+ +--------------------+ +----------+ +----------+ +// | ... | | ... | | ... | | ... | +// | %a = getelementptr | | %b = getelementptr | | %c = foo | | %d = bar | +// | ... | | ... | | ... | | ... | +// +--------------------+ +--------------------+ +----------+ +----------+ +// \_________________|_____________/_____________/ +// | +// V +// +-------------------------+ +// | ... | +// | %p = phi %a, %b, %c, %d | +// | ... | +// | %l = load %p | +// | ... | +// +-------------------------+ +// +// And is rewritten as: +// +// +--------------------+ +--------------------+ +----------+ +----------+ +// | ... | | ... | | ... | | ... | +// | %a = getelementptr | | %b = getelementptr | | %c = foo | | %d = bar | +// | %a1 = load %a | | %b1 = load %b | | ... | | ... | +// | ... | | ... | +----------+ +----------+ +// +--------------------+ +--------------------+ \_________/ +// | | | +// | | V +// | | +------------------+ +// | | | %t1 = phi %c, %d | +// | | | %t = load t1 | +// | | +------------------+ +// \_________________|_______________________/ +// | +// V +// +-------------------------+ +// | ... | +// | %l = phi %a1, %b1, %t | +// | ... | +// +-------------------------+ +// +// The following limitations apply: +// +// - instructions preceding hoisted load or store should satisfy +// isSafeToSpeculativelyExecute or to be hoist-able loads / stores +// themselves; +// +// - instructions following getelementptr should satisfy +// isSafeToSpeculativelyExecute +// +// - BB containing getelementptr should end with unconditional jump +// +// - value operand of the hoisted store should dominate the +// getelementptr to which store is hoisted + +#include "BPF.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GenericDomTreeConstruction.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/CodeMoverUtils.h" +#include + +#define DEBUG_TYPE "bpf-hoist-getelementptr-loads" + +using namespace llvm; + +static cl::opt DisableBPFHoistGetElementPtrLoads + ("bpf-disable-hoist-getelementptr-loads", + cl::Hidden, + cl::desc("BPF: Disable hoisting of load instructions to getelementptr instructions"), + cl::init(false)); + +// Check if it is safe to move some instruction after StartInsn, +// safety conditions are: +// - all instructions till the end of the StartInsn BB satisfy +// isSafeToSpeculativelyExecute; +// - StartInsn BB is terminated with unconditional jump. +// Use KnownSafe as a cache to avoid traversal of the same +// instructions multiple times. +static bool isSafeTillEndOfBB(Instruction *StartInsn, + std::map &KnownSafe) { + auto SafeTillEndOfBB = [&](Value *Val) { + auto *Insn = StartInsn; + while (!Insn->isTerminator()) { + auto Cached = KnownSafe.find(Insn); + if (Cached != KnownSafe.end()) + return Cached->second; + + if (!isSafeToSpeculativelyExecute(Insn)) + return false; + + Insn = Insn->getNextNode(); + } + + auto *Br = dyn_cast(Insn); + if (!Br || Br->isConditional()) + return false; + + return true; + }; + + auto Result = SafeTillEndOfBB(StartInsn); + KnownSafe[StartInsn] = Result; + + return Result; +} + +// Phi is interesting if it has some GEPs as it's incoming values and +// those GEPs satisfy isSafeTillEndOfBB. +static bool isInterestingPhi(PHINode *Phi, + std::map &KnownSafe) { + bool HasGEPs = false; + + for (auto &Incoming : Phi->incoming_values()) { + if (auto *GEP = dyn_cast(Incoming)) { + if (!isSafeTillEndOfBB(GEP->getNextNode(), KnownSafe)) + return false; + HasGEPs = true; + } + } + + return HasGEPs; +} + +static bool isInterestingLoad(Instruction *Insn, + std::map &KnownSafe) { + auto *Load = dyn_cast(Insn); + if (!Load) + return false; + + auto *Phi = dyn_cast(Load->getPointerOperand()); + if (!Phi) + return false; + + if (Phi->getParent() != Load->getParent()) + return false; + + return isInterestingPhi(Phi, KnownSafe); +} + +static bool isInterestingStore(Instruction *Insn, + std::map &KnownSafe, + DominatorTree &DT) { + auto *Store = dyn_cast(Insn); + if (!Store) + return false; + + auto *Phi = dyn_cast(Store->getPointerOperand()); + if (!Phi) + return false; + + if (Phi->getParent() != Store->getParent()) + return false; + + if (!isInterestingPhi(Phi, KnownSafe)) + return false; + + auto *StoredValue = Store->getValueOperand(); + auto *StoredValueInsn = dyn_cast(StoredValue); + if (isa(StoredValue) || !StoredValueInsn) + return true; + + for (auto &Incoming : Phi->incoming_values()) + if (auto *GEP = dyn_cast(Incoming.get())) + if (!DT.dominates(StoredValueInsn, GEP)) + return false; + + return true; +} + +static std::vector collectInterestingInsns(Function &F, + DominatorTree &DT) { + std::vector InterestingInsns; + std::map KnownSafe; + + for (auto &BB : F.getBasicBlockList()) { + for (auto &Insn : BB.getInstList()) { + if (isa(&Insn)) + continue; + + if (isInterestingLoad(&Insn, KnownSafe)) { + InterestingInsns.push_back(&Insn); + continue; + } + + if (isInterestingStore(&Insn, KnownSafe, DT)) { + InterestingInsns.push_back(&Insn); + continue; + } + + // Stop BB traversal if an instruction that could not be moved is reached. + if (!isSafeToSpeculativelyExecute(&Insn)) + break; + } + } + + return InterestingInsns; +} + +static Instruction *getLastNonTerminatorInsn(BasicBlock *BB) { + auto &IL = BB->getInstList(); + for (auto Iter = IL.rbegin(); Iter != IL.rend(); ++Iter) + if (!Iter->isTerminator()) + return &(*Iter); + return nullptr; +} + +static void insertLast(BasicBlock *BB, Instruction *Insn) { + IRBuilder<> Builder(BB, BB->getFirstInsertionPt()); + if (auto *LastInsn = getLastNonTerminatorInsn(BB)) + Builder.SetInsertPoint(LastInsn); + else + Builder.Insert(Insn); +} + +static void insertAfterOrAtEnd(Instruction *What, + Value *After, + BasicBlock *AtEnd) { + if (auto *InputInsn = dyn_cast(After)) + What->insertAfter(InputInsn); + else + insertLast(AtEnd, What); +} + +static void hoistLoad(LoadInst *Load) { + auto *Phi = cast(Load->getPointerOperand()); + auto *NewPhi = PHINode::Create(Load->getType(), + Phi->getNumOperands(), Phi->getName(), Phi); + for (unsigned I = 0; I < Phi->getNumIncomingValues(); ++I) { + auto *Input = Phi->getIncomingValue(I); + auto *Block = Phi->getIncomingBlock(I); + auto *LoadCopy = cast(Load->clone()); + insertAfterOrAtEnd(LoadCopy, Input, Block); + LoadCopy->setOperand(0, Input); + NewPhi->addIncoming(LoadCopy, Block); + } + Load->replaceAllUsesWith(NewPhi); + Load->eraseFromParent(); + if (Phi->getNumUses() == 0) + Phi->eraseFromParent(); +} + +static void hoistStore(StoreInst *Store) { + auto *Phi = cast(Store->getPointerOperand()); + for (unsigned I = 0; I < Phi->getNumIncomingValues(); ++I) { + auto *Input = Phi->getIncomingValue(I); + auto *Block = Phi->getIncomingBlock(I); + auto *StoreCopy = cast(Store->clone()); + insertAfterOrAtEnd(StoreCopy, Input, Block); + StoreCopy->setOperand(1, Input); + } + Store->eraseFromParent(); + if (Phi->getNumUses() == 0) + Phi->eraseFromParent(); +} + +// Create a proxy BB for load PHI's incoming values that are not +// getelementptr's. The load would be hoisted into this BB. +// BB is created only if such values exist. +static void maybeMakeProxyBB(Instruction *Insn, PHINode *Phi) { + SmallVector ProxyPreds; + + for (unsigned I = 0; I < Phi->getNumIncomingValues(); ++I) { + auto *Input = Phi->getIncomingValue(I); + auto *Block = Phi->getIncomingBlock(I); + if (isa(Input)) + continue; + + ProxyPreds.push_back(Block); + } + + if (ProxyPreds.empty()) + return; + + SplitBlockPredecessors(Insn->getParent(), + makeArrayRef(ProxyPreds), + ".load.hoist"); +} + +static bool hoistGetElementPtrLoads(Function &F, DominatorTree &DT) { + auto InterestingInsns = collectInterestingInsns(F, DT); + // Hoist instructions in reverse to get the same relative order. + // Each hoisted instruction is added at the beginning of the hoisting point. + for (auto ReverseIterator = InterestingInsns.rbegin(); + ReverseIterator != InterestingInsns.rend(); + ++ReverseIterator) { + auto *Insn = *ReverseIterator; + + if (auto *Load = dyn_cast(Insn)) { + maybeMakeProxyBB(Load, cast(Load->getPointerOperand())); + hoistLoad(Load); + continue; + } + + if (auto *Store = dyn_cast(Insn)) { + maybeMakeProxyBB(Store, cast(Store->getPointerOperand())); + hoistStore(Store); + continue; + } + + std::string ErrMsg; + raw_string_ostream ErrStream(ErrMsg); + ErrStream << "Expecting Load or Store instruction not " << *Insn; + report_fatal_error(Twine(ErrMsg), true); + } + return !InterestingInsns.empty(); +} + +namespace { + +class BPFHoistGetElementPtrLoadsLegacyPass final : public FunctionPass { +public: + static char ID; + + BPFHoistGetElementPtrLoadsLegacyPass() : FunctionPass(ID) {} + + bool runOnFunction(Function &F) override { + if (DisableBPFHoistGetElementPtrLoads) + return false; + + auto &DT = getAnalysis().getDomTree(); + + return hoistGetElementPtrLoads(F, DT); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } +}; + +} // End anonymous namespace + +char BPFHoistGetElementPtrLoadsLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(BPFHoistGetElementPtrLoadsLegacyPass, DEBUG_TYPE, + "BPF Hoist GetElementPtr Loads", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(BPFHoistGetElementPtrLoadsLegacyPass, DEBUG_TYPE, + "BPF Hoist GetElementPtr Loads", false, false) + +FunctionPass *llvm::createBPFHoistGetElementPtrLoadsPass() { + return new BPFHoistGetElementPtrLoadsLegacyPass(); +} diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -48,6 +48,7 @@ initializeBPFCheckAndAdjustIRPass(PR); initializeBPFMIPeepholePass(PR); initializeBPFMIPeepholeTruncElimPass(PR); + initializeBPFHoistGetElementPtrLoadsLegacyPassPass(PR); } // DataLayout: little or big endian @@ -145,6 +146,7 @@ void BPFPassConfig::addIRPasses() { addPass(createBPFCheckAndAdjustIR()); + addPass(createBPFHoistGetElementPtrLoadsPass()); TargetPassConfig::addIRPasses(); } diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt --- a/llvm/lib/Target/BPF/CMakeLists.txt +++ b/llvm/lib/Target/BPF/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(BPFCodeGen BPFAbstractMemberAccess.cpp + BPFHoistGetElementPtrLoadsPass.cpp BPFAdjustOpt.cpp BPFAsmPrinter.cpp BPFCheckAndAdjustIR.cpp diff --git a/llvm/test/CodeGen/BPF/hoist-getelementptr-loads.ll b/llvm/test/CodeGen/BPF/hoist-getelementptr-loads.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/BPF/hoist-getelementptr-loads.ll @@ -0,0 +1,801 @@ +; RUN: opt --bpf-hoist-getelementptr-loads -S %s | FileCheck %s + +; Test for various hoist / no hoist cases handling by +; bpf-hoist-getelementptr-loads pass. +; For details see BPFHoistGetElementPtrLoadsPass.cpp. + +; The ll code is generated using the following command: +; +; clang -target bpf -O2 -emit-llvm -S t.c -o t.ll +; +; Using the following C code: + +; struct context { +; int first; +; int butlast; +; int last; +; }; + +; extern int magic(int); +; extern int magic2(int *); +; extern int* magic_ptr1(); +; extern int* magic_ptr2(); +; volatile int volatile_global; + +; int simple_hoist(struct context *ctx) { +; switch (ctx->first) { +; case 1: return magic(ctx->butlast); +; case 2: return magic(ctx->last); +; } +; return 0; +; } + +; int hoist_with_proxy_bb(struct context *ctx) { +; int *p; +; switch (ctx->first) { +; case 1: p = &ctx->butlast; break; +; case 2: p = &ctx->last; break; +; case 3: p = magic_ptr1(); break; +; default: p = magic_ptr2(); break; +; } +; return magic(*p); +; } + +; int hoist_with_proxy_bb_unsafe_in_non_gep_branch(struct context *ctx) { +; int *p; +; switch (ctx->first) { +; case 1: p = &ctx->butlast; break; +; case 2: p = &ctx->last; break; +; case 3: p = magic_ptr1(); break; +; default: +; p = magic_ptr2(); +; volatile_global = 7; +; break; +; } +; return magic(*p); +; } + +; int no_hoist_cond_exit(struct context *ctx, int x) { +; int *p; +; switch (ctx->first) { +; case 1: p = &ctx->butlast; break; +; case 2: +; p = &ctx->last; +; if (x == 0) +; break; +; default: p = magic_ptr1(); break; +; } +; return magic(*p); +; } + +; int hoist_two_uses(struct context *ctx) { +; int *p = 0; +; switch (ctx->first) { +; case 1: p = &ctx->butlast; break; +; case 2: p = &ctx->last; break; +; } +; return magic(*p) + magic2(p); +; } + +; int hoist_non_instruction(struct context *ctx, int *iptr) { +; int *p; +; switch (ctx->first) { +; case 1: p = iptr; break; +; case 2: p = &ctx->butlast; break; +; default: p = &ctx->last; break; +; } +; return magic(*p); +; } + +; int no_hoist_unsafe_in_load_bb(struct context *ctx, int *iptr) { +; int *p; +; switch (ctx->first) { +; case 1: p = iptr; break; +; case 2: p = &ctx->butlast; break; +; default: p = &ctx->last; break; +; } +; volatile_global = 3; +; return magic(*p); +; } + +; int no_hoist_unsafe_in_branch_bb(struct context *ctx, int *iptr) { +; int *p; +; switch (ctx->first) { +; case 1: p = iptr; break; +; case 2: p = &ctx->butlast; break; +; default: p = &ctx->last; +; volatile_global = 3; +; break; +; } +; return magic(*p); +; } + +; int hoist_two_loads(struct context *ctx) { +; int *pa; +; int *pb; +; switch (ctx->first) { +; case 1: +; pa = &ctx->butlast; +; pb = &ctx->last; +; break; +; case 2: +; pb = &ctx->butlast; +; pa = &ctx->last; +; break; +; default: +; return 0; +; } +; magic(*pa + *pb); +; return 0; +; } + +; int no_hoist_for_b_unsafe_call_in_the_middle(struct context *ctx) { +; int *pa; +; int *pb; +; switch (ctx->first) { +; case 1: +; pa = &ctx->butlast; +; pb = &ctx->last; +; break; +; case 2: +; pb = &ctx->butlast; +; pa = &ctx->last; +; break; +; default: +; return 0; +; } +; magic(*pa); +; magic(*pb); +; return 0; +; } + +; struct __sk_buff { +; int len; +; int priority; +; int mark; +; int tc_index; +; }; + +; int hoist_store(struct __sk_buff *ctx) { +; switch (ctx->tc_index) { +; case 10: +; ctx->mark = 1; +; break; +; case 20: +; ctx->priority = 1; +; break; +; } +; return 0; +; } + +; int no_hoist_store_cond_terminator(struct __sk_buff *ctx) { +; switch (ctx->priority) { +; case 10: +; ctx->mark = 1; +; break; +; case 20: +; ctx->priority = 1; +; break; +; } +; return 0; +; } + +; int hoist_store_order(struct __sk_buff *ctx) { +; volatile int *p; +; switch (ctx->tc_index) { +; case 10: p = &ctx->mark; break; +; case 20: p = &ctx->priority; break; +; default: return 0; +; } +; *p = 1; +; *p = 2; +; return 0; +; } + +; ModuleID = 't.c' +source_filename = "t.c" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "bpf" + +%struct.context = type { i32, i32, i32 } +%struct.__sk_buff = type { i32, i32, i32, i32 } + +@volatile_global = dso_local global i32 0, align 4 + +; Function Attrs: nounwind +define dso_local i32 @simple_hoist(ptr nocapture noundef readonly %ctx) local_unnamed_addr #0 { +; CHECK: define dso_local i32 @simple_hoist +entry: + %0 = load i32, ptr %ctx, align 4, !tbaa !3 + switch i32 %0, label %return [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + ] + +sw.bb: ; preds = %entry + %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + br label %return.sink.split +; CHECK: sw.bb: +; CHECK-NEXT: %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: [[r1:%[a-zA-Z0-9.]+]] = load i32, ptr %butlast +; CHECK-NEXT: br label %return.sink.split + +sw.bb1: ; preds = %entry + %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + br label %return.sink.split +; CHECK: sw.bb1: +; CHECK-NEXT: %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: [[r2:%[a-zA-Z0-9.]+]] = load i32, ptr %last +; CHECK-NEXT: br label %return.sink.split + +return.sink.split: ; preds = %sw.bb, %sw.bb1 + %last.sink = phi ptr [ %last, %sw.bb1 ], [ %butlast, %sw.bb ] + %1 = load i32, ptr %last.sink, align 4, !tbaa !8 + %call2 = tail call i32 @magic(i32 noundef %1) #2 + br label %return +; CHECK: return.sink.split: +; CHECK-NEXT: [[last_sink1:%[a-zA-Z0-9.]+]] = phi i32 [ [[r2]], %sw.bb1 ], [ [[r1]], %sw.bb ] +; CHECK-NEXT: %call2 = tail call i32 @magic(i32 noundef [[last_sink1]]) +; CHECK-NEXT: br label %return + +return: ; preds = %return.sink.split, %entry + %retval.0 = phi i32 [ 0, %entry ], [ %call2, %return.sink.split ] + ret i32 %retval.0 +; CHECK: return: +; CHECK-NEXT: %retval.0 = phi i32 [ 0, %entry ], [ %call2, %return.sink.split ] +; CHECK-NEXT: ret i32 %retval.0 +} + +declare dso_local i32 @magic(i32 noundef) local_unnamed_addr #1 + +; Function Attrs: nounwind +define dso_local i32 @hoist_with_proxy_bb(ptr nocapture noundef readonly %ctx) local_unnamed_addr #0 { +; CHECK: define dso_local i32 @hoist_with_proxy_bb +entry: + %0 = load i32, ptr %ctx, align 4, !tbaa !3 + switch i32 %0, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + i32 3, label %sw.bb2 + ] + +sw.bb: ; preds = %entry + %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + br label %sw.epilog +; CHECK: sw.bb: +; CHECK-NEXT: %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: [[r1:%[a-zA-Z0-9.]+]] = load i32, ptr %butlast +; CHECK-NEXT: br label %sw.epilog + +sw.bb1: ; preds = %entry + %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + br label %sw.epilog + +sw.bb2: ; preds = %entry + %call = tail call ptr @magic_ptr1() #2 + br label %sw.epilog +; CHECK: sw.bb2: +; CHECK-NEXT: %call = tail call ptr @magic_ptr1() +; CHECK-NEXT: br label %[[sw_epilog_load_hoist:[a-zA-Z0-9.]+]] + +sw.default: ; preds = %entry + %call3 = tail call ptr @magic_ptr2() #2 + br label %sw.epilog +; CHECK: sw.default: +; CHECK-NEXT: %call3 = tail call ptr @magic_ptr2() +; CHECK-NEXT: br label %[[sw_epilog_load_hoist]] + +; CHECK: [[sw_epilog_load_hoist]]: +; CHECK-NEXT: [[p_0_ph:%[a-zA-Z0-9.]+]] = phi ptr [ %call, %sw.bb2 ], [ %call3, %sw.default ] +; CHECK-NEXT: [[r3:%[a-zA-Z0-9.]+]] = load i32, ptr [[p_0_ph]] +; CHECK-NEXT: br label %sw.epilog + +sw.epilog: ; preds = %sw.default, %sw.bb2, %sw.bb1, %sw.bb + %p.0 = phi ptr [ %call3, %sw.default ], [ %call, %sw.bb2 ], [ %last, %sw.bb1 ], [ %butlast, %sw.bb ] + %1 = load i32, ptr %p.0, align 4, !tbaa !8 + %call4 = tail call i32 @magic(i32 noundef %1) #2 + ret i32 %call4 +; CHECK: sw.epilog: +; CHECK-NEXT: [[p_01:%[a-zA-Z0-9.]+]] = phi i32 [ [[r2]], %sw.bb1 ], [ [[r1]], %sw.bb ], [ [[r3]], %[[sw_epilog_load_hoist]] ] +; CHECK-NEXT: %call4 = tail call i32 @magic(i32 noundef [[p_01]]) +; CHECK-NEXT: ret i32 %call4 +} + +declare dso_local ptr @magic_ptr1(...) local_unnamed_addr #1 + +declare dso_local ptr @magic_ptr2(...) local_unnamed_addr #1 + +; Function Attrs: nounwind +define dso_local i32 @hoist_with_proxy_bb_unsafe_in_non_gep_branch(ptr nocapture noundef readonly %ctx) local_unnamed_addr #0 { +; CHEKC: define dso_local i32 @hoist_with_proxy_bb_unsafe_in_non_gep_branch +entry: + %0 = load i32, ptr %ctx, align 4, !tbaa !3 + switch i32 %0, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + i32 3, label %sw.bb2 + ] + +sw.bb: ; preds = %entry + %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + br label %sw.epilog +; CHECK: sw.bb: +; CHECK-NEXT: %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: [[r1:%[a-zA-Z0-9.]+]] = load i32, ptr %butlast +; CHECK-NEXT: br label %sw.epilog + +sw.bb1: ; preds = %entry + %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + br label %sw.epilog +; CHECK: sw.bb1: +; CHECK-NEXT: %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: [[r2:%[a-zA-Z0-9.]+]] = load i32, ptr %last +; CHECK-NEXT: br label %sw.epilog + +sw.bb2: ; preds = %entry + %call = tail call ptr @magic_ptr1() #2 + br label %sw.epilog +; CHECK: sw.bb2: +; CHECK-NEXT: %call = tail call ptr @magic_ptr1() +; CHECK-NEXT: br label %[[sw_epilog_load_hoist:[a-zA-Z0-9.]+]] + +sw.default: ; preds = %entry + %call3 = tail call ptr @magic_ptr2() #2 + store volatile i32 7, ptr @volatile_global, align 4, !tbaa !8 + br label %sw.epilog +; CHECK: sw.default: +; CHECK-NEXT: %call3 = tail call ptr @magic_ptr2() +; CHECK-NEXT: store volatile i32 7, ptr @volatile_global +; CHECK-NEXT: br label %[[sw_epilog_load_hoist]] + +; CHECK: [[sw_epilog_load_hoist]]: +; CHECK-NEXT: [[p_0_ph:%[a-zA-Z0-9.]+]] = phi ptr [ %call, %sw.bb2 ], [ %call3, %sw.default ] +; CHECK-NEXT: [[r3:%[a-zA-Z0-9.]+]] = load i32, ptr [[p_0_ph]] +; CHECK-NEXT: br label %sw.epilog + +sw.epilog: ; preds = %sw.default, %sw.bb2, %sw.bb1, %sw.bb + %p.0 = phi ptr [ %call3, %sw.default ], [ %call, %sw.bb2 ], [ %last, %sw.bb1 ], [ %butlast, %sw.bb ] + %1 = load i32, ptr %p.0, align 4, !tbaa !8 + %call4 = tail call i32 @magic(i32 noundef %1) #2 + ret i32 %call4 +; CHECK: sw.epilog: +; CHECK-NEXT: [[p_01:%[a-zA-Z0-9.]+]] = phi i32 [ [[r2]], %sw.bb1 ], [ [[r1]], %sw.bb ], [ [[r3]], %[[sw_epilog_load_hoist]] ] +; CHECK-NEXT: %call4 = tail call i32 @magic(i32 noundef [[p_01]]) +; CHECK-NEXT: ret i32 %call4 +} + +; Function Attrs: nounwind +define dso_local i32 @no_hoist_cond_exit(ptr nocapture noundef readonly %ctx, i32 noundef %x) local_unnamed_addr #0 { +; CHECK: define dso_local i32 @no_hoist_cond_exit +entry: + %0 = load i32, ptr %ctx, align 4, !tbaa !3 + switch i32 %0, label %sw.default [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + ] + +sw.bb: ; preds = %entry + %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + br label %sw.epilog +; CHECK: sw.bb: +; CHECK-NEXT: %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: br label %sw.epilog + +sw.bb1: ; preds = %entry + %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %sw.epilog, label %sw.default +; CHECK: sw.bb1: +; CHECK-NEXT: %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: %cmp = icmp eq i32 %x, 0 +; CHECK-NEXT: br i1 %cmp, label %sw.epilog, label %sw.default + +sw.default: ; preds = %sw.bb1, %entry + %call = tail call ptr @magic_ptr1() #2 + br label %sw.epilog +; CHECK: sw.default: +; CHECK-NEXT: %call = tail call ptr @magic_ptr1() +; CHECK-NEXT: br label %sw.epilog + +sw.epilog: ; preds = %sw.bb1, %sw.default, %sw.bb + %p.0 = phi ptr [ %call, %sw.default ], [ %last, %sw.bb1 ], [ %butlast, %sw.bb ] + %1 = load i32, ptr %p.0, align 4, !tbaa !8 + %call2 = tail call i32 @magic(i32 noundef %1) #2 + ret i32 %call2 +; CHECK: sw.epilog: +; CHECK-NEXT: %p.0 = phi ptr [ %call, %sw.default ], [ %last, %sw.bb1 ], [ %butlast, %sw.bb ] +; CHECK-NEXT: [[r1:%[a-zA-Z0-9.]+]] = load i32, ptr %p.0 +; CHECK-NEXT: %call2 = tail call i32 @magic(i32 noundef [[r1]]) +; CHECK-NEXT: ret i32 %call2 +} + +; Function Attrs: nounwind +define dso_local i32 @hoist_two_uses(ptr noundef %ctx) local_unnamed_addr #0 { +; CHECK: define dso_local i32 @hoist_two_uses +entry: + %0 = load i32, ptr %ctx, align 4, !tbaa !3 + switch i32 %0, label %sw.epilog [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + ] +; CHECK: entry: +; CHECK-NEXT: [[r0:%[a-zA-Z0-9.]+]] = load i32, ptr %ctx +; CHECK-NEXT: switch i32 [[r0]], label %[[sw_epilog_load_hoist:[a-zA-Z0-9.]+]] [ +; CHECK-NEXT: i32 1, label %sw.bb +; CHECK-NEXT: i32 2, label %sw.bb1 +; CHECK-NEXT: ] + +sw.bb: ; preds = %entry + %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + br label %sw.epilog +; CHECK: sw.bb: +; CHECK-NEXT: %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: [[r1:%[a-zA-Z0-9.]+]] = load i32, ptr %butlast +; CHECK-NEXT: br label %sw.epilog + +sw.bb1: ; preds = %entry + %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + br label %sw.epilog +; CHECK: sw.bb1: +; CHECK-NEXT: %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: [[r2:%[a-zA-Z0-9.]+]] = load i32, ptr %last +; CHECK-NEXT: br label %sw.epilog + +; CHECK: [[sw_epilog_load_hoist]]: +; CHECK-NEXT: [[r3:%[a-zA-Z0-9.]+]] = load i32, ptr null +; CHECK-NEXT: br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb1, %sw.bb + %p.0 = phi ptr [ null, %entry ], [ %last, %sw.bb1 ], [ %butlast, %sw.bb ] + %1 = load i32, ptr %p.0, align 4, !tbaa !8 + %call = tail call i32 @magic(i32 noundef %1) #2 + %call2 = tail call i32 @magic2(ptr noundef nonnull %p.0) #2 + %add = add nsw i32 %call2, %call + ret i32 %add +; CHECK: sw.epilog: +; CHECK-NEXT: [[p_01:%[a-zA-Z0-9.]+]] = phi i32 [ [[r2]], %sw.bb1 ], [ [[r1]], %sw.bb ], [ [[r3]], %[[sw_epilog_load_hoist]] ] +; CHECK-NEXT: %p.0 = phi ptr [ %last, %sw.bb1 ], [ %butlast, %sw.bb ], [ null, %[[sw_epilog_load_hoist]] ] +; CHECK-NEXT: %call = tail call i32 @magic(i32 noundef [[p_01]]) +; CHECK-NEXT: %call2 = tail call i32 @magic2(ptr noundef nonnull %p.0) +; CHECK-NEXT: %add = add nsw i32 %call2, %call +; CHECK-NEXT: ret i32 %add +} + +declare dso_local i32 @magic2(ptr noundef) local_unnamed_addr #1 + +; Function Attrs: nounwind +define dso_local i32 @hoist_non_instruction(ptr nocapture noundef readonly %ctx, ptr nocapture noundef readonly %iptr) local_unnamed_addr #0 { +; CHECK: define dso_local i32 @hoist_non_instruction +entry: + %0 = load i32, ptr %ctx, align 4, !tbaa !3 + switch i32 %0, label %sw.default [ + i32 1, label %sw.epilog + i32 2, label %sw.bb1 + ] +; CHECK: entry: +; CHECK-NEXT: [[r0:%[a-zA-Z0-9.]+]] = load i32, ptr %ctx +; CHECK-NEXT: switch i32 [[r0]], label %sw.default [ +; CHECK-NEXT: i32 1, label %[[sw_epilog_load_hoist:[a-zA-Z0-9.]+]] +; CHECK-NEXT: i32 2, label %sw.bb1 +; CHECK-NEXT: ] + +sw.bb1: ; preds = %entry + %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + br label %sw.epilog +; CHECK: sw.bb1: +; CHECK-NEXT: %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: [[r1:%[a-zA-Z0-9.]+]] = load i32, ptr %butlast +; CHECK-NEXT: br label %sw.epilog + +sw.default: ; preds = %entry + %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + br label %sw.epilog +; CHECK: sw.default: +; CHECK-NEXT: %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: [[r2:%[a-zA-Z0-9.]+]] = load i32, ptr %last +; CHECK-NEXT: br label %sw.epilog + +; CHECK: [[sw_epilog_load_hoist]]: +; CHECK-NEXT: [[r3:%[a-zA-Z0-9.]+]] = load i32, ptr %iptr +; CHECK-NEXT: br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.default, %sw.bb1 + %p.0 = phi ptr [ %last, %sw.default ], [ %butlast, %sw.bb1 ], [ %iptr, %entry ] + %1 = load i32, ptr %p.0, align 4, !tbaa !8 + %call = tail call i32 @magic(i32 noundef %1) #2 + ret i32 %call +; CHECK: sw.epilog: +; CHECK-NEXT: [[p_01:%[a-zA-Z0-9.]+]] = phi i32 [ [[r2]], %sw.default ], [ [[r1]], %sw.bb1 ], [ [[r3]], %[[sw_epilog_load_hoist]] ] +; CHECK-NEXT: %call = tail call i32 @magic(i32 noundef [[p_01]]) +; CHECK-NEXT: ret i32 %call +} + +; Function Attrs: nounwind +define dso_local i32 @no_hoist_unsafe_in_load_bb(ptr nocapture noundef readonly %ctx, ptr nocapture noundef readonly %iptr) local_unnamed_addr #0 { +; CHECK: define dso_local i32 @no_hoist_unsafe_in_load_bb +entry: + %0 = load i32, ptr %ctx, align 4, !tbaa !3 + switch i32 %0, label %sw.default [ + i32 1, label %sw.epilog + i32 2, label %sw.bb1 + ] + +sw.bb1: ; preds = %entry + %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + br label %sw.epilog +; CHECK: sw.bb1: +; CHECK-NEXT: %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: br label %sw.epilog + +sw.default: ; preds = %entry + %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + br label %sw.epilog +; CHECK: sw.default: +; CHECK-NEXT: %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.default, %sw.bb1 + %p.0 = phi ptr [ %last, %sw.default ], [ %butlast, %sw.bb1 ], [ %iptr, %entry ] + store volatile i32 3, ptr @volatile_global, align 4, !tbaa !8 + %1 = load i32, ptr %p.0, align 4, !tbaa !8 + %call = tail call i32 @magic(i32 noundef %1) #2 + ret i32 %call +} + +; Function Attrs: nounwind +define dso_local i32 @no_hoist_unsafe_in_branch_bb(ptr nocapture noundef readonly %ctx, ptr nocapture noundef readonly %iptr) local_unnamed_addr #0 { +; CHECK: define dso_local i32 @no_hoist_unsafe_in_branch_bb +entry: + %0 = load i32, ptr %ctx, align 4, !tbaa !3 + switch i32 %0, label %sw.default [ + i32 1, label %sw.epilog + i32 2, label %sw.bb1 + ] + +sw.bb1: ; preds = %entry + %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + br label %sw.epilog +; CHECK: sw.bb1: +; CHECK-NEXT: %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: br label %sw.epilog + +sw.default: ; preds = %entry + %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + store volatile i32 3, ptr @volatile_global, align 4, !tbaa !8 + br label %sw.epilog +; CHECK: sw.default: +; CHECK-NEXT: %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: store volatile i32 3, ptr @volatile_global +; CHECK-NEXT: br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.default, %sw.bb1 + %p.0 = phi ptr [ %last, %sw.default ], [ %butlast, %sw.bb1 ], [ %iptr, %entry ] + %1 = load i32, ptr %p.0, align 4, !tbaa !8 + %call = tail call i32 @magic(i32 noundef %1) #2 + ret i32 %call +} + +; Function Attrs: nounwind +define dso_local i32 @hoist_two_loads(ptr nocapture noundef readonly %ctx) local_unnamed_addr #0 { +; CHECK: define dso_local i32 @hoist_two_loads +entry: + %0 = load i32, ptr %ctx, align 4, !tbaa !3 + switch i32 %0, label %cleanup [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + ] + +sw.bb: ; preds = %entry + %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + br label %sw.epilog +; CHECK: sw.bb: +; CHECK-NEXT: %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: [[r1:%[a-zA-Z0-9.]+]] = load i32, ptr %butlast +; CHECK-NEXT: %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: [[r2:%[a-zA-Z0-9.]+]] = load i32, ptr %last +; CHECK-NEXT: br label %sw.epilog + +sw.bb1: ; preds = %entry + %butlast2 = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + %last3 = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + br label %sw.epilog +; CHECK: sw.bb1: +; CHECK-NEXT: %butlast2 = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: [[r3:%[a-zA-Z0-9.]+]] = load i32, ptr %butlast2 +; CHECK-NEXT: %last3 = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: [[r4:%[a-zA-Z0-9.]+]] = load i32, ptr %last3 +; CHECK-NEXT: br label %sw.epilog + +sw.epilog: ; preds = %sw.bb1, %sw.bb + %pa.0 = phi ptr [ %last3, %sw.bb1 ], [ %butlast, %sw.bb ] + %pb.0 = phi ptr [ %butlast2, %sw.bb1 ], [ %last, %sw.bb ] + %1 = load i32, ptr %pa.0, align 4, !tbaa !8 + %2 = load i32, ptr %pb.0, align 4, !tbaa !8 + %add = add nsw i32 %2, %1 + %call = tail call i32 @magic(i32 noundef %add) #2 + br label %cleanup +; CHECK: sw.epilog: +; CHECK-NEXT: [[pa_01:%[a-zA-Z0-9.]+]] = phi i32 [ [[r4]], %sw.bb1 ], [ [[r1]], %sw.bb ] +; CHECK-NEXT: [[pb_02:%[a-zA-Z0-9.]+]] = phi i32 [ [[r3]], %sw.bb1 ], [ [[r2]], %sw.bb ] +; CHECK-NEXT: %add = add nsw i32 [[pb_02]], [[pa_01]] +; CHECK-NEXT: %call = tail call i32 @magic(i32 noundef %add) +; CHECK-NEXT: br label %cleanup + +cleanup: ; preds = %entry, %sw.epilog + ret i32 0 +} + +; Function Attrs: nounwind +define dso_local i32 @no_hoist_for_b_unsafe_call_in_the_middle(ptr nocapture noundef readonly %ctx) local_unnamed_addr #0 { +; CHECK: define dso_local i32 @no_hoist_for_b_unsafe_call_in_the_middle +entry: + %0 = load i32, ptr %ctx, align 4, !tbaa !3 + switch i32 %0, label %cleanup [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + ] + +sw.bb: ; preds = %entry + %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + br label %sw.epilog +; CHECK: sw.bb: +; CHECK-NEXT: %butlast = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: [[r1:%[a-zA-Z0-9.]+]] = load i32, ptr %butlast +; CHECK-NEXT: %last = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: br label %sw.epilog + +sw.bb1: ; preds = %entry + %butlast2 = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 + %last3 = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 + br label %sw.epilog +; CHECK: sw.bb1: +; CHECK-NEXT: %butlast2 = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: %last3 = getelementptr inbounds %struct.context, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: [[r2:%[a-zA-Z0-9.]+]] = load i32, ptr %last3 +; CHECK-NEXT: br label %sw.epilog + +sw.epilog: ; preds = %sw.bb1, %sw.bb + %pa.0 = phi ptr [ %last3, %sw.bb1 ], [ %butlast, %sw.bb ] + %pb.0 = phi ptr [ %butlast2, %sw.bb1 ], [ %last, %sw.bb ] + %1 = load i32, ptr %pa.0, align 4, !tbaa !8 + %call = tail call i32 @magic(i32 noundef %1) #2 + %2 = load i32, ptr %pb.0, align 4, !tbaa !8 + %call4 = tail call i32 @magic(i32 noundef %2) #2 + br label %cleanup + +cleanup: ; preds = %entry, %sw.epilog + ret i32 0 +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn +define dso_local i32 @hoist_store(ptr nocapture noundef %ctx) local_unnamed_addr #2 { +; CHECK: define dso_local i32 @hoist_store +entry: + %tc_index = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 3 + %0 = load i32, ptr %tc_index, align 4, !tbaa !9 + switch i32 %0, label %sw.epilog [ + i32 10, label %sw.bb + i32 20, label %sw.bb1 + ] + +sw.bb: ; preds = %entry + %mark = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 2 + br label %sw.epilog.sink.split +; CHECK: sw.bb: +; CHECK-NEXT: %mark = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: store i32 1, ptr %mark +; CHECK-NEXT: br label %sw.epilog.sink.split + +sw.bb1: ; preds = %entry + %priority = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 1 + br label %sw.epilog.sink.split +; CHECK: sw.bb1: +; CHECK-NEXT: %priority = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: store i32 1, ptr %priority +; CHECK-NEXT: br label %sw.epilog.sink.split + +sw.epilog.sink.split: ; preds = %sw.bb, %sw.bb1 + %priority.sink = phi ptr [ %priority, %sw.bb1 ], [ %mark, %sw.bb ] + store i32 1, ptr %priority.sink, align 4, !tbaa !8 + br label %sw.epilog + +sw.epilog: ; preds = %sw.epilog.sink.split, %entry + ret i32 0 +; CHECK: sw.epilog.sink.split: +; CHECK-NEXT: br label %sw.epilog +} + +; Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn +define dso_local i32 @no_hoist_store_cond_terminator(ptr nocapture noundef %ctx) local_unnamed_addr #2 { +; CHECK: define dso_local i32 @no_hoist_store_cond_terminator +entry: + %priority = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 1 + %0 = load i32, ptr %priority, align 4, !tbaa !11 + switch i32 %0, label %sw.epilog [ + i32 10, label %sw.bb + i32 20, label %sw.epilog.sink.split + ] + +sw.bb: ; preds = %entry + %mark = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 2 + br label %sw.epilog.sink.split +; CHECK: sw.bb: +; CHECK-NEXT: %mark = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: br label %sw.epilog.sink.split + +sw.epilog.sink.split: ; preds = %entry, %sw.bb + %priority.sink = phi ptr [ %mark, %sw.bb ], [ %priority, %entry ] + store i32 1, ptr %priority.sink, align 4, !tbaa !8 + br label %sw.epilog +; CHECK: sw.epilog.sink.split: +; CHECK-NEXT: %priority.sink = phi ptr [ %mark, %sw.bb ], [ %priority, %entry ] +; CHECK-NEXT: store i32 1, ptr %priority.sink +; CHECK-NEXT: br label %sw.epilog + +sw.epilog: ; preds = %sw.epilog.sink.split, %entry + ret i32 0 +} + +; Function Attrs: nofree norecurse nounwind +define dso_local i32 @hoist_store_order(ptr noundef %ctx) local_unnamed_addr #3 { +;; define dso_local i32 @hoist_store_order +entry: + %tc_index = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 3 + %0 = load i32, ptr %tc_index, align 4, !tbaa !9 + switch i32 %0, label %cleanup [ + i32 10, label %sw.bb + i32 20, label %sw.bb1 + ] + +sw.bb: ; preds = %entry + %mark = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 2 + br label %sw.epilog +; CHECK: sw.bb: +; CHECK-NEXT: %mark = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 2 +; CHECK-NEXT: store volatile i32 1, ptr %mark +; CHECK-NEXT: store volatile i32 2, ptr %mark +; CHECK-NEXT: br label %sw.epilog + +sw.bb1: ; preds = %entry + %priority = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 1 + br label %sw.epilog; CHECK: sw.bb1: +; CHECK-NEXT: %priority = getelementptr inbounds %struct.__sk_buff, ptr %ctx, i64 0, i32 1 +; CHECK-NEXT: store volatile i32 1, ptr %priority +; CHECK-NEXT: store volatile i32 2, ptr %priority +; CHECK-NEXT: br label %sw.epilog + +sw.epilog: ; preds = %sw.bb1, %sw.bb + %p.0 = phi ptr [ %priority, %sw.bb1 ], [ %mark, %sw.bb ] + store volatile i32 1, ptr %p.0, align 4, !tbaa !8 + store volatile i32 2, ptr %p.0, align 4, !tbaa !8 + br label %cleanup +; CHECK: sw.epilog: +; CHECK-NEXT: br label %cleanup + +cleanup: ; preds = %entry, %sw.epilog + ret i32 0 +} + +attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #2 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { nofree norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 828a930d9b93a21772456e77a81fa501492a261a)"} +!3 = !{!4, !5, i64 0} +!4 = !{!"context", !5, i64 0, !5, i64 4, !5, i64 8} +!5 = !{!"int", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = !{!5, !5, i64 0} +!9 = !{!10, !5, i64 12} +!10 = !{!"__sk_buff", !5, i64 0, !5, i64 4, !5, i64 8, !5, i64 12} +!11 = !{!10, !5, i64 4}