Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3171,6 +3171,13 @@
     return nullptr;
   }
 
+  /// Return true if it is beneficial to fold an address calculation into a
+  /// call sequence.
+  virtual bool canFoldAddrModeIntoCall(const CallBase &CB, unsigned ArgNo,
+                                       const AddrMode &AM) const {
+    return AM.HasBaseReg && AM.BaseGV == nullptr && AM.Scale == 0 && AM.BaseOffs == 0;
+  }
+
   //===--------------------------------------------------------------------===//
   // Runtime Library hooks
   //
Index: llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -220,6 +220,9 @@
   /// True if the subtarget should run the indirectbr expansion pass.
   virtual bool enableIndirectBrExpand() const;
 
+  /// Enable folding of address computations into call sequences.
+  virtual bool enableCallAddrFold() const { return false; }
+
   /// Override generic scheduling policy within a region.
   ///
   /// This is a convenient way for targets that don't provide any custom
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2200,6 +2200,28 @@
   return true;
 }
 
+static bool isRegularCall(const CallBase *CB) {
+  return !CB->isInlineAsm() && CB->getIntrinsicID() == Intrinsic::not_intrinsic;
+}
+
+static const CallBase *getRegularCall(const Instruction *I) {
+  if (const auto *CB = dyn_cast<CallBase>(I); CB && isRegularCall(CB))
+    return CB;
+  return nullptr;
+}
+
+static unsigned getRegularCallArgNo(const Instruction *I, Value *V) {
+  if (const CallBase *CB = getRegularCall(I)) {
+    unsigned AN = 0;
+    for (const Use &U : CB->args()) {
+      if (U == V)
+        return AN;
+      ++AN;
+    }
+  }
+  return ~0u;
+}
+
 bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
   BasicBlock *BB = CI->getParent();
 
@@ -2273,8 +2295,8 @@
   // cold block. This interacts with our handling for loads and stores to
   // ensure that we can fold all uses of a potential addressing computation
   // into their uses. TODO: generalize this to work over profiling data
-  if (CI->hasFnAttr(Attribute::Cold) && !OptSize &&
-      !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
+  if (isRegularCall(CI) && CI->hasFnAttr(Attribute::Cold) && !OptSize &&
+      !llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) {
     for (auto &Arg : CI->args()) {
       if (!Arg->getType()->isPointerTy())
         continue;
@@ -2282,6 +2304,7 @@
       if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
         return true;
     }
+  }
 
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   if (II) {
@@ -3207,8 +3230,11 @@
 ///
 /// This encapsulates the logic for matching the target-legal addressing modes.
 class AddressingModeMatcher {
+  static constexpr unsigned INVALID_ARG_NO = ~0u;
+
   SmallVectorImpl<Instruction *> &AddrModeInsts;
   const TargetLowering &TLI;
+  const TargetSubtargetInfo &STI;
   const TargetRegisterInfo &TRI;
   const DataLayout &DL;
   const LoopInfo &LI;
@@ -3218,6 +3244,7 @@
   /// the memory instruction that we're computing this address for.
   Type *AccessTy;
   unsigned AddrSpace;
+  unsigned ArgNo;
   Instruction *MemoryInst;
 
   /// This is the addressing mode that we're building up. This is
@@ -3248,6 +3275,7 @@
 
   AddressingModeMatcher(
       SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
+      const TargetSubtargetInfo &STI,
       const TargetRegisterInfo &TRI, const LoopInfo &LI,
       const std::function<const DominatorTree &()> getDTFn, Type *AT,
       unsigned AS, Instruction *MI, ExtAddrMode &AM,
@@ -3255,11 +3283,12 @@
       TypePromotionTransaction &TPT,
       std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
       bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
-      : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
+      : AddrModeInsts(AMI), TLI(TLI), STI(STI), TRI(TRI),
        DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
-        AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
-        InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
-        LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
+        AccessTy(AT), AddrSpace(AS), ArgNo(INVALID_ARG_NO), MemoryInst(MI),
+        AddrMode(AM), InsertedInsts(InsertedInsts),
+        PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
+        OptSize(OptSize), PSI(PSI), BFI(BFI) {
     IgnoreProfitability = false;
   }
 
@@ -3276,17 +3305,19 @@
         SmallVectorImpl<Instruction *> &AddrModeInsts,
         const TargetLowering &TLI, const LoopInfo &LI,
         const std::function<const DominatorTree &()> getDTFn,
+        const TargetSubtargetInfo &STI,
         const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
         InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
         std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
         bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
     ExtAddrMode Result;
 
-    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
-                                         AccessTy, AS, MemoryInst, Result,
-                                         InsertedInsts, PromotedInsts, TPT,
-                                         LargeOffsetGEP, OptSize, PSI, BFI)
-                       .matchAddr(V, 0);
+    AddressingModeMatcher Matcher(
+        AddrModeInsts, TLI, STI, TRI, LI, getDTFn, AccessTy, AS, MemoryInst, Result,
+        InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);
+    if (STI.enableCallAddrFold())
+      Matcher.ArgNo = getRegularCallArgNo(MemoryInst, V);
+    bool Success = Matcher.matchAddr(V, 0);
     (void)Success;
     assert(Success && "Couldn't select *anything*?");
     return Result;
@@ -3303,6 +3334,7 @@
   bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
   bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
                              Value *PromotedOperand) const;
+  bool canFoldAddr(const ExtAddrMode &TestAddrMode) const;
 };
 
 class PhiNodeSet;
@@ -3967,7 +3999,7 @@
   TestAddrMode.ScaledReg = ScaleReg;
 
   // If the new address isn't legal, bail out.
-  if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
+  if (!canFoldAddr(TestAddrMode))
     return false;
 
   // It was legal, so commit it.
@@ -3988,7 +4020,7 @@
 
   // If this addressing mode is legal, commit it and remember that we folded
   // this instruction.
-  if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
+  if (canFoldAddr(TestAddrMode)) {
     AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
     AddrMode = TestAddrMode;
     return true;
@@ -4049,7 +4081,7 @@
     // If this addressing mode is legal, commit it..
     // (Note that we defer the (expensive) domtree base legality check
     // to the very last possible point.)
-    if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
+    if (canFoldAddr(TestAddrMode) &&
         getDTFn().dominates(IVInc, MemoryInst)) {
       AddrModeInsts.push_back(cast<Instruction>(IVInc));
       AddrMode = TestAddrMode;
       return true;
@@ -4557,6 +4589,25 @@
   return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
 }
 
+bool AddressingModeMatcher::canFoldAddr(const ExtAddrMode &TestAddrMode) const {
+  if (ArgNo != INVALID_ARG_NO) {
+    // Check if the address is "foldable" into a regular call.
+    const auto &CB = cast<CallBase>(*MemoryInst);
+    if (STI.enableCallAddrFold() &&
+        TLI.canFoldAddrModeIntoCall(CB, ArgNo, TestAddrMode))
+      return true;
+
+    // Even if it isn't, accept it if we have a cold call, but still require a
+    // legal addressing mode. This limits the amount of code we potentially
+    // sink.
+    if (!CB.hasFnAttr(Attribute::Cold) || OptSize ||
+        llvm::shouldOptimizeForSize(CB.getParent(), PSI, BFI))
+      return false;
+  }
+
+  return TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace);
+}
+
 /// Given an instruction or constant expr, see if we can fold the operation
 /// into the addressing mode. If so, update the addressing mode and return
 /// true, otherwise return false without modifying AddrMode.
@@ -4708,13 +4759,15 @@
     // A common case is for the GEP to only do a constant offset. In this case,
     // just add it to the disp field and check validity.
     if (VariableOperand == -1) {
+      bool InBounds = AddrMode.InBounds;
+      int64_t BaseOffs = AddrMode.BaseOffs;
       AddrMode.BaseOffs += ConstantOffset;
-      if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
        if (!cast<GEPOperator>(AddrInst)->isInBounds())
          AddrMode.InBounds = false;
+      if (matchAddr(AddrInst->getOperand(0), Depth + 1))
        return true;
-      }
-      AddrMode.BaseOffs -= ConstantOffset;
+      AddrMode.InBounds = InBounds;
+      AddrMode.BaseOffs = BaseOffs;
 
       if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
           TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
@@ -4858,7 +4911,7 @@
     if (CI->getValue().isSignedIntN(64)) {
       // Fold in immediates if legal for the target.
       AddrMode.BaseOffs += CI->getSExtValue();
-      if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+      if (canFoldAddr(AddrMode))
         return true;
       AddrMode.BaseOffs -= CI->getSExtValue();
     }
@@ -4866,7 +4919,7 @@
     // If this is a global variable, try to fold it into the addressing mode.
     if (!AddrMode.BaseGV) {
       AddrMode.BaseGV = GV;
-      if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+      if (canFoldAddr(AddrMode))
         return true;
       AddrMode.BaseGV = nullptr;
     }
@@ -4909,7 +4962,7 @@
     AddrMode.HasBaseReg = true;
     AddrMode.BaseReg = Addr;
     // Still check for legality in case the target supports [imm] but not [i+r].
-    if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+    if (canFoldAddr(AddrMode))
       return true;
     AddrMode.HasBaseReg = false;
     AddrMode.BaseReg = nullptr;
@@ -4919,7 +4972,7 @@
   if (AddrMode.Scale == 0) {
     AddrMode.Scale = 1;
     AddrMode.ScaledReg = Addr;
-    if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
+    if (canFoldAddr(AddrMode))
       return true;
     AddrMode.Scale = 0;
     AddrMode.ScaledReg = nullptr;
@@ -4963,6 +5016,7 @@
 static bool FindAllMemoryUses(
     Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
     SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
+    const TargetSubtargetInfo &STI,
     const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
     BlockFrequencyInfo *BFI, int &SeenInsts) {
   // If we already considered this instruction, we're done.
@@ -5017,17 +5071,27 @@
           continue;
       }
 
-      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
-      if (!IA)
+      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
+        // If this is a memory operand, we're cool, otherwise bail out.
+        if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
+          return true;
+        continue;
+      }
+
+      // Bail if call folding is not enabled.
+      if (!STI.enableCallAddrFold())
        return true;
 
-      // If this is a memory operand, we're cool, otherwise bail out.
-      if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
+      // Intrinsics are handled elsewhere and we can't quite handle non-pointer
+      // types yet.
+      if (isa<IntrinsicInst>(UserI) || !I->getType()->isPointerTy())
        return true;
+
+      MemoryUses.push_back({&U, I->getType()});
       continue;
     }
 
-    if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+    if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, STI, TRI, OptSize,
                           PSI, BFI, SeenInsts))
       return true;
   }
@@ -5037,11 +5101,11 @@
 
 static bool FindAllMemoryUses(
     Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
-    const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
+    const TargetLowering &TLI, const TargetSubtargetInfo &STI, const TargetRegisterInfo &TRI, bool OptSize,
     ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
   int SeenInsts = 0;
   SmallPtrSet<Instruction *, 16> ConsideredInsts;
-  return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+  return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, STI, TRI, OptSize,
                            PSI, BFI, SeenInsts);
 }
 
@@ -5148,7 +5212,7 @@
   // for another (at worst.) In this context, folding an addressing mode into
   // the use is just a particularly nice way of sinking it.
   SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
-  if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
+  if (FindAllMemoryUses(I, MemoryUses, TLI, STI, TRI, OptSize, PSI, BFI))
     return false; // Has a non-memory, non-foldable use!
 
   // Now that we know that all uses of this instruction are part of a chain of
@@ -5175,11 +5239,13 @@
                                                                       0);
     TypePromotionTransaction::ConstRestorationPt LastKnownGood =
         TPT.getRestorationPoint();
-    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
+    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, STI, TRI, LI, getDTFn,
                                   AddressAccessTy, AS, UserI, Result,
                                   InsertedInsts, PromotedInsts, TPT,
                                   LargeOffsetGEP, OptSize, PSI, BFI);
     Matcher.IgnoreProfitability = true;
+    if (STI.enableCallAddrFold())
+      Matcher.ArgNo = getRegularCallArgNo(UserI, Address);
     bool Success = Matcher.matchAddr(Address, 0);
     (void)Success;
     assert(Success && "Couldn't select *anything*?");
@@ -5290,7 +5356,7 @@
     };
     ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
         V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
-        *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+        *SubtargetInfo, *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
         BFI.get());
 
     GetElementPtrInst *GEP = LargeOffsetGEP.first;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -842,6 +842,9 @@
       ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
       Value *Accumulator = nullptr) const override;
 
+  bool canFoldAddrModeIntoCall(const CallBase &CB, unsigned ArgNo,
+                               const AddrMode &AM) const override;
+
   bool supportSplitCSR(MachineFunction *MF) const override {
     return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
            MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24517,3 +24517,35 @@
 
   return nullptr;
 }
+
+bool AArch64TargetLowering::canFoldAddrModeIntoCall(const CallBase &CB,
+                                                    unsigned ArgNo,
+                                                    const AddrMode &AM) const {
+  // We should always accept a single base register.
+  if (TargetLowering::canFoldAddrModeIntoCall(CB, ArgNo, AM))
+    return true;
+
+  if (ArgNo > 7 || AM.BaseGV || !AM.HasBaseReg)
+    return false;
+
+  // For more complex addressing modes, check the possibility of a cheap
+  // materialisation into an argument register.
+
+  // reg + imm
+  if (AM.Scale == 0)
+    return isLegalAddImmediate(AM.BaseOffs);
+
+  if (AM.BaseOffs != 0)
+    return false;
+
+  // reg + scale * reg
+  if (AM.Scale == 1)
+    return true;
+
+  // Some CPUs have fast `reg + scale * reg` instructions for scales of 2, 4, 8, and 16.
+  if (!Subtarget->hasLSLFast() || AM.Scale <= 0)
+    return false;
+
+  uint64_t S = uint64_t(AM.Scale);
+  return (S & (S - 1)) == 0 && S <= 16;
+}
Index: llvm/lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -190,7 +190,8 @@
   const Triple &getTargetTriple() const { return TargetTriple; }
   bool enableMachineScheduler() const override { return true; }
   bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
-
+  bool enableCallAddrFold() const override { return true; }
+
   /// Returns ARM processor family.
   /// Avoid this function! CPU specifics should be kept local to this class
   /// and preferably modeled with SubtargetFeatures or properties in
Index: llvm/test/CodeGen/AArch64/call-addr-fold.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/call-addr-fold.ll
@@ -0,0 +1,173 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+target triple = "aarch64-linux"
+
+declare void @use(...)
+
+; Check that the address is sunk towards the load, since CodeGenPrepare
+; considers it likely to be "folded" into the call as well.
+define i32 @f0(i1 %c1, ptr %p) {
+; CHECK-LABEL: @f0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 2
+; CHECK-NEXT:    br i1 [[C1:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V0:%.*]] = call i32 @use(ptr [[A]])
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[P]], i64 8
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[V0]], [[IF_THEN]] ], [ [[V1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a = getelementptr i32, ptr %p, i32 2
+  br i1 %c1, label %if.then, label %if.else
+
+if.then:
+  %v0 = call i32 @use(ptr %a)
+  br label %exit
+
+if.else:
+  %v1 = load i32, ptr %a
+  br label %exit
+
+exit:
+  %v = phi i32 [%v0, %if.then], [%v1, %if.else]
+  ret i32 %v
+}
+
+define i32 @f1(i1 %c1, ptr %p, i64 %i) {
+; CHECK-LABEL: @f1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT:    br i1 [[C1:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V0:%.*]] = call i32 @use(ptr [[A]])
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]]
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[SUNKADDR]], align 4
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[V0]], [[IF_THEN]] ], [ [[V1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a = getelementptr i8, ptr %p, i64 %i
+  br i1 %c1, label %if.then, label %if.else
+
+if.then:
+  %v0 = call i32 @use(ptr %a)
+  br label %exit
+
+if.else:
+  %v1 = load i32, ptr %a
+  br label %exit
+
+exit:
+  %v = phi i32 [%v0, %if.then], [%v1, %if.else]
+  ret i32 %v
+}
+
+; Address calculation too complex.
+define i32 @f2(i1 %c1, ptr %p, i64 %i) {
+; CHECK-LABEL: @f2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT:    br i1 [[C1:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V0:%.*]] = call i32 @use(ptr [[A]])
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[V0]], [[IF_THEN]] ], [ [[V1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a = getelementptr i32, ptr %p, i64 %i
+  br i1 %c1, label %if.then, label %if.else
+
+if.then:
+  %v0 = call i32 @use(ptr %a)
+  br label %exit
+
+if.else:
+  %v1 = load i32, ptr %a
+  br label %exit
+
+exit:
+  %v = phi i32 [%v0, %if.then], [%v1, %if.else]
+  ret i32 %v
+}
+
+; Address calculation cheap enough on some cores.
+define i32 @f3(i1 %c1, ptr %p, i64 %i) "target-cpu"="neoverse-n1" {
+; CHECK-LABEL: @f3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT:    br i1 [[C1:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V0:%.*]] = call i32 @use(ptr [[A]])
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[I]], 4
+; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[P]], i64 [[SUNKADDR]]
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[SUNKADDR1]], align 4
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[V0]], [[IF_THEN]] ], [ [[V1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a = getelementptr i32, ptr %p, i64 %i
+  br i1 %c1, label %if.then, label %if.else
+
+if.then:
+  %v0 = call i32 @use(ptr %a)
+  br label %exit
+
+if.else:
+  %v1 = load i32, ptr %a
+  br label %exit
+
+exit:
+  %v = phi i32 [%v0, %if.then], [%v1, %if.else]
+  ret i32 %v
+}
+
+define i32 @f4(i1 %c1, ptr %p, i64 %i) {
+; CHECK-LABEL: @f4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[I:%.*]]
+; CHECK-NEXT:    br i1 [[C1:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V0:%.*]] = call i32 @use(ptr [[A]])
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[V1:%.*]] = call i32 @use(i32 1, ptr [[A]])
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[V:%.*]] = phi i32 [ [[V0]], [[IF_THEN]] ], [ [[V1]], [[IF_ELSE]] ]
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a = getelementptr i8, ptr %p, i64 %i
+  br i1 %c1, label %if.then, label %if.else
+
+if.then:
+  %v0 = call i32 @use(ptr %a)
+  br label %exit
+
+if.else:
+  %v1 = call i32 @use(i32 1, ptr %a)
+  br label %exit
+
+exit:
+  %v = phi i32 [%v0, %if.then], [%v1, %if.else]
  ret i32 %v
+}
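
Note (illustration only, not part of the patch): a target opts in by overriding the two new hooks. Below is a minimal sketch for a hypothetical out-of-tree backend; the MyTarget class names and the isInt<12> offset limit are assumptions chosen for the example, not anything introduced by this change.

  // In the hypothetical MyTargetSubtarget: ask CodeGenPrepare to consider
  // folding address computations into call sequences.
  bool enableCallAddrFold() const override { return true; }

  // In the hypothetical MyTargetTargetLowering: widen the default
  // base-register-only policy to also accept reg + small immediate.
  bool MyTargetTargetLowering::canFoldAddrModeIntoCall(const CallBase &CB,
                                                       unsigned ArgNo,
                                                       const AddrMode &AM) const {
    // The base implementation accepts a plain base register.
    if (TargetLowering::canFoldAddrModeIntoCall(CB, ArgNo, AM))
      return true;
    // Assumed policy: a single add of a 12-bit immediate is cheap enough to
    // materialise into the argument register.
    return AM.HasBaseReg && !AM.BaseGV && AM.Scale == 0 && isInt<12>(AM.BaseOffs);
  }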