diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1760,11 +1760,11 @@
     return isNoopAddrSpaceCast(SrcAS, DestAS);
   }
 
-  /// Return true if the pointer arguments to CI should be aligned by aligning
+  /// Return true if the pointer arguments to CB should be aligned by aligning
   /// the object whose address is being passed. If so then MinSize is set to the
   /// minimum size the object must be to be aligned and PrefAlign is set to the
   /// preferred alignment.
-  virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
+  virtual bool shouldAlignPointerArgs(CallBase * /*CB*/, unsigned & /*MinSize*/,
                                       unsigned & /*PrefAlign*/) const {
     return false;
   }
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -369,8 +369,8 @@
     bool optimizeInst(Instruction *I, bool &ModifiedDT);
     bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                             Type *AccessTy, unsigned AddrSpace);
-    bool optimizeInlineAsmInst(CallInst *CS);
-    bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
+    bool optimizeInlineAsmInst(CallBase *CB);
+    bool optimizeCallBase(CallBase *CB, bool &ModifiedDT);
     bool optimizeExt(Instruction *&I);
     bool optimizeExtUses(Instruction *I);
     bool optimizeLoadExt(LoadInst *Load);
@@ -1883,35 +1883,36 @@
   return true;
 }
 
-bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
-  BasicBlock *BB = CI->getParent();
+bool CodeGenPrepare::optimizeCallBase(CallBase *CB, bool &ModifiedDT) {
+  BasicBlock *BB = CB->getParent();
 
   // Lower inline assembly if we can.
   // If we found an inline asm expession, and if the target knows how to
   // lower it to normal LLVM code, do so now.
-  if (isa<InlineAsm>(CI->getCalledValue())) {
-    if (TLI->ExpandInlineAsm(CI)) {
-      // Avoid invalidating the iterator.
-      CurInstIterator = BB->begin();
-      // Avoid processing instructions out of order, which could cause
-      // reuse before a value is defined.
-      SunkAddrs.clear();
-      return true;
+  if (auto *CI = dyn_cast<CallInst>(CB))
+    if (isa<InlineAsm>(CI->getCalledValue())) {
+      if (TLI->ExpandInlineAsm(CI)) {
+        // Avoid invalidating the iterator.
+        CurInstIterator = BB->begin();
+        // Avoid processing instructions out of order, which could cause
+        // reuse before a value is defined.
+        SunkAddrs.clear();
+        return true;
+      }
+      // Sink address computing for memory operands into the block.
+      if (optimizeInlineAsmInst(CI))
+        return true;
     }
-    // Sink address computing for memory operands into the block.
-    if (optimizeInlineAsmInst(CI))
-      return true;
-  }
 
   // Align the pointer arguments to this call if the target thinks it's a good
   // idea
   unsigned MinSize, PrefAlign;
-  if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
-    for (auto &Arg : CI->arg_operands()) {
+  if (TLI->shouldAlignPointerArgs(CB, MinSize, PrefAlign)) {
+    for (auto &Arg : CB->arg_operands()) {
       // We want to align both objects whose address is used directly and
       // objects whose address is used in casts and GEPs, though it only makes
-      // sense for GEPs if the offset is a multiple of the desired alignment and
-      // if size - offset meets the size threshold.
+      // sense for GEPs if the offset is a multiple of the desired alignment
+      // and if size - offset meets the size threshold.
       if (!Arg->getType()->isPointerTy())
         continue;
       APInt Offset(DL->getIndexSizeInBits(
@@ -1919,7 +1920,7 @@
                    0);
       Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
       uint64_t Offset2 = Offset.getLimitedValue();
-      if ((Offset2 & (PrefAlign-1)) != 0)
+      if ((Offset2 & (PrefAlign - 1)) != 0)
         continue;
       AllocaInst *AI;
       if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
@@ -1932,13 +1933,12 @@
       GlobalVariable *GV;
       if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
           GV->getPointerAlignment(*DL) < PrefAlign &&
-          DL->getTypeAllocSize(GV->getValueType()) >=
-              MinSize + Offset2)
+          DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
         GV->setAlignment(MaybeAlign(PrefAlign));
     }
     // If this is a memcpy (or similar) then we may be able to improve the
     // alignment
-    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
+    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CB)) {
       unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
       if (DestAlign > MI->getDestAlignment())
         MI->setDestAlignment(DestAlign);
@@ -1954,16 +1954,16 @@
   // cold block. This interacts with our handling for loads and stores to
   // ensure that we can fold all uses of a potential addressing computation
   // into their uses. TODO: generalize this to work over profiling data
-  if (CI->hasFnAttr(Attribute::Cold) &&
-      !OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
-    for (auto &Arg : CI->arg_operands()) {
+  if (CB->hasFnAttr(Attribute::Cold) && !OptSize &&
+      !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
+    for (auto &Arg : CB->arg_operands()) {
       if (!Arg->getType()->isPointerTy())
         continue;
       unsigned AS = Arg->getType()->getPointerAddressSpace();
-      return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+      return optimizeMemoryInst(CB, Arg, Arg->getType(), AS);
     }
 
-  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+  auto *II = dyn_cast<IntrinsicInst>(CB);
   if (II) {
     switch (II->getIntrinsicID()) {
     default: break;
@@ -1982,7 +1982,7 @@
       }
       Constant *RetVal = ConstantInt::getTrue(II->getContext());
       resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
-        replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+        replaceAndRecursivelySimplify(CB, RetVal, TLInfo, nullptr);
       });
       return true;
     }
@@ -1992,12 +1992,12 @@
       llvm_unreachable("llvm.is.constant.* should have been lowered already");
     case Intrinsic::aarch64_stlxr:
     case Intrinsic::aarch64_stxr: {
-      ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
+      ZExtInst *ExtVal = dyn_cast<ZExtInst>(CB->getArgOperand(0));
       if (!ExtVal || !ExtVal->hasOneUse() ||
-          ExtVal->getParent() == CI->getParent())
+          ExtVal->getParent() == CB->getParent())
         return false;
       // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
-      ExtVal->moveBefore(CI);
+      ExtVal->moveBefore(CB);
       // Mark this instruction as "inserted by CGP", so that other
       // optimizations don't touch it.
       InsertedInsts.insert(ExtVal);
@@ -2057,18 +2057,21 @@
   }
 
   // From here on out we're working with named functions.
-  if (!CI->getCalledFunction()) return false;
+  if (!CB->getCalledFunction())
+    return false;
 
   // Lower all default uses of _chk calls. This is very similar
   // to what InstCombineCalls does, but here we are only lowering calls
   // to fortified library functions (e.g. __memcpy_chk) that have the default
   // "don't know" as the objectsize. Anything else should be left alone.
-  FortifiedLibCallSimplifier Simplifier(TLInfo, true);
-  IRBuilder<> Builder(CI);
-  if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
-    CI->replaceAllUsesWith(V);
-    CI->eraseFromParent();
-    return true;
+  if (auto *CI = dyn_cast<CallInst>(CB)) {
+    FortifiedLibCallSimplifier Simplifier(TLInfo, true);
+    IRBuilder<> Builder(CB);
+    if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
+      CI->replaceAllUsesWith(V);
+      CI->eraseFromParent();
+      return true;
+    }
   }
 
   return false;
@@ -4537,13 +4540,12 @@
 
 /// Check to see if all uses of OpVal by the specified inline asm call are due
 /// to memory operands. If so, return true, otherwise return false.
-static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
+static bool IsOperandAMemoryOperand(CallBase *CB, InlineAsm *IA, Value *OpVal,
                                     const TargetLowering &TLI,
                                     const TargetRegisterInfo &TRI) {
-  const Function *F = CI->getFunction();
-  TargetLowering::AsmOperandInfoVector TargetConstraints =
-      TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
-                           ImmutableCallSite(CI));
+  const Function *F = CB->getFunction();
+  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
+      F->getParent()->getDataLayout(), &TRI, ImmutableCallSite(CB));
 
   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -4620,21 +4622,19 @@
       continue;
     }
 
-    if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
-      if (CI->hasFnAttr(Attribute::Cold)) {
+    if (CallBase *CB = dyn_cast<CallBase>(UserI)) {
+      if (CB->hasFnAttr(Attribute::Cold)) {
         // If this is a cold call, we can sink the addressing calculation into
-        // the cold path. See optimizeCallInst
-        bool OptForSize = OptSize ||
-          llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
-        if (!OptForSize)
+        // the cold path. See optimizeCallBase.
+        if (!(OptSize || shouldOptimizeForSize(CB->getParent(), PSI, BFI)))
          continue;
       }
 
-      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
+      InlineAsm *IA = dyn_cast<InlineAsm>(CB->getCalledValue());
       if (!IA) return true;
 
       // If this is a memory operand, we're cool, otherwise bail out.
-      if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
+      if (!IsOperandAMemoryOperand(CB, IA, I, TLI, TRI))
        return true;
       continue;
    }
@@ -5186,13 +5186,13 @@
 
 /// If there are any memory operands, use OptimizeMemoryInst to sink their
 /// address computing into the block when possible / profitable.
-bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
+bool CodeGenPrepare::optimizeInlineAsmInst(CallBase *CB) {
   bool MadeChange = false;
 
   const TargetRegisterInfo *TRI =
-      TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
+      TM->getSubtargetImpl(*CB->getFunction())->getRegisterInfo();
   TargetLowering::AsmOperandInfoVector TargetConstraints =
-      TLI->ParseConstraints(*DL, TRI, CS);
+      TLI->ParseConstraints(*DL, TRI, ImmutableCallSite(CB));
   unsigned ArgNo = 0;
   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
     TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
@@ -5202,8 +5202,8 @@
 
     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
         OpInfo.isIndirect) {
-      Value *OpVal = CS->getArgOperand(ArgNo++);
-      MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
+      Value *OpVal = CB->getArgOperand(ArgNo++);
+      MadeChange |= optimizeMemoryInst(CB, OpVal, OpVal->getType(), ~0u);
     } else if (OpInfo.Type == InlineAsm::isInput)
       ArgNo++;
   }
@@ -7245,7 +7245,8 @@
   case Instruction::AShr:
     return optimizeShiftInst(cast<BinaryOperator>(I));
   case Instruction::Call:
-    return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
+  case Instruction::CallBr:
+    return optimizeCallBase(cast<CallBase>(I), ModifiedDT);
   case Instruction::Select:
     return optimizeSelectInst(cast<SelectInst>(I));
   case Instruction::ShuffleVector:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -515,7 +515,7 @@
       return true;
     }
 
-    bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+    bool shouldAlignPointerArgs(CallBase *CB, unsigned &MinSize,
                                 unsigned &PrefAlign) const override;
 
     /// createFastISel - This method returns a target specific FastISel object,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1770,9 +1770,9 @@
 // memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
 // source/dest is aligned and the copy size is large enough. We therefore want
 // to align such objects passed to memory intrinsics.
-bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+bool ARMTargetLowering::shouldAlignPointerArgs(CallBase *CB, unsigned &MinSize,
                                                unsigned &PrefAlign) const {
-  if (!isa<MemIntrinsic>(CI))
+  if (!isa<MemIntrinsic>(CB))
     return false;
   MinSize = 8;
   // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
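
Note (illustrative, not part of the patch): since the hook now takes a CallBase, an out-of-tree target override only needs a signature change; the body can stay the same. A minimal sketch, assuming a hypothetical free function and thresholds modelled on the ARM implementation above:

// Illustrative sketch only -- not taken from any in-tree target. Shows the
// shape of an override of the updated CallBase-based hook; the function name
// and the 8-byte values are assumptions.
#include "llvm/IR/InstrTypes.h"    // llvm::CallBase
#include "llvm/IR/IntrinsicInst.h" // llvm::MemIntrinsic
using namespace llvm;

static bool exampleShouldAlignPointerArgs(CallBase *CB, unsigned &MinSize,
                                          unsigned &PrefAlign) {
  // Only memory intrinsics (memcpy/memmove/memset) benefit from realigning
  // the objects passed to them; leave every other call site alone.
  if (!isa<MemIntrinsic>(CB))
    return false;
  MinSize = 8;   // assumed: only realign objects of at least 8 bytes
  PrefAlign = 8; // assumed: prefer 8-byte alignment for LDM/STM-style copies
  return true;
}

With this change CodeGenPrepare reaches the hook for both call and callbr sites, which is why the parameter is the common CallBase rather than CallInst.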