diff --git a/llvm/include/llvm/CodeGen/MachineConstantPool.h b/llvm/include/llvm/CodeGen/MachineConstantPool.h --- a/llvm/include/llvm/CodeGen/MachineConstantPool.h +++ b/llvm/include/llvm/CodeGen/MachineConstantPool.h @@ -19,6 +19,7 @@ #include "llvm/MC/SectionKind.h" #include "llvm/Support/Alignment.h" #include +#include #include namespace llvm { @@ -146,6 +147,17 @@ return Constants; } + /// Erase the constant pool entries whose indices are listed in CPI. + /// \returns true if at least one entry was erased. + bool removeConstantsFromPool(const std::set &CPI) { + bool RemovedSomething = false; + // CPI iterates its indices in ascending order. Walk it backwards so that + // erasing an entry never shifts an index that has yet to be erased. + for (auto it = CPI.crbegin(); it != CPI.crend(); ++it) { + Constants.erase(Constants.begin() + (*it)); + RemovedSomething = true; + } + return RemovedSomething; + } + /// print - Used by the MachineFunction printer to print information about /// constant pool objects. Implemented in MachineFunction.cpp void print(raw_ostream &OS) const; diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -41,6 +41,7 @@ PPCMachineFunctionInfo.cpp PPCMachineScheduler.cpp PPCMacroFusion.cpp + PPCMergeConstPoolEntries.cpp PPCMIPeephole.cpp PPCRegisterInfo.cpp PPCSubtarget.cpp diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -53,6 +53,7 @@ FunctionPass *createPPCPreEmitPeepholePass(); FunctionPass *createPPCExpandAtomicPseudoPass(); FunctionPass *createPPCCTRLoopsPass(); + ModulePass *createPPCMergeConstPoolPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP); bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, @@ -78,6 +79,7 @@ void initializePPCExpandAtomicPseudoPass(PassRegistry &); void initializePPCCTRLoopsPass(PassRegistry &); void initializePPCDAGToDAGISelPass(PassRegistry &); + 
void initializePPCMergeConstPoolPass(PassRegistry &); extern char &PPCVSXFMAMutateID; diff --git a/llvm/lib/Target/PowerPC/PPCMergeConstPoolEntries.cpp b/llvm/lib/Target/PowerPC/PPCMergeConstPoolEntries.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCMergeConstPoolEntries.cpp @@ -0,0 +1,519 @@ +//===--------- PPCMergeConstPoolEntries.cpp - Merge Const Pool Entries ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// On PowerPC each entry in the Constant Pool requires one TOC entry. +// This pass tries to merge the Constant Pool Entries so that we avoid having +// one TOC entry for each Constant Pool Entry. This can be done on the module +// level or on the function level. The goal of the merge is to have a single +// global struct that contains all of the constants in the constant pool and +// then access them as the base address of the struct plus an offset. +// +// TODO: Implement the function level merging. 
+// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "PPCInstrInfo.h" +#include "PPCSubtarget.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-constant-pool-merge" + +enum class MergeType { NOMERGE, FUNCTION, MODULE }; +static cl::opt MergeTypeFormat( + "ppc-const-pool-merge", cl::desc("Constant Pool Merge Type"), + cl::values(clEnumValN(MergeType::NOMERGE, "none", + "Do not merge the constant pool."), + clEnumValN(MergeType::FUNCTION, "function", + "Merge constant pool in each function."), + clEnumValN(MergeType::MODULE, "module", + "Merge constant pool for the entire module. "))); + +static cl::opt + MaxConstPoolReplace("max-const-pool-replace", cl::init(-1), cl::Hidden, + cl::desc("Limit the number of constants that can be " + "replaced when merging the constant pool.")); + +// Note: We will merge the constants if either of the two minimum conditions +// is met: the module has at least MinConstsToMerge unique constants to merge, +// OR at least MinFunctionsToMerge functions contain constants to merge. + +// When merging the constant pool we check the number of constants we have +// collected. We need to have at least MinConstsToMerge in order to consider +// merging the constant pool. 
+static cl::opt MinConstsToMerge( + "min-consts-to-merge", cl::init(2), cl::Hidden, + cl::desc("Merge if we have at least this many unique constants.")); + +// When merging the constant pool we check the number of functions that have +// constants to merge. +static cl::opt + MinFunctionsToMerge("min-funcs-to-merge", cl::init(2), cl::Hidden, + cl::desc("Merge if we have at least this many " + "functions with valid constants to merge.")); + +namespace { + +// Module pass that merges the constant pool entries of an AIX module into +// private global structs so that the merged constants can share TOC entries. +class PPCMergeConstPool : public ModulePass { +public: + static char ID; + + // Info that we want to maintain for each constant. + struct ConstantInfo { + Align Alignment; + std::vector UsesOfConst; + }; + + // When merging elements we also need to keep track of the offset into the + // merged struct as well as which merged struct to use. + struct MergedElementInfo { + Align Alignment; + std::vector UsesOfConst; + uint64_t OffsetIntoStruct; + uint64_t StructIndex; + }; + + PPCMergeConstPool() : ModulePass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + ModulePass::getAnalysisUsage(AU); + } + + // Dispatch to the merge strategy selected by -ppc-const-pool-merge. Returns + // true if the module was changed. + bool runOnModule(Module &M) override { + MM = &M; + MMI = &getAnalysis().getMMI(); + TargetPassConfig &TPC = getAnalysis(); + + PPCTM = &TPC.getTM(); + + // This pass is AIX only. + if (!PPCTM->isAIX()) + return false; + + switch (MergeTypeFormat) { + case MergeType::NOMERGE: + return false; + case MergeType::FUNCTION: + return mergeConstantsInFunction(); + case MergeType::MODULE: + return mergeConstantsInModule(); + } + // Every MergeType value returns above. This keeps control from reaching the + // end of a non-void function and traps on an unexpected option value. + llvm_unreachable("Unknown constant pool merge type."); + } + +private: + Module *MM; + MachineModuleInfo *MMI; + PPCTargetMachine const *PPCTM; + + // The map ModuleEntriesList collects all of the constants that are to be + // merged. + // The map CPIToUpdate contains all of the constants that will not be + // merged. These must still be tracked as their constant pool index may + // change. 
+ // Every constant in this module will be in exactly one of these two maps. + std::map ModuleEntriesList; + std::map CPIToUpdate; + + bool mergeConstantsInFunction(); + bool mergeConstantsInModule(); + Constant *getEditableConstant(const Constant *CVConst); + void fixupRemainingCPI(); + bool mergeModuleConstantsFromList(); +}; + +// Placeholder for per-function merging. It only reports that the mode is not +// implemented and never changes the module. +bool PPCMergeConstPool::mergeConstantsInFunction() { + bool MadeChange = false; + LLVM_DEBUG(dbgs() << "PPCMergeConstPool: Function Level Merge.\n"); + + // TODO: Add the feature to merge constants on a per-function basis. + errs() + << "PPCMergeConstPool: Function merge has not yet been implemented.\n"; + + return MadeChange; +} + +// Walk every function of the module, record each constant pool use in either +// ModuleEntriesList (to be merged) or CPIToUpdate (left in the pool), and +// merge when one of the two minimums is reached. Returns true if a merge +// happened. +bool PPCMergeConstPool::mergeConstantsInModule() { + bool MadeChange = false; + LLVM_DEBUG(dbgs() << "PPCMergeConstPool: Module Level Merge.\n"); + LLVM_DEBUG(dbgs() << "PPCMergeConstPool: Module: " << MM->getName() << "\n"); + + // Keep a set of unique functions that have at least one constant that is to + // be merged. This is used as a heuristic to determine if this module should + // merge the selected constants in the constant pool. + std::set FunctionsWithAtLeastOneMerge; + for (Function &F : *MM) { + if (F.empty()) + continue; + + MachineFunction *MF = MMI->getMachineFunction(F); + + assert(MF && "Expected all machine functions to exist at this point."); + + const MachineConstantPool *MCP = MF->getConstantPool(); + + // No need to look at functions with an empty Constant Pool. + if (!MCP || MCP->isEmpty()) + continue; + + const std::vector &MCPVec = MCP->getConstants(); + LLVM_DEBUG(dbgs() << "Module: " << MM->getName() + << " Function: " << MF->getName() + << " Const Pool Size: " << MCPVec.size() << "\n"); + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB.instrs()) { + + // The only instruction that should have a const pool is either + // a load or an ADDIS from toc. All others can be ignored. 
+ if (!MI.mayLoad() && MI.getOpcode() != PPC::ADDIStocHA && + MI.getOpcode() != PPC::ADDIStocHA8) + continue; + + for (MachineOperand &MOP : MI.operands()) { + // We only care about operands that are Constant Pool Indices. + if (!MOP.isCPI()) + continue; + + // We cannot edit existing constants. This is an issue because in + // order to merge all of the constants in the constant pool we need + // to edit the existing constants. Therefore, we have to copy the + // old constant data to a new constant that is editable and then + // delete the old constant later. + int CPI = MOP.getIndex(); + const Constant *CVConst = MCPVec[CPI].Val.ConstVal; + Constant *CV = getEditableConstant(CVConst); + if (!CV || (ModuleEntriesList.size() >= MaxConstPoolReplace && + ModuleEntriesList.find(CV) == ModuleEntriesList.end())) { + // If we're not merging this constant, place it in the CPIToUpdate + // map as its index in the constant pool will likely change and all + // the CPI operands that refer to it need to be updated. + CPIToUpdate[CVConst].Alignment = MCPVec[CPI].getAlign(); + CPIToUpdate[CVConst].UsesOfConst.push_back(&MOP); + } else { + // We have a constant we want to replace. + ModuleEntriesList[CV].Alignment = MCPVec[CPI].getAlign(); + ModuleEntriesList[CV].UsesOfConst.push_back(&MOP); + ModuleEntriesList[CV].OffsetIntoStruct = -1; + ModuleEntriesList[CV].StructIndex = -1; + FunctionsWithAtLeastOneMerge.insert(MF); + } + } + } + } + } + + // Only merge when something was collected. Without this check a minimum + // lowered to zero would build an empty merged pool and report a change. + if (!ModuleEntriesList.empty() && + (ModuleEntriesList.size() >= MinConstsToMerge || + FunctionsWithAtLeastOneMerge.size() >= MinFunctionsToMerge)) { + mergeModuleConstantsFromList(); + MadeChange = true; + } + + // If we have made a change then we have deleted a constant from the + // constant pool. This means that the constant pool index for the remaining + // CPI instructions need to be updated. + if (MadeChange && !CPIToUpdate.empty()) + fixupRemainingCPI(); + + return MadeChange; +} + +// This pass may not merge all of the constants from the constant pool. 
For +// those constants that will not be merged we need to go through each of them +// and update their constant pool index. The index needs to be updated because +// constants that are merged will be removed from the constant pool. Constants +// that are removed may change the index of a remaining constant. For example +// if we have a constant at index 1 that is removed all constants with indices +// that are greater than 1 will be renumbered. +void PPCMergeConstPool::fixupRemainingCPI() { + assert(ModuleEntriesList.empty() && + "Expected the map of merged constants to be empty at this point."); + for (auto const &Entry : CPIToUpdate) { + for (MachineOperand *Operand : Entry.second.UsesOfConst) { + MachineFunction *MF = Operand->getParent()->getMF(); + MachineConstantPool *MCP = MF->getConstantPool(); + // Look the constant up again to get its index after the removals. + unsigned CPI = + MCP->getConstantPoolIndex(Entry.first, Entry.second.Alignment); + Operand->setIndex(CPI); + } + } +} + +// Get a copy of the CVConst that is not marked as "const". +// This is required because the creation of a constant struct using +// ConstantStruct::getAnon(..) requires ArrayRef<Constant *> and not +// ArrayRef<const Constant *>. 
+Constant *PPCMergeConstPool::getEditableConstant(const Constant *CVConst) { + Constant *CV = nullptr; + if (const ConstantInt *CInt = dyn_cast(CVConst)) + CV = ConstantInt::get(CInt->getContext(), CInt->getValue()); + else if (const ConstantFP *CFP = dyn_cast(CVConst)) + CV = ConstantFP::get(CFP->getContext(), CFP->getValue()); + else if (const ConstantDataVector *CVec = + dyn_cast(CVConst)) + CV = ConstantDataVector::getRaw(CVec->getRawDataValues(), + CVec->getNumElements(), + CVec->getElementType()); + else if (const ConstantDataArray *CArr = + dyn_cast(CVConst)) + CV = ConstantDataArray::getRaw(CArr->getRawDataValues(), + CArr->getNumElements(), + CArr->getElementType()); + return CV; +} + +// Pack the constants in ModuleEntriesList into private global structs named +// __ModuleConstantPool<N>, starting a new struct once the byte offset passes +// 0x7FFF, and rewrite every recorded use to address the constant as struct +// base plus offset. Merged constants are then removed from each function's +// constant pool and ModuleEntriesList is cleared. Always returns true. +bool PPCMergeConstPool::mergeModuleConstantsFromList() { + const DataLayout &DL = MM->getDataLayout(); + bool Is64Bit = PPCTM->isPPC64(); + const TargetRegisterClass *GPRNoZero = Is64Bit + ? &PPC::G8RC_and_G8RC_NOX0RegClass + : &PPC::GPRC_and_GPRC_NOR0RegClass; + + std::vector KeyVec; + std::vector MergedPools; + // Offset in bytes into the new struct global variable. + unsigned ByteOffsetInStruct = 0; + // Index into which group of merged Constants + uint64_t IndexOfMergedPool = 0; + // NOTE(review): offsets are computed by hand from the constant pool + // alignment while the struct built by ConstantStruct::getAnon is laid out by + // the DataLayout; confirm the two always agree. + // NOTE(review): ModuleEntriesList is a std::map, so the member order follows + // its key ordering (Constant pointers); confirm the layout is deterministic. + for (auto &ConstVal : ModuleEntriesList) { + unsigned AlignValue = ConstVal.second.Alignment.value(); + // Pad the offset so that it aligns with the required Alignment. + uint64_t AlignmentRemainder = ByteOffsetInStruct % AlignValue; + if (AlignmentRemainder) { + uint64_t Padding = AlignValue - AlignmentRemainder; + ByteOffsetInStruct += Padding; + } + + // Check if we have filled up this pool. Start a new one if we have. + if (ByteOffsetInStruct > 0x7FFF) { + Constant *AnonStruct = ConstantStruct::getAnon(KeyVec); + + // The GlobalVariable constructor calls MM->insertGlobalVariable(G). 
+ GlobalVariable *G = new GlobalVariable( + *MM, AnonStruct->getType(), + /* isConstant */ true, GlobalValue::PrivateLinkage, AnonStruct, + "__ModuleConstantPool" + std::to_string(IndexOfMergedPool)); + + // The machine operand for the global variable that contains the struct. + MachineOperand CPOperand = MachineOperand::CreateGA(G, 0); + MergedPools.push_back(CPOperand); + IndexOfMergedPool++; + ByteOffsetInStruct = 0; + KeyVec.clear(); + } + ConstVal.second.OffsetIntoStruct = ByteOffsetInStruct; + ConstVal.second.StructIndex = IndexOfMergedPool; + KeyVec.push_back(ConstVal.first); + Constant *C = ConstVal.first; + Type *ElemType = C->getType(); + ByteOffsetInStruct += DL.getTypeSizeInBits(ElemType) / 8; + } + + Constant *AnonStruct = ConstantStruct::getAnon(KeyVec); + + // The GlobalVariable constructor calls MM->insertGlobalVariable(G). + GlobalVariable *G = new GlobalVariable( + *MM, AnonStruct->getType(), + /* isConstant */ true, GlobalValue::PrivateLinkage, AnonStruct, + "__ModuleConstantPool" + std::to_string(IndexOfMergedPool)); + + // The machine operand for the global variable that contains the struct. + MachineOperand CPOperand = MachineOperand::CreateGA(G, 0); + MergedPools.push_back(CPOperand); + + LLVM_DEBUG(dbgs() << "mergeConstantsForModule: Replacing " + << ModuleEntriesList.size() + << " constant pool entries with one entry.\n"); + LLVM_DEBUG(AnonStruct->dump()); + + std::map> ConstsToDelete; + for (auto const &ConstVal : ModuleEntriesList) { + Constant *C = ConstVal.first; + + assert(ConstVal.second.OffsetIntoStruct <= 0x7FFF && + "Merged constant pool too large."); + + LLVM_DEBUG(dbgs() << "Constant Offset: " << ConstVal.second.OffsetIntoStruct + << "\n"); + LLVM_DEBUG(dbgs() << "For Constant: "); + LLVM_DEBUG(C->dump()); + + // On AIX the constant is loaded by first loading the address of the + // constant and then by loading the actual value of the constant from + // that address. 
Since we are changing this base address we need to + // loop through all of the uses of that first load and potentially + // add an offset. + for (MachineOperand *MOUse : ConstVal.second.UsesOfConst) { + MachineInstr *MI = MOUse->getParent(); + const DebugLoc &DL = MI->getDebugLoc(); + unsigned MIOpcode = MI->getOpcode(); + MachineOperand &Op0 = MI->getOperand(0); + MachineFunction *MF = MI->getMF(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + Register ResultReg = Op0.getReg(); + const PPCSubtarget &Subtarget = MF->getSubtarget(); + const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); + + LLVM_DEBUG( + dbgs() << "mergeConstantsForModule: Replacing this instruction:\n"); + LLVM_DEBUG(MI->dump()); + + switch (MIOpcode) { + case PPC::ADDIStocHA8: + case PPC::ADDIStocHA: { + // These instructions are the High bits of the GOT address so + // we do not need to look ahead. There is another TOC + // instruction that will follow it later. + // Point at the pool that holds this constant, which is not + // necessarily the last pool created. + MI->getOperand(2).ChangeToGA( + MergedPools[ConstVal.second.StructIndex].getGlobal(), 0); + break; + } + case PPC::LDtocCPT: + // We need to change the opcode in order to get the correct + // relocation. + MIOpcode = PPC::LDtoc; + LLVM_FALLTHROUGH; + case PPC::LDtocL: + case PPC::LWZtoc: + case PPC::LWZtocL: { + // Need to create a new destination register that is a GPR but not + // R0. The reason for this is that this register will potentially + // be used as the first input register to an ADDI instruction + // where R0 is treated as a zero and not a register. + Register LoadDestReg = RegInfo.createVirtualRegister(GPRNoZero); + MachineInstrBuilder Builder = + BuildMI(*MI->getParent(), MI, DL, TII.get(MIOpcode), LoadDestReg); + Builder.add(MergedPools[ConstVal.second.StructIndex]); + Builder.add(MI->getOperand(2)); + + LLVM_DEBUG(dbgs() << "Replaced TOC MI with:"); + LLVM_DEBUG(Builder->dump()); + + // Need to look at all the uses of this constant and add the offset. + // Before merging the constant pool each constant had its own address + // and so the offset was always zero. 
Now that we are merging the + // constants this offset may be some positive offset greater than zero. + // In order to correctly address the constant in the merged pool we + // may be required to insert an ADDI or an LI. + std::vector OperandsUsingReg; + for (MachineOperand &MOp : RegInfo.use_operands(ResultReg)) + OperandsUsingReg.push_back(&MOp); + + for (MachineOperand *MOp : OperandsUsingReg) { + MachineInstr *MIUse = MOp->getParent(); + LLVM_DEBUG( + dbgs() + << "mergeConstantsForModule: Looking at CP address use:\n"); + LLVM_DEBUG(MIUse->dump()); + + if (!MIUse->mayLoad()) { + // If this machine instr is not a load we cannot assume anything. + // We have to use an ADD to simply add the offset for the new + // location of the constant. + Register TmpDestReg = RegInfo.createVirtualRegister(GPRNoZero); + BuildMI(*MI->getParent(), MI, DL, + TII.get(Is64Bit ? PPC::ADDI8 : PPC::ADDI), TmpDestReg) + .addUse(LoadDestReg) + .addImm(ConstVal.second.OffsetIntoStruct); + + LLVM_DEBUG(dbgs() << "Replacing register use for: "); + LLVM_DEBUG(MIUse->dump()); + MOp->setReg(TmpDestReg); + LLVM_DEBUG(dbgs() << "Replaced with: "); + LLVM_DEBUG(MIUse->dump()); + } else if (TII.isXFormMemOp(MIUse->getOpcode())) { + // We are expecting an X-Form Load here. + // `ResReg = LOAD Reg1, Reg2`. + // If Reg1 is zero then we can replace it with the result from + // the instruction `Reg1 = LI Offset`. + MachineOperand &OffsetOp = MIUse->getOperand(1); + MachineOperand &BaseAddrOp = MIUse->getOperand(2); + assert(OffsetOp.isReg() && "Expected a register as operand 1."); + assert(BaseAddrOp.isReg() && "Expected a register as operand 2."); + + Register TmpDestReg = RegInfo.createVirtualRegister(GPRNoZero); + LLVM_DEBUG(dbgs() << "Replacing register use for:\n"); + LLVM_DEBUG(MIUse->dump()); + bool IsZero = + (OffsetOp.getReg() == (Is64Bit ? PPC::ZERO8 : PPC::ZERO)); + unsigned AddOpcode = IsZero ? (Is64Bit ? PPC::LI8 : PPC::LI) + : (Is64Bit ? 
PPC::ADDI8 : PPC::ADDI); + MachineInstrBuilder AddBuilder = BuildMI( + *MI->getParent(), MI, DL, TII.get(AddOpcode), TmpDestReg); + if (!IsZero) + AddBuilder.addUse(LoadDestReg); + AddBuilder.addImm(ConstVal.second.OffsetIntoStruct); + + if (IsZero) { + OffsetOp.setReg(TmpDestReg); + MOp->setReg(LoadDestReg); + } else + MOp->setReg(TmpDestReg); + + LLVM_DEBUG(dbgs() << "Replaced with: "); + LLVM_DEBUG(MIUse->dump()); + } else { + // We are expecting this to be a D-Form load of the form: + // ResReg = LOAD Imm(Reg). + MachineOperand &OffsetOp = MIUse->getOperand(1); + MachineOperand &BaseAddrOp = MIUse->getOperand(2); + assert(BaseAddrOp.isReg() && BaseAddrOp.getReg() == ResultReg && + "Base reg should be the address of the original constant"); + assert(OffsetOp.isImm() && OffsetOp.getImm() == 0 && + "Expected a displacement of zero"); + OffsetOp.setImm(ConstVal.second.OffsetIntoStruct); + BaseAddrOp.setReg(LoadDestReg); + } + } + MI->eraseFromParent(); + break; + } + default: + llvm_unreachable("Unexpected constant pool instruction."); + } + + ConstsToDelete[MF].insert(MF->getConstantPool()->getConstantPoolIndex( + C, ConstVal.second.Alignment)); + } + } + + // Loop through the map of constants to delete and move function by function. + // In each function delete the constants that have been merged. + for (auto const &Element : ConstsToDelete) + Element.first->getConstantPool()->removeConstantsFromPool(Element.second); + + // Delete all of the constants that have been merged from the + // ModuleEntriesList. The map should be empty after this call. 
+ ModuleEntriesList.clear(); + + return true; +} + +} // namespace + +INITIALIZE_PASS(PPCMergeConstPool, DEBUG_TYPE, "PowerPC Merge Constant Pool", + false, false) +char PPCMergeConstPool::ID = 0; + +ModulePass *llvm::createPPCMergeConstPoolPass() { + return new PPCMergeConstPool(); +} diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -68,6 +68,7 @@ const Triple &TT = getTargetTriple(); return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); }; + bool isAIX() const { return getTargetTriple().isOSAIX(); } bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Addrspacecasts are always noops. diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -137,6 +137,7 @@ initializeGlobalISel(PR); initializePPCCTRLoopsPass(PR); initializePPCDAGToDAGISelPass(PR); + initializePPCMergeConstPoolPass(PR); } static bool isLittleEndianTriple(const Triple &T) { @@ -536,6 +537,7 @@ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, &PPCVSXFMAMutateID); + addPass(createPPCMergeConstPoolPass()); } // FIXME: We probably don't need to run these for -fPIE. 
diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -131,12 +131,18 @@ ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: PowerPC MI Peephole Optimization ; CHECK-NEXT: Remove dead machine instructions +; CHECK-NEXT: PowerPC Merge Constant Pool +; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Remove unreachable machine basic blocks ; CHECK-NEXT: Live Variable Analysis +; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Slot index numbering ; CHECK-NEXT: Live Interval Analysis ; CHECK-NEXT: PowerPC TLS Dynamic Call Fixup ; CHECK-NEXT: PowerPC TOC Register Dependencies +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Slot index numbering diff --git a/llvm/test/CodeGen/PowerPC/ppc-aix-const-pool-merge.ll b/llvm/test/CodeGen/PowerPC/ppc-aix-const-pool-merge.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppc-aix-const-pool-merge.ll @@ -0,0 +1,682 @@ +; NOTE Please do NOT auto generate this file. The DATA and DATA64 checks will +; not be generated correctly. 
+ +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 \ +; RUN: --ppc-const-pool-merge=module -ppc-asm-full-reg-names < %s | \ +; RUN: FileCheck %s --check-prefixes=SMALL32,DATA +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 \ +; RUN: --ppc-const-pool-merge=module -ppc-asm-full-reg-names --code-model=large < %s | \ +; RUN: FileCheck %s --check-prefixes=LARGE32,DATA +; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 \ +; RUN: --ppc-const-pool-merge=module -ppc-asm-full-reg-names --code-model=small < %s | \ +; RUN: FileCheck %s --check-prefixes=SMALL32,DATA +; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 \ +; RUN: --ppc-const-pool-merge=module -ppc-asm-full-reg-names --code-model=large < %s | \ +; RUN: FileCheck %s --check-prefixes=LARGE64,DATA64 +; RUN: llc -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 \ +; RUN: --ppc-const-pool-merge=module -ppc-asm-full-reg-names --code-model=small < %s | \ +; RUN: FileCheck %s --check-prefixes=SMALL64,DATA64 + + +@.str = private unnamed_addr constant [8 x i8] c"ABCDEFG\00", align 1 +@__const.testA.A = private unnamed_addr constant [4 x i32] [i32 34, i32 623, i32 888, i32 888], align 4 + +define double @testd1() local_unnamed_addr { +; SMALL32-LABEL: testd1: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: lwz r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: testd1: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r3, L..C0@l(r3) +; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: blr +; +; LARGE64-LABEL: testd1: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: addis r3, L..C0@u(r2) +; LARGE64-NEXT: ld r3, L..C0@l(r3) +; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: blr +; +; SMALL64-LABEL: testd1: +; SMALL64: # %bb.0: # %entry +; SMALL64-NEXT: ld r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: lfd f1, 
0(r3) +; SMALL64-NEXT: blr +entry: + ret double 3.784320e+02 +} + +define float @testf1() local_unnamed_addr { +; SMALL32-LABEL: testf1: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: lwz r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: lfs f1, 8(r3) +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: testf1: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r3, L..C0@l(r3) +; LARGE32-NEXT: lfs f1, 8(r3) +; LARGE32-NEXT: blr +; +; LARGE64-LABEL: testf1: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: addis r3, L..C0@u(r2) +; LARGE64-NEXT: ld r3, L..C0@l(r3) +; LARGE64-NEXT: lfs f1, 8(r3) +; LARGE64-NEXT: blr +; +; SMALL64-LABEL: testf1: +; SMALL64: # %bb.0: # %entry +; SMALL64-NEXT: ld r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: lfs f1, 8(r3) +; SMALL64-NEXT: blr +entry: + ret float 0x40039999A0000000 +} + +define double @testd2() local_unnamed_addr { +; SMALL32-LABEL: testd2: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: lwz r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: lfd f1, 16(r3) +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: testd2: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r3, L..C0@l(r3) +; LARGE32-NEXT: lfd f1, 16(r3) +; LARGE32-NEXT: blr +; +; LARGE64-LABEL: testd2: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: addis r3, L..C0@u(r2) +; LARGE64-NEXT: ld r3, L..C0@l(r3) +; LARGE64-NEXT: lfd f1, 16(r3) +; LARGE64-NEXT: blr +; +; SMALL64-LABEL: testd2: +; SMALL64: # %bb.0: # %entry +; SMALL64-NEXT: ld r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: lfd f1, 16(r3) +; SMALL64-NEXT: blr +entry: + ret double 6.920000e+00 +} + +define <4 x i32> @testv1() local_unnamed_addr { +; SMALL32-LABEL: testv1: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: lwz r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: li r4, 32 +; SMALL32-NEXT: lxvw4x vs34, r4, r3 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: testv1: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: addis 
r3, L..C0@u(r2) +; LARGE32-NEXT: li r4, 32 +; LARGE32-NEXT: lwz r3, L..C0@l(r3) +; LARGE32-NEXT: lxvw4x vs34, r4, r3 +; LARGE32-NEXT: blr +; +; LARGE64-LABEL: testv1: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: addis r3, L..C0@u(r2) +; LARGE64-NEXT: li r4, 32 +; LARGE64-NEXT: ld r3, L..C0@l(r3) +; LARGE64-NEXT: lxvw4x vs34, r4, r3 +; LARGE64-NEXT: blr +; +; SMALL64-LABEL: testv1: +; SMALL64: # %bb.0: # %entry +; SMALL64-NEXT: ld r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: li r4, 32 +; SMALL64-NEXT: lxvw4x vs34, r4, r3 +; SMALL64-NEXT: blr +entry: + ret <4 x i32> +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +; TODO: We can do better codegen here. For example in LARGE32 +; the following instructions: +; addis 3, L..C0@u(2) +; lwz 3, L..C0@l(3) +; are repeated three times. +; We should be able to common the three copies of +; identical instructions into one set. +define double @testd3() local_unnamed_addr { +; SMALL32-LABEL: testd3: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -64(r1) +; SMALL32-NEXT: stw r0, 72(r1) +; SMALL32-NEXT: lwz r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: lwz r4, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: lwz r5, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: lfd f1, 48(r3) +; SMALL32-NEXT: lfd f2, 56(r4) +; SMALL32-NEXT: lfs f3, 64(r5) +; SMALL32-NEXT: bl .callee[PR] +; SMALL32-NEXT: nop +; SMALL32-NEXT: addi r1, r1, 64 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: testd3: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -64(r1) +; LARGE32-NEXT: stw r0, 72(r1) +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r3, L..C0@l(r3) +; LARGE32-NEXT: addis r4, L..C0@u(r2) +; LARGE32-NEXT: lwz r4, L..C0@l(r4) +; LARGE32-NEXT: addis r5, L..C0@u(r2) +; LARGE32-NEXT: lfd f1, 48(r3) +; LARGE32-NEXT: 
lwz r5, L..C0@l(r5) +; LARGE32-NEXT: lfd f2, 56(r4) +; LARGE32-NEXT: lfs f3, 64(r5) +; LARGE32-NEXT: bl .callee[PR] +; LARGE32-NEXT: nop +; LARGE32-NEXT: addi r1, r1, 64 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr +; +; LARGE64-LABEL: testd3: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: mflr r0 +; LARGE64-NEXT: stdu r1, -112(r1) +; LARGE64-NEXT: std r0, 128(r1) +; LARGE64-NEXT: addis r3, L..C0@u(r2) +; LARGE64-NEXT: addis r4, L..C0@u(r2) +; LARGE64-NEXT: addis r5, L..C0@u(r2) +; LARGE64-NEXT: ld r3, L..C0@l(r3) +; LARGE64-NEXT: ld r4, L..C0@l(r4) +; LARGE64-NEXT: ld r5, L..C0@l(r5) +; LARGE64-NEXT: lfd f1, 48(r3) +; LARGE64-NEXT: lfd f2, 56(r4) +; LARGE64-NEXT: lfs f3, 64(r5) +; LARGE64-NEXT: bl .callee[PR] +; LARGE64-NEXT: nop +; LARGE64-NEXT: addi r1, r1, 112 +; LARGE64-NEXT: ld r0, 16(r1) +; LARGE64-NEXT: mtlr r0 +; LARGE64-NEXT: blr +; +; SMALL64-LABEL: testd3: +; SMALL64: # %bb.0: # %entry +; SMALL64-NEXT: mflr r0 +; SMALL64-NEXT: stdu r1, -112(r1) +; SMALL64-NEXT: std r0, 128(r1) +; SMALL64-NEXT: ld r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: ld r4, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: ld r5, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: lfd f1, 48(r3) +; SMALL64-NEXT: lfd f2, 56(r4) +; SMALL64-NEXT: lfs f3, 64(r5) +; SMALL64-NEXT: bl .callee[PR] +; SMALL64-NEXT: nop +; SMALL64-NEXT: addi r1, r1, 112 +; SMALL64-NEXT: ld r0, 16(r1) +; SMALL64-NEXT: mtlr r0 +; SMALL64-NEXT: blr +entry: + %call = tail call double @callee(double noundef 4.582600e+01, double noundef 0x40564F0A3D70A3D7, float noundef 0x402225E360000000) + ret double %call +} + +declare double @callee(double noundef, double noundef, float noundef) local_unnamed_addr + +define double @testd4() local_unnamed_addr { +; SMALL32-LABEL: testd4: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: lwz r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: lfd f1, 16(r3) +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: testd4: +; LARGE32: # %bb.0: # 
%entry +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r3, L..C0@l(r3) +; LARGE32-NEXT: lfd f1, 16(r3) +; LARGE32-NEXT: blr +; +; LARGE64-LABEL: testd4: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: addis r3, L..C0@u(r2) +; LARGE64-NEXT: ld r3, L..C0@l(r3) +; LARGE64-NEXT: lfd f1, 16(r3) +; LARGE64-NEXT: blr +; +; SMALL64-LABEL: testd4: +; SMALL64: # %bb.0: # %entry +; SMALL64-NEXT: ld r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: lfd f1, 16(r3) +; SMALL64-NEXT: blr +entry: + ret double 6.920000e+00 +} + +define i64 @testi1() local_unnamed_addr { +; SMALL32-LABEL: testi1: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: lis r3, -14366 +; SMALL32-NEXT: ori r4, r3, 53403 +; SMALL32-NEXT: li r3, 20802 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: testi1: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: lis r3, -14366 +; LARGE32-NEXT: ori r4, r3, 53403 +; LARGE32-NEXT: li r3, 20802 +; LARGE32-NEXT: blr +; +; LARGE64-LABEL: testi1: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: li r3, 10401 +; LARGE64-NEXT: rldic r3, r3, 33, 17 +; LARGE64-NEXT: oris r3, r3, 51170 +; LARGE64-NEXT: ori r3, r3, 53403 +; LARGE64-NEXT: blr +; +; SMALL64-LABEL: testi1: +; SMALL64: # %bb.0: # %entry +; SMALL64-NEXT: li r3, 10401 +; SMALL64-NEXT: rldic r3, r3, 33, 17 +; SMALL64-NEXT: oris r3, r3, 51170 +; SMALL64-NEXT: ori r3, r3, 53403 +; SMALL64-NEXT: blr +entry: + ret i64 89347263221915 +} + +define double @testld1() local_unnamed_addr { +; SMALL32-LABEL: testld1: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: lwz r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: lfd f1, 72(r3) +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: testld1: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r3, L..C0@l(r3) +; LARGE32-NEXT: lfd f1, 72(r3) +; LARGE32-NEXT: blr +; +; LARGE64-LABEL: testld1: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: addis r3, L..C0@u(r2) +; LARGE64-NEXT: ld r3, L..C0@l(r3) +; LARGE64-NEXT: lfd f1, 72(r3) +; LARGE64-NEXT: blr +; 
+; SMALL64-LABEL: testld1: +; SMALL64: # %bb.0: # %entry +; SMALL64-NEXT: ld r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: lfd f1, 72(r3) +; SMALL64-NEXT: blr +entry: + ret double 0x417179806D5CDEBE +} + +; Function Attrs: nounwind +define signext i32 @testS() local_unnamed_addr { +; SMALL32-LABEL: testS: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -64(r1) +; SMALL32-NEXT: lwz r3, L..C1(r2) # @.str +; SMALL32-NEXT: stw r0, 72(r1) +; SMALL32-NEXT: bl .calleeS[PR] +; SMALL32-NEXT: nop +; SMALL32-NEXT: addi r1, r1, 64 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: testS: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -64(r1) +; LARGE32-NEXT: stw r0, 72(r1) +; LARGE32-NEXT: addis r3, L..C1@u(r2) +; LARGE32-NEXT: lwz r3, L..C1@l(r3) +; LARGE32-NEXT: bl .calleeS[PR] +; LARGE32-NEXT: nop +; LARGE32-NEXT: addi r1, r1, 64 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr +; +; LARGE64-LABEL: testS: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: mflr r0 +; LARGE64-NEXT: stdu r1, -112(r1) +; LARGE64-NEXT: addis r3, L..C1@u(r2) +; LARGE64-NEXT: std r0, 128(r1) +; LARGE64-NEXT: ld r3, L..C1@l(r3) +; LARGE64-NEXT: bl .calleeS[PR] +; LARGE64-NEXT: nop +; LARGE64-NEXT: addi r1, r1, 112 +; LARGE64-NEXT: ld r0, 16(r1) +; LARGE64-NEXT: mtlr r0 +; LARGE64-NEXT: blr +; +; SMALL64-LABEL: testS: +; SMALL64: # %bb.0: # %entry +; SMALL64-NEXT: mflr r0 +; SMALL64-NEXT: stdu r1, -112(r1) +; SMALL64-NEXT: ld r3, L..C1(r2) # @.str +; SMALL64-NEXT: std r0, 128(r1) +; SMALL64-NEXT: bl .calleeS[PR] +; SMALL64-NEXT: nop +; SMALL64-NEXT: addi r1, r1, 112 +; SMALL64-NEXT: ld r0, 16(r1) +; SMALL64-NEXT: mtlr r0 +; SMALL64-NEXT: blr +entry: + %call = tail call signext i32 @calleeS(ptr noundef nonnull @.str) + ret i32 %call +} + +declare signext i32 @calleeS(ptr noundef) local_unnamed_addr + +define float @testA() local_unnamed_addr { +; 
SMALL32-LABEL: testA: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: mflr r0 +; SMALL32-NEXT: stwu r1, -80(r1) +; SMALL32-NEXT: lwz r3, L..C2(r2) # @__const.testA.A +; SMALL32-NEXT: stw r0, 88(r1) +; SMALL32-NEXT: lxvw4x vs0, 0, r3 +; SMALL32-NEXT: addi r3, r1, 64 +; SMALL32-NEXT: stxvw4x vs0, 0, r3 +; SMALL32-NEXT: bl .calleeA[PR] +; SMALL32-NEXT: nop +; SMALL32-NEXT: lwz r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: lfd f0, 80(r3) +; SMALL32-NEXT: xsadddp f0, f1, f0 +; SMALL32-NEXT: xsrsp f1, f0 +; SMALL32-NEXT: addi r1, r1, 80 +; SMALL32-NEXT: lwz r0, 8(r1) +; SMALL32-NEXT: mtlr r0 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: testA: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: mflr r0 +; LARGE32-NEXT: stwu r1, -80(r1) +; LARGE32-NEXT: stw r0, 88(r1) +; LARGE32-NEXT: addis r3, L..C2@u(r2) +; LARGE32-NEXT: lwz r3, L..C2@l(r3) +; LARGE32-NEXT: lxvw4x vs0, 0, r3 +; LARGE32-NEXT: addi r3, r1, 64 +; LARGE32-NEXT: stxvw4x vs0, 0, r3 +; LARGE32-NEXT: bl .calleeA[PR] +; LARGE32-NEXT: nop +; LARGE32-NEXT: addis r3, L..C0@u(r2) +; LARGE32-NEXT: lwz r3, L..C0@l(r3) +; LARGE32-NEXT: lfd f0, 80(r3) +; LARGE32-NEXT: xsadddp f0, f1, f0 +; LARGE32-NEXT: xsrsp f1, f0 +; LARGE32-NEXT: addi r1, r1, 80 +; LARGE32-NEXT: lwz r0, 8(r1) +; LARGE32-NEXT: mtlr r0 +; LARGE32-NEXT: blr +; +; LARGE64-LABEL: testA: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: mflr r0 +; LARGE64-NEXT: stdu r1, -128(r1) +; LARGE64-NEXT: addis r3, L..C2@u(r2) +; LARGE64-NEXT: std r0, 144(r1) +; LARGE64-NEXT: ld r3, L..C2@l(r3) +; LARGE64-NEXT: lxvw4x vs0, 0, r3 +; LARGE64-NEXT: addi r3, r1, 112 +; LARGE64-NEXT: stxvw4x vs0, 0, r3 +; LARGE64-NEXT: bl .calleeA[PR] +; LARGE64-NEXT: nop +; LARGE64-NEXT: addis r3, L..C0@u(r2) +; LARGE64-NEXT: ld r3, L..C0@l(r3) +; LARGE64-NEXT: lfd f0, 80(r3) +; LARGE64-NEXT: xsadddp f0, f1, f0 +; LARGE64-NEXT: xsrsp f1, f0 +; LARGE64-NEXT: addi r1, r1, 128 +; LARGE64-NEXT: ld r0, 16(r1) +; LARGE64-NEXT: mtlr r0 +; LARGE64-NEXT: blr +; +; SMALL64-LABEL: testA: +; SMALL64: 
# %bb.0: # %entry +; SMALL64-NEXT: mflr r0 +; SMALL64-NEXT: stdu r1, -128(r1) +; SMALL64-NEXT: ld r3, L..C2(r2) # @__const.testA.A +; SMALL64-NEXT: std r0, 144(r1) +; SMALL64-NEXT: lxvw4x vs0, 0, r3 +; SMALL64-NEXT: addi r3, r1, 112 +; SMALL64-NEXT: stxvw4x vs0, 0, r3 +; SMALL64-NEXT: bl .calleeA[PR] +; SMALL64-NEXT: nop +; SMALL64-NEXT: ld r3, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: lfd f0, 80(r3) +; SMALL64-NEXT: xsadddp f0, f1, f0 +; SMALL64-NEXT: xsrsp f1, f0 +; SMALL64-NEXT: addi r1, r1, 128 +; SMALL64-NEXT: ld r0, 16(r1) +; SMALL64-NEXT: mtlr r0 +; SMALL64-NEXT: blr +entry: + %A = alloca [4 x i32], align 4 + call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %A) + call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(16) %A, ptr noundef nonnull align 4 dereferenceable(16) @__const.testA.A, i64 16, i1 false) + %call = call float @calleeA(ptr noundef nonnull %A) + %conv = fpext float %call to double + %add = fadd double %conv, 6.836200e+00 + %conv1 = fptrunc double %add to float + call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %A) + ret float %conv1 +} + +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) +declare float @calleeA(ptr noundef) local_unnamed_addr + +define double @chooseDouble(i32 noundef signext %a) local_unnamed_addr { +; SMALL32-LABEL: chooseDouble: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: lwz r4, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: cmpwi r3, 0 +; SMALL32-NEXT: li r3, 0 +; SMALL32-NEXT: li r5, 8 +; SMALL32-NEXT: iselgt r3, r5, r3 +; SMALL32-NEXT: addi r4, r4, 96 +; SMALL32-NEXT: lfdx f1, r4, r3 +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: chooseDouble: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: addis r4, L..C0@u(r2) +; LARGE32-NEXT: cmpwi r3, 0 +; LARGE32-NEXT: li r3, 0 +; LARGE32-NEXT: li r5, 8 +; LARGE32-NEXT: iselgt r3, r5, r3 +; LARGE32-NEXT: lwz r4, L..C0@l(r4) +; LARGE32-NEXT: addi r4, r4, 96 +; LARGE32-NEXT: 
lfdx f1, r4, r3 +; LARGE32-NEXT: blr +; +; LARGE64-LABEL: chooseDouble: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: addis r4, L..C0@u(r2) +; LARGE64-NEXT: cmpwi r3, 0 +; LARGE64-NEXT: li r3, 0 +; LARGE64-NEXT: li r5, 8 +; LARGE64-NEXT: ld r4, L..C0@l(r4) +; LARGE64-NEXT: iselgt r3, r5, r3 +; LARGE64-NEXT: addi r4, r4, 112 +; LARGE64-NEXT: lfdx f1, r4, r3 +; LARGE64-NEXT: blr +; +; SMALL64-LABEL: chooseDouble: +; SMALL64: # %bb.0: # %entry +; SMALL64-NEXT: ld r4, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: cmpwi r3, 0 +; SMALL64-NEXT: li r3, 0 +; SMALL64-NEXT: li r5, 8 +; SMALL64-NEXT: iselgt r3, r5, r3 +; SMALL64-NEXT: addi r4, r4, 112 +; SMALL64-NEXT: lfdx f1, r4, r3 +; SMALL64-NEXT: blr +entry: + %cmp = icmp sgt i32 %a, 0 + %cond = select i1 %cmp, double 3.482000e+00, double 6.920000e+00 + ret double %cond +} + +define double @chooseDoubleL(i32 noundef signext %a, i32 noundef signext %b) local_unnamed_addr #0 { +; SMALL32-LABEL: chooseDoubleL: +; SMALL32: # %bb.0: # %entry +; SMALL32-NEXT: lwz r5, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: lwz r6, L..C0(r2) # @__ModuleConstantPool0 +; SMALL32-NEXT: cmpwi r4, 0 +; SMALL32-NEXT: li r4, 0 +; SMALL32-NEXT: li r7, 8 +; SMALL32-NEXT: iselgt r4, r7, r4 +; SMALL32-NEXT: cmpwi r3, 0 +; SMALL32-NEXT: addi r5, r5, 112 +; SMALL32-NEXT: addi r3, r6, 88 +; SMALL32-NEXT: add r4, r5, r4 +; SMALL32-NEXT: iselgt r3, r3, r4 +; SMALL32-NEXT: lfd f1, 0(r3) +; SMALL32-NEXT: blr +; +; LARGE32-LABEL: chooseDoubleL: +; LARGE32: # %bb.0: # %entry +; LARGE32-NEXT: addis r5, L..C0@u(r2) +; LARGE32-NEXT: cmpwi r4, 0 +; LARGE32-NEXT: li r6, 0 +; LARGE32-NEXT: li r7, 8 +; LARGE32-NEXT: iselgt r6, r7, r6 +; LARGE32-NEXT: cmpwi r3, 0 +; LARGE32-NEXT: lwz r4, L..C0@l(r5) +; LARGE32-NEXT: addis r5, L..C0@u(r2) +; LARGE32-NEXT: lwz r5, L..C0@l(r5) +; LARGE32-NEXT: addi r4, r4, 112 +; LARGE32-NEXT: add r4, r4, r6 +; LARGE32-NEXT: addi r3, r5, 88 +; LARGE32-NEXT: iselgt r3, r3, r4 +; LARGE32-NEXT: lfd f1, 0(r3) +; LARGE32-NEXT: 
blr +; +; LARGE64-LABEL: chooseDoubleL: +; LARGE64: # %bb.0: # %entry +; LARGE64-NEXT: addis r5, L..C0@u(r2) +; LARGE64-NEXT: addis r6, L..C0@u(r2) +; LARGE64-NEXT: cmpwi r4, 0 +; LARGE64-NEXT: li r7, 8 +; LARGE64-NEXT: ld r5, L..C0@l(r5) +; LARGE64-NEXT: ld r4, L..C0@l(r6) +; LARGE64-NEXT: li r6, 0 +; LARGE64-NEXT: iselgt r6, r7, r6 +; LARGE64-NEXT: cmpwi r3, 0 +; LARGE64-NEXT: addi r5, r5, 96 +; LARGE64-NEXT: addi r3, r4, 88 +; LARGE64-NEXT: add r5, r5, r6 +; LARGE64-NEXT: iselgt r3, r3, r5 +; LARGE64-NEXT: lfd f1, 0(r3) +; LARGE64-NEXT: blr +; +; SMALL64-LABEL: chooseDoubleL: +; SMALL64: # %bb.0: # %entry +; SMALL64-NEXT: ld r5, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: ld r6, L..C0(r2) # @__ModuleConstantPool0 +; SMALL64-NEXT: cmpwi r4, 0 +; SMALL64-NEXT: li r4, 0 +; SMALL64-NEXT: li r7, 8 +; SMALL64-NEXT: iselgt r4, r7, r4 +; SMALL64-NEXT: cmpwi r3, 0 +; SMALL64-NEXT: addi r5, r5, 96 +; SMALL64-NEXT: addi r3, r6, 88 +; SMALL64-NEXT: add r4, r5, r4 +; SMALL64-NEXT: iselgt r3, r3, r4 +; SMALL64-NEXT: lfd f1, 0(r3) +; SMALL64-NEXT: blr +entry: + %cmp = icmp sgt i32 %a, 0 + %cmp1 = icmp sgt i32 %b, 0 + %cond = select i1 %cmp1, double 7.850000e+00, double 6.920000e+00 + %cond2 = select i1 %cmp, double 3.482000e+00, double %cond + ret double %cond2 +} + +; DATA: .csect L..__ModuleConstantPool0[RO],2 +; DATA: .align 4 +; DATA: .vbyte 4, 1081583337 # double 378.43200000000002 +; DATA: .vbyte 4, 2027224564 +; DATA: .vbyte 4, 0x401ccccd # float 2.45000005 +; DATA: .space 4 +; DATA: .vbyte 4, 1075555860 # double 6.9199999999999999 +; DATA: .vbyte 4, 2061584302 +; DATA: .space 8 +; DATA: .vbyte 4, 13 # 0xd +; DATA: .vbyte 4, 78 # 0x4e +; DATA: .vbyte 4, 4294967283 # 0xfffffff3 +; DATA: .vbyte 4, 100 # 0x64 +; DATA: .vbyte 4, 1078389178 # double 45.826000000000001 +; DATA: .vbyte 4, 1580547965 +; DATA: .vbyte 4, 1079398154 # double 89.234999999999999 +; DATA: .vbyte 4, 1030792151 +; DATA: .vbyte 4, 0x41112f1b # float 9.07400035 +; DATA: .space 4 +; DATA: .vbyte 
4, 1097955712 # double 18323462.835173361 +; DATA: .vbyte 4, 1834802878 +; DATA: .vbyte 4, 1075533892 # double 6.8361999999999998 +; DATA: .vbyte 4, 3490949418 +; DATA: .vbyte 4, 1075555860 # double 6.9199999999999999 +; DATA: .vbyte 4, 2061584302 +; DATA: .vbyte 4, 1074518818 # double 3.4820000000000002 +; DATA: .vbyte 4, 3504693314 +; DATA: .vbyte 4, 1075555860 # double 6.9199999999999999 +; DATA: .vbyte 4, 2061584302 +; DATA: .vbyte 4, 1075799654 # double 7.8499999999999996 +; DATA: .vbyte 4, 1717986918 +; DATA: L..C0: +; DATA: .tc L..__ModuleConstantPool0 + +; DATA64: .csect L..__ModuleConstantPool0[RO],2 +; DATA64: .align 4 +; DATA64: .vbyte 8, 0x4077a6e978d4fdf4 # double 378.43200000000002 +; DATA64: .vbyte 4, 0x401ccccd # float 2.45000005 +; DATA64: .space 4 +; DATA64: .vbyte 8, 0x401bae147ae147ae # double 6.9199999999999999 +; DATA64: .space 8 +; DATA64: .vbyte 4, 13 # 0xd +; DATA64: .vbyte 4, 78 # 0x4e +; DATA64: .vbyte 4, 4294967283 # 0xfffffff3 +; DATA64: .vbyte 4, 100 # 0x64 +; DATA64: .vbyte 8, 0x4046e9ba5e353f7d # double 45.826000000000001 +; DATA64: .vbyte 8, 0x40564f0a3d70a3d7 # double 89.234999999999999 +; DATA64: .vbyte 4, 0x41112f1b # float 9.07400035 +; DATA64: .space 4 +; DATA64: .vbyte 8, 0x417179806d5cdebe # double 18323462.835173361 +; DATA64: .vbyte 8, 0x401b5844d013a92a # double 6.8361999999999998 +; DATA64: .vbyte 8, 0x401bae147ae147ae # double 6.9199999999999999 +; DATA64: .vbyte 8, 0x401f666666666666 # double 7.8499999999999996 +; DATA64: .vbyte 8, 0x401bae147ae147ae # double 6.9199999999999999 +; DATA64: .vbyte 8, 0x400bdb22d0e56042 # double 3.4820000000000002 +; DATA64: L..C0: +; DATA64: .tc L..__ModuleConstantPool0