Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h @@ -91,8 +91,8 @@ uint64_t ZeroIsSpecialNew : 3; // Is the operation commutative? uint64_t IsCommutative : 1; - // The operand number to check for load immediate. - uint64_t ConstantOpNo : 3; + // The operand number to check for add-immediate def. + uint64_t OpNoForForwarding : 3; // The operand number for the immediate. uint64_t ImmOpNo : 3; // The opcode of the new instruction. @@ -101,6 +101,8 @@ uint64_t ImmWidth : 5; // The immediate should be truncated to N bits. uint64_t TruncateImmTo : 5; + // Is the instruction summing the operand + uint64_t IsSummingOperands : 1; }; // Information required to convert an instruction to just a materialized @@ -123,10 +125,42 @@ unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, SmallVectorImpl &NewMIs) const; - bool transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III, - unsigned ConstantOpNo, int64_t Imm) const; - MachineInstr *getConstantDefMI(MachineInstr &MI, unsigned &ConstOp, - bool &SeenIntermediateUse) const; + + // If the inst has imm-form and one of its operand is produced by a LI, + // put the imm into the inst directly and remove the LI if possible. + bool transformToImmFormFedByLI(MachineInstr &MI, const ImmInstrInfo &III, + unsigned ConstantOpNo, int64_t Imm) const; + // If the inst has imm-form and one of its operand is produced by an + // add-immediate, try to transform it when possible. + bool transformToImmFormFedByAdd(MachineInstr &MI, const ImmInstrInfo &III, + unsigned ConstantOpNo, + MachineInstr &DefMI, + bool KillDefMI) const; + // Try to find that, if the instruction 'MI' contains any operand that + // could be forwarded from some inst that feeds it. If yes, return the + // Def of that operand. 
And OpNoForForwarding is the operand index in + // the 'MI' for that 'Def'. If we see another use of this Def between + // the Def and the MI, SeenIntermediateUse becomes 'true'. + MachineInstr *getForwardingDefMI(MachineInstr &MI, + unsigned &OpNoForForwarding, + bool &SeenIntermediateUse) const; + + // Can the user MI have its source at index \p OpNoForForwarding + // forwarded from an add-immediate that feeds it? + bool isUseMIElgibleForForwarding(MachineInstr &MI, const ImmInstrInfo &III, + unsigned OpNoForForwarding) const; + bool isDefMIElgibleForForwarding(MachineInstr &DefMI, + const ImmInstrInfo &III, + MachineOperand *&ImmMO, + MachineOperand *&RegMO) const; + bool isImmElgibleForForwarding(const MachineOperand &ImmMO, + const MachineInstr &DefMI, + const ImmInstrInfo &III, + int64_t &Imm) const; + bool isRegElgibleForForwarding(const MachineOperand &RegMO, + const MachineInstr &DefMI, + const MachineInstr &MI, + bool KillDefMI) const; const unsigned *getStoreOpcodesForSpillArray() const; const unsigned *getLoadOpcodesForSpillArray() const; virtual void anchor(); Index: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2088,11 +2088,9 @@ return true; } -#ifndef NDEBUG static bool isAnImmediateOperand(const MachineOperand &MO) { return MO.isCPI() || MO.isGlobal() || MO.isImm(); } -#endif bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { auto &MBB = *MI.getParent(); @@ -2256,10 +2254,11 @@ .addImm(LII.Imm); } -MachineInstr *PPCInstrInfo::getConstantDefMI(MachineInstr &MI, - unsigned &ConstOp, - bool &SeenIntermediateUse) const { - ConstOp = ~0U; +MachineInstr *PPCInstrInfo::getForwardingDefMI( + MachineInstr &MI, + unsigned &OpNoForForwarding, + bool &SeenIntermediateUse) const { + OpNoForForwarding = ~0U; MachineInstr *DefMI = nullptr; MachineRegisterInfo *MRI = 
&MI.getParent()->getParent()->getRegInfo(); const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -2276,7 +2275,7 @@ if (TargetRegisterInfo::isVirtualRegister(TrueReg)) { DefMI = MRI->getVRegDef(TrueReg); if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8) { - ConstOp = i; + OpNoForForwarding = i; break; } } @@ -2319,15 +2318,22 @@ if (PPC::G8RCRegClass.contains(Reg)) Reg = Reg - PPC::X0 + PPC::R0; - // Is this register defined by a load-immediate in this block? + // Is this register defined by some form of add-immediate (including + // load-immediate) within this basic block? for ( ; It != E; ++It) { if (It->modifiesRegister(Reg, &getRegisterInfo())) { - if (It->getOpcode() == PPC::LI || It->getOpcode() == PPC::LI8) { - ConstOp = i; + switch (It->getOpcode()) { + default: break; + case PPC::LI: + case PPC::LI8: + case PPC::ADDItocL: + case PPC::ADDI: + case PPC::ADDI8: + OpNoForForwarding = i; return &*It; - } else - break; - } else if (It->readsRegister(Reg, &getRegisterInfo())) + } + break; + } else if (It->readsRegister(Reg, &getRegisterInfo())) // If we see another use of this reg between the def and the MI, // we want to flat it so the def isn't deleted. SeenIntermediateUse = true; @@ -2335,7 +2341,7 @@ } } } - return ConstOp == ~0U ? nullptr : DefMI; + return OpNoForForwarding == ~0U ? nullptr : DefMI; } const unsigned *PPCInstrInfo::getStoreOpcodesForSpillArray() const { @@ -2371,35 +2377,48 @@ } // If this instruction has an immediate form and one of its operands is a -// result of a load-immediate, convert it to the immediate form if the constant -// is in range. +// result of a load-immediate or an add-immediate, convert it to +// the immediate form if the constant is in range. 
bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI, MachineInstr **KilledDef) const { MachineFunction *MF = MI.getParent()->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); bool PostRA = !MRI->isSSA(); bool SeenIntermediateUse = true; - unsigned ConstantOperand = ~0U; - MachineInstr *DefMI = getConstantDefMI(MI, ConstantOperand, - SeenIntermediateUse); - if (!DefMI || !DefMI->getOperand(1).isImm()) + unsigned ForwardingOperand = ~0U; + MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand, + SeenIntermediateUse); + if (!DefMI) + return false; + assert(ForwardingOperand < MI.getNumOperands() && + "The forwarding operand needs to be valid at this point"); + bool KillFwdDefMI = !SeenIntermediateUse && + MI.getOperand(ForwardingOperand).isKill(); + if (KilledDef && KillFwdDefMI) + *KilledDef = DefMI; + + ImmInstrInfo III; + bool HasImmForm = instrHasImmForm(MI, III); + // If this is a reg+reg instruction that has a reg+imm form, + // and one of the operands is produced by an add-immediate, + // try to convert it. + if (HasImmForm && transformToImmFormFedByAdd(MI, III, ForwardingOperand, + *DefMI, KillFwdDefMI)) + return true; + + if ((DefMI->getOpcode() != PPC::LI && DefMI->getOpcode() != PPC::LI8) || + !DefMI->getOperand(1).isImm()) return false; - assert(ConstantOperand < MI.getNumOperands() && - "The constant operand needs to be valid at this point"); int64_t Immediate = DefMI->getOperand(1).getImm(); // Sign-extend to 64-bits. int64_t SExtImm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ? (Immediate | 0xFFFFFFFFFFFF0000) : Immediate; - if (KilledDef && MI.getOperand(ConstantOperand).isKill() && - !SeenIntermediateUse) - *KilledDef = DefMI; - - // If this is a reg+reg instruction that has a reg+imm form, convert it now. 
- ImmInstrInfo III; - if (instrHasImmForm(MI, III)) - return transformToImmForm(MI, III, ConstantOperand, SExtImm); + // If this is a reg+reg instruction that has a reg+imm form, + // and one of the operands is produced by LI, convert it now. + if (HasImmForm) + return transformToImmFormFedByLI(MI, III, ForwardingOperand, SExtImm); bool ReplaceWithLI = false; bool Is64BitLI = false; @@ -2610,10 +2629,11 @@ // are the update form loads/stores for which a constant operand 2 would need // to turn into a displacement and move operand 1 to the operand 2 position. III.ImmOpNo = 2; - III.ConstantOpNo = 2; + III.OpNoForForwarding = 2; III.ImmWidth = 16; III.ImmMustBeMultipleOf = 1; III.TruncateImmTo = 0; + III.IsSummingOperands = false; switch (Opc) { default: return false; case PPC::ADD4: @@ -2622,6 +2642,7 @@ III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 1; III.IsCommutative = true; + III.IsSummingOperands = true; III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8; break; case PPC::ADDC: @@ -2630,6 +2651,7 @@ III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = true; + III.IsSummingOperands = true; III.ImmOpcode = Opc == PPC::ADDC ? 
PPC::ADDIC : PPC::ADDIC8; break; case PPC::ADDCo: @@ -2637,6 +2659,7 @@ III.ZeroIsSpecialOrig = 0; III.ZeroIsSpecialNew = 0; III.IsCommutative = true; + III.IsSummingOperands = true; III.ImmOpcode = PPC::ADDICo; break; case PPC::SUBFC: @@ -2809,8 +2832,9 @@ III.ZeroIsSpecialOrig = 1; III.ZeroIsSpecialNew = 2; III.IsCommutative = true; + III.IsSummingOperands = true; III.ImmOpNo = 1; - III.ConstantOpNo = 2; + III.OpNoForForwarding = 2; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break; @@ -2866,8 +2890,9 @@ III.ZeroIsSpecialOrig = 2; III.ZeroIsSpecialNew = 3; III.IsCommutative = false; + III.IsSummingOperands = true; III.ImmOpNo = 2; - III.ConstantOpNo = 3; + III.OpNoForForwarding = 3; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break; @@ -2911,8 +2936,9 @@ III.ZeroIsSpecialOrig = 1; III.ZeroIsSpecialNew = 2; III.IsCommutative = true; + III.IsSummingOperands = true; III.ImmOpNo = 1; - III.ConstantOpNo = 2; + III.OpNoForForwarding = 2; switch(Opc) { default: llvm_unreachable("Unknown opcode"); case PPC::LXVX: @@ -2984,13 +3010,256 @@ } } -bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III, - unsigned ConstantOpNo, - int64_t Imm) const { +// Check if the 'MI' that has the index OpNoForForwarding +// meets the requirement described in the ImmInstrInfo. +bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI, + const ImmInstrInfo &III, + unsigned OpNoForForwarding + ) const { + // As the algorithm of checking for PPC::ZERO/PPC::ZERO8 + // would not work pre-RA, we can only do the check post RA. + MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + if (MRI.isSSA()) + return false; + + // Cannot do the transform if MI isn't summing the operands. + if (!III.IsSummingOperands) + return false; + + // The instruction we are trying to replace must have the ZeroIsSpecialOrig set. 
+ if (!III.ZeroIsSpecialOrig) + return false; + + // We cannot do the transform if the operand we are trying to replace + // isn't the same as the operand the instruction allows. + if (OpNoForForwarding != III.OpNoForForwarding) + return false; + + // Check if the instruction we are trying to transform really has + // the special zero register as its operand. + if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO && + MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8) + return false; + + // This machine instruction is convertible if it is, + // 1. summing the operands. + // 2. one of the operands is special zero register. + // 3. the operand we are trying to replace is allowed by the MI. + return true; +} + +// Check if the DefMI is the add inst and set the ImmMO and RegMO +// accordingly. +bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI, + const ImmInstrInfo &III, + MachineOperand *&ImmMO, + MachineOperand *&RegMO) const { + unsigned Opc = DefMI.getOpcode(); + if (Opc != PPC::ADDItocL && Opc != PPC::ADDI && Opc != PPC::ADDI8) + return false; + + assert(DefMI.getNumOperands() >= 3 && + "Add inst must have at least three operands"); + RegMO = &DefMI.getOperand(1); + ImmMO = &DefMI.getOperand(2); + + // This DefMI is eligible for forwarding if it is: + // 1. add inst + // 2. one of the operands is Imm/CPI/Global. + return isAnImmediateOperand(*ImmMO); +} + +bool PPCInstrInfo::isRegElgibleForForwarding(const MachineOperand &RegMO, + const MachineInstr &DefMI, + const MachineInstr &MI, + bool KillDefMI + ) const { + // x = addi y, imm + // ... + // z = lfdx 0, x -> z = lfd imm(y) + // The Reg "y" can be forwarded to the MI(z) only when there is no DEF + // of "y" between the DEF of "x" and "z". + // The query is only valid post RA. 
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + if (MRI.isSSA()) + return false; + + // MachineInstr::readsRegister only returns true if the machine + // instruction reads the exact register or its super-register. It + // does not consider uses of sub-registers which seems like strange + // behaviour. Nonetheless, if we end up with a 64-bit register here, + // get the corresponding 32-bit register to check. + unsigned Reg = RegMO.getReg(); + if (PPC::G8RCRegClass.contains(Reg)) + Reg = Reg - PPC::X0 + PPC::R0; + + // Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg. + MachineBasicBlock::const_reverse_iterator It = MI; + MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend(); + It++; + for (; It != E; ++It) { + if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI) + return false; + // Made it to DefMI without encountering a clobber. + if ((&*It) == &DefMI) + break; + } + assert((&*It) == &DefMI && "DefMI is missing"); + + // If DefMI also uses the register to be forwarded, we can only forward it + // if DefMI is being erased. + if (DefMI.readsRegister(Reg, &getRegisterInfo())) + return KillDefMI; + + return true; +} + +bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO, + const MachineInstr &DefMI, + const ImmInstrInfo &III, + int64_t &Imm) const { + assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate"); + if (DefMI.getOpcode() == PPC::ADDItocL) { + // The operand for ADDItocL is CPI, which isn't imm at compiling time, + // However, we know that, it is 16-bit width, and has the alignment of 4. + // Check if the instruction met the requirement. + if (III.ImmMustBeMultipleOf > 4 || + III.TruncateImmTo || III.ImmWidth != 16) + return false; + + return true; + } + + if (ImmMO.isImm()) { + // It is Imm, we need to check if the Imm fit the range. + int64_t Immediate = ImmMO.getImm(); + // Sign-extend to 64-bits. 
+ Imm = ((uint64_t)Immediate & ~0x7FFFuLL) != 0 ? + (Immediate | 0xFFFFFFFFFFFF0000) : Immediate; + + if (Imm % III.ImmMustBeMultipleOf) + return false; + if (III.TruncateImmTo) + Imm &= ((1 << III.TruncateImmTo) - 1); + if (III.SignedImm) { + APInt ActualValue(64, Imm, true); + if (!ActualValue.isSignedIntN(III.ImmWidth)) + return false; + } else { + uint64_t UnsignedMax = (1 << III.ImmWidth) - 1; + if ((uint64_t)Imm > UnsignedMax) + return false; + } + } + else + return false; + + // This ImmMO is forwarded if it meets the requirement described + // in ImmInstrInfo + return true; +} + +// If an X-Form instruction is fed by an add-immediate and one of its operands +// is the literal zero, attempt to forward the source of the add-immediate to +// the corresponding D-Form instruction with the displacement coming from +// the immediate being added. +bool PPCInstrInfo::transformToImmFormFedByAdd(MachineInstr &MI, + const ImmInstrInfo &III, + unsigned OpNoForForwarding, + MachineInstr &DefMI, + bool KillDefMI) const { + // RegMO ImmMO + // | | + // x = addi reg, imm <----- DefMI + // y = op 0 , x <----- MI + // | + // OpNoForForwarding + // Check if the MI meet the requirement described in the III. + if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding)) + return false; + + // Check if the DefMI meet the requirement + // described in the III. If yes, set the ImmMO and RegMO accordingly. + MachineOperand *ImmMO = nullptr; + MachineOperand *RegMO = nullptr; + if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO)) + return false; + assert(ImmMO && RegMO && "Imm and Reg operand must have been set"); + + // As we get the Imm operand now, we need to check if the ImmMO meet + // the requirement described in the III. If yes set the Imm. + int64_t Imm = 0; + if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm)) + return false; + + // Check if the RegMO can be forwarded to MI. 
+ if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI)) + return false; + + // We know that, the MI and DefMI both meet the pattern, and + // the Imm also meet the requirement with the new Imm-form. + // It is safe to do the transformation now. + LLVM_DEBUG(dbgs() << "Replacing instruction:\n"); + LLVM_DEBUG(MI.dump()); + LLVM_DEBUG(dbgs() << "Fed by:\n"); + LLVM_DEBUG(DefMI.dump()); + + // Update the base reg first. + MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(), + false, false, + RegMO->isKill()); + + // Then, update the imm. + if (ImmMO->isImm()) { + // If the ImmMO is Imm, change the operand that has ZERO to that Imm + // directly. + MI.getOperand(III.ZeroIsSpecialOrig).ChangeToImmediate(Imm); + } + else { + // Otherwise, it is Constant Pool Index(CPI) or Global, + // which is relocation in fact. We need to replace the special zero + // register with ImmMO. + // Before that, we need to fixup the target flags for imm. + // For some reason, we miss to set the flag for the ImmMO if it is CPI. + if (DefMI.getOpcode() == PPC::ADDItocL) + ImmMO->setTargetFlags(PPCII::MO_TOC_LO); + + // MI didn't have the interface such as MI.setOperand(i) though + // it has MI.getOperand(i). To replace the ZERO MachineOperand with + // ImmMO, we need to remove ZERO operand and all the operands behind it, + // and, add the ImmMO, then, move back all the operands behind ZERO. + SmallVector MOps; + for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) { + MOps.push_back(MI.getOperand(i)); + MI.RemoveOperand(i); + } + + // Remove the last MO in the list, which is ZERO operand in fact. + MOps.pop_back(); + // Add the imm operand. + MI.addOperand(*ImmMO); + // Now add the rest back. + for (auto &MO : MOps) + MI.addOperand(MO); + } + + // Update the opcode. 
+ MI.setDesc(get(III.ImmOpcode)); + + LLVM_DEBUG(dbgs() << "With:\n"); + LLVM_DEBUG(MI.dump()); + + return true; +} + +bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI, + const ImmInstrInfo &III, + unsigned ConstantOpNo, + int64_t Imm) const { MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); bool PostRA = !MRI.isSSA(); // Exit early if we can't convert this. - if ((ConstantOpNo != III.ConstantOpNo) && !III.IsCommutative) + if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative) return false; if (Imm % III.ImmMustBeMultipleOf) return false; @@ -3035,7 +3304,7 @@ Opc == PPC::SRW || Opc == PPC::SRWo || Opc == PPC::SRD || Opc == PPC::SRDo; MI.setDesc(get(III.ImmOpcode)); - if (ConstantOpNo == III.ConstantOpNo) { + if (ConstantOpNo == III.OpNoForForwarding) { // Converting shifts to immediate form is a bit tricky since they may do // one of three things: // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero @@ -3063,14 +3332,14 @@ uint64_t SH = RightShift ? 32 - ShAmt : ShAmt; uint64_t MB = RightShift ? ShAmt : 0; uint64_t ME = RightShift ? 31 : 31 - ShAmt; - MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH); + MI.getOperand(III.OpNoForForwarding).ChangeToImmediate(SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB) .addImm(ME); } else { // Left shifts use (N, 63-N), right shifts use (64-N, N). uint64_t SH = RightShift ? 64 - ShAmt : ShAmt; uint64_t ME = RightShift ? ShAmt : 63 - ShAmt; - MI.getOperand(III.ConstantOpNo).ChangeToImmediate(SH); + MI.getOperand(III.OpNoForForwarding).ChangeToImmediate(SH); MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME); } } @@ -3081,14 +3350,14 @@ // desired one to an immediate. 
else if (III.IsCommutative) { MI.getOperand(ConstantOpNo).ChangeToImmediate(Imm); - swapMIOperands(MI, ConstantOpNo, III.ConstantOpNo); + swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding); } else llvm_unreachable("Should have exited early!"); // For instructions for which the constant register replaces a different // operand than where the immediate goes, we need to swap them. - if (III.ConstantOpNo != III.ImmOpNo) - swapMIOperands(MI, III.ConstantOpNo, III.ImmOpNo); + if (III.OpNoForForwarding != III.ImmOpNo) + swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo); // If the R0/X0 register is special for the original instruction and not for // the new instruction (or vice versa), we need to fix up the register class. Index: llvm/trunk/test/CodeGen/PowerPC/bitcasts-direct-move.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/bitcasts-direct-move.ll +++ llvm/trunk/test/CodeGen/PowerPC/bitcasts-direct-move.ll @@ -18,7 +18,7 @@ entry: %0 = bitcast double %a to i64 ret i64 %0 -; CHECK-P7: stfdx 1, +; CHECK-P7: stfd 1, ; CHECK-P7: ld 3, ; CHECK: mffprd 3, 1 } @@ -39,7 +39,7 @@ %0 = bitcast i64 %a to double ret double %0 ; CHECK-P7: std 3, -; CHECK-P7: lfdx 1, +; CHECK-P7: lfd 1, ; CHECK: mtvsrd 1, 3 } @@ -58,7 +58,7 @@ entry: %0 = bitcast double %a to i64 ret i64 %0 -; CHECK-P7: stfdx 1, +; CHECK-P7: stfd 1, ; CHECK-P7: ld 3, ; CHECK: mffprd 3, 1 } @@ -79,6 +79,6 @@ %0 = bitcast i64 %a to double ret double %0 ; CHECK-P7: std 3, -; CHECK-P7: lfdx 1, +; CHECK-P7: lfd 1, ; CHECK: mtvsrd 1, 3 } Index: llvm/trunk/test/CodeGen/PowerPC/branch_coalesce.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/branch_coalesce.ll +++ llvm/trunk/test/CodeGen/PowerPC/branch_coalesce.ll @@ -13,10 +13,8 @@ ; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha ; CHECK-DAG: xxlxor 2, 2, 2 ; CHECK-NOT: beq -; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]] -; CHECK-DAG: 
addi [[LD2BASE:[0-9]+]], [[LD2REG]] -; CHECK-DAG: lfdx 1, 0, [[LD1BASE]] -; CHECK-DAG: lfdx 3, 0, [[LD2BASE]] +; CHECK-DAG: lfd 1, .LCPI0_0@toc@l([[LD1REG]]) +; CHECK-DAG: lfd 3, .LCPI0_1@toc@l([[LD2REG]]) ; CHECK: .LBB[[LAB1]] ; CHECK: xsadddp 0, 1, 2 ; CHECK: xsadddp 1, 0, 3 @@ -32,16 +30,14 @@ ; CHECK-NOCOALESCE-NEXT: beq 0, .LBB0_4 ; CHECK-NOCOALESCE-NEXT: .LBB0_3: # %entry ; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_1@toc@ha -; CHECK-NOCOALESCE-NEXT: addi 3, 3, .LCPI0_1@toc@l -; CHECK-NOCOALESCE-NEXT: lfdx 3, 0, 3 +; CHECK-NOCOALESCE-NEXT: lfd 3, .LCPI0_1@toc@l(3) ; CHECK-NOCOALESCE-NEXT: .LBB0_4: # %entry ; CHECK-NOCOALESCE-NEXT: xsadddp 0, 1, 2 ; CHECK-NOCOALESCE-NEXT: xsadddp 1, 0, 3 ; CHECK-NOCOALESCE-NEXT: blr ; CHECK-NOCOALESCE-NEXT: .LBB0_5: # %entry ; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_0@toc@ha -; CHECK-NOCOALESCE-NEXT: addi 3, 3, .LCPI0_0@toc@l -; CHECK-NOCOALESCE-NEXT: lfdx 1, 0, 3 +; CHECK-NOCOALESCE-NEXT: lfd 1, .LCPI0_0@toc@l(3) ; CHECK-NOCOALESCE-NEXT: beq 0, .LBB0_2 ; CHECK-NOCOALESCE-NEXT: .LBB0_6: # %entry ; CHECK-NOCOALESCE-NEXT: xxlxor 2, 2, 2 Index: llvm/trunk/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll +++ llvm/trunk/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -fast-isel -mattr=+vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64VSX +; RUN: llc < %s -O0 -fast-isel -mattr=+vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -ppc-late-peephole=false | FileCheck %s --check-prefix=ELF64VSX ;; The semantics of VSX stores for when R0 is used is different depending on ;; whether it is used as base or offset. 
If used as base, the effective Index: llvm/trunk/test/CodeGen/PowerPC/float-to-int.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/float-to-int.ll +++ llvm/trunk/test/CodeGen/PowerPC/float-to-int.ll @@ -21,7 +21,7 @@ ; CHECK-VSX: @foo ; CHECK-VSX: xscvdpsxds [[REG:[0-9]+]], 1 -; CHECK-VSX: stfdx [[REG]], +; CHECK-VSX: stfd [[REG]], ; CHECK-VSX: ld 3, ; CHECK-VSX: blr @@ -44,7 +44,7 @@ ; CHECK-VSX: @foo2 ; CHECK-VSX: xscvdpsxds [[REG:[0-9]+]], 1 -; CHECK-VSX: stfdx [[REG]], +; CHECK-VSX: stfd [[REG]], ; CHECK-VSX: ld 3, ; CHECK-VSX: blr @@ -67,7 +67,7 @@ ; CHECK-VSX: @foo3 ; CHECK-VSX: xscvdpuxds [[REG:[0-9]+]], 1 -; CHECK-VSX: stfdx [[REG]], +; CHECK-VSX: stfd [[REG]], ; CHECK-VSX: ld 3, ; CHECK-VSX: blr @@ -90,7 +90,7 @@ ; CHECK-VSX: @foo4 ; CHECK-VSX: xscvdpuxds [[REG:[0-9]+]], 1 -; CHECK-VSX: stfdx [[REG]], +; CHECK-VSX: stfd [[REG]], ; CHECK-VSX: ld 3, ; CHECK-VSX: blr Index: llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll +++ llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll @@ -165,16 +165,14 @@ ; FMF-LABEL: fmul_fma_reassoc1: ; FMF: # %bb.0: ; FMF-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; FMF-NEXT: addi 3, 3, .LCPI6_0@toc@l -; FMF-NEXT: lfsx 0, 0, 3 +; FMF-NEXT: lfs 0, .LCPI6_0@toc@l(3) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fma_reassoc1: ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; GLOBAL-NEXT: addi 3, 3, .LCPI6_0@toc@l -; GLOBAL-NEXT: lfsx 0, 0, 3 +; GLOBAL-NEXT: lfs 0, .LCPI6_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul float %x, 42.0 @@ -196,16 +194,14 @@ ; FMF-LABEL: fmul_fma_reassoc2: ; FMF: # %bb.0: ; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; FMF-NEXT: addi 3, 3, .LCPI7_0@toc@l -; FMF-NEXT: lfsx 0, 0, 3 +; FMF-NEXT: lfs 0, .LCPI7_0@toc@l(3) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: blr ; ; 
GLOBAL-LABEL: fmul_fma_reassoc2: ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; GLOBAL-NEXT: addi 3, 3, .LCPI7_0@toc@l -; GLOBAL-NEXT: lfsx 0, 0, 3 +; GLOBAL-NEXT: lfs 0, .LCPI7_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul reassoc float %x, 42.0 @@ -227,16 +223,14 @@ ; FMF-LABEL: fmul_fma_fast1: ; FMF: # %bb.0: ; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha -; FMF-NEXT: addi 3, 3, .LCPI8_0@toc@l -; FMF-NEXT: lfsx 0, 0, 3 +; FMF-NEXT: lfs 0, .LCPI8_0@toc@l(3) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fma_fast1: ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha -; GLOBAL-NEXT: addi 3, 3, .LCPI8_0@toc@l -; GLOBAL-NEXT: lfsx 0, 0, 3 +; GLOBAL-NEXT: lfs 0, .LCPI8_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul float %x, 42.0 @@ -258,16 +252,14 @@ ; FMF-LABEL: fmul_fma_fast2: ; FMF: # %bb.0: ; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; FMF-NEXT: addi 3, 3, .LCPI9_0@toc@l -; FMF-NEXT: lfsx 0, 0, 3 +; FMF-NEXT: lfs 0, .LCPI9_0@toc@l(3) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fma_fast2: ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; GLOBAL-NEXT: addi 3, 3, .LCPI9_0@toc@l -; GLOBAL-NEXT: lfsx 0, 0, 3 +; GLOBAL-NEXT: lfs 0, .LCPI9_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul fast float %x, 42.0 @@ -294,8 +286,7 @@ ; FMF-NEXT: # %bb.1: ; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha ; FMF-NEXT: xsrsqrtesp 3, 1 -; FMF-NEXT: addi 3, 3, .LCPI10_0@toc@l -; FMF-NEXT: lfsx 0, 0, 3 +; FMF-NEXT: lfs 0, .LCPI10_0@toc@l(3) ; FMF-NEXT: xsmulsp 2, 1, 0 ; FMF-NEXT: xsmulsp 4, 3, 3 ; FMF-NEXT: xssubsp 2, 2, 1 @@ -317,8 +308,7 @@ ; GLOBAL-NEXT: fneg 0, 1 ; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha ; GLOBAL-NEXT: fmr 4, 1 -; GLOBAL-NEXT: addi 3, 3, .LCPI10_0@toc@l -; GLOBAL-NEXT: lfsx 3, 0, 3 +; GLOBAL-NEXT: lfs 3, .LCPI10_0@toc@l(3) ; GLOBAL-NEXT: xsmaddasp 4, 0, 3 ; GLOBAL-NEXT: xsmulsp 0, 2, 2 ; GLOBAL-NEXT: xsmaddasp 3, 4, 0 
@@ -352,8 +342,7 @@ ; FMF-NEXT: fneg 0, 1 ; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha ; FMF-NEXT: fmr 4, 1 -; FMF-NEXT: addi 3, 3, .LCPI11_0@toc@l -; FMF-NEXT: lfsx 3, 0, 3 +; FMF-NEXT: lfs 3, .LCPI11_0@toc@l(3) ; FMF-NEXT: xsmaddasp 4, 0, 3 ; FMF-NEXT: xsmulsp 0, 2, 2 ; FMF-NEXT: xsmaddasp 3, 4, 0 @@ -373,8 +362,7 @@ ; GLOBAL-NEXT: fneg 0, 1 ; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha ; GLOBAL-NEXT: fmr 4, 1 -; GLOBAL-NEXT: addi 3, 3, .LCPI11_0@toc@l -; GLOBAL-NEXT: lfsx 3, 0, 3 +; GLOBAL-NEXT: lfs 3, .LCPI11_0@toc@l(3) ; GLOBAL-NEXT: xsmaddasp 4, 0, 3 ; GLOBAL-NEXT: xsmulsp 0, 2, 2 ; GLOBAL-NEXT: xsmaddasp 3, 4, 0 Index: llvm/trunk/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll +++ llvm/trunk/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll @@ -7,10 +7,8 @@ define i128 @test_abs(ppc_fp128 %x) nounwind { entry: ; PPC64-LABEL: test_abs: -; PPC64-DAG: stfdx 2, 0, [[ADDR_HI:[0-9]+]] -; PPC64-DAG: stfdx 1, 0, [[ADDR_LO:[0-9]+]] -; PPC64-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]] -; PPC64-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]] +; PPC64-DAG: stfd 2, [[OFFSET_HI:-?[0-9]+]]([[SP:[0-9]+]]) +; PPC64-DAG: stfd 1, [[OFFSET_LO:-?[0-9]+]]([[SP]]) ; PPC64-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]]) ; PPC64-DAG: ld [[LO:[0-9]+]], [[OFFSET_HI]]([[SP]]) ; PPC64-DAG: rldicr [[FLIP_BIT:[0-9]+]], [[HI]], 0, 0 @@ -44,10 +42,8 @@ define i128 @test_neg(ppc_fp128 %x) nounwind { entry: ; PPC64-LABEL: test_neg: -; PPC64-DAG: stfdx 2, 0, [[ADDR_HI:[0-9]+]] -; PPC64-DAG: stfdx 1, 0, [[ADDR_LO:[0-9]+]] -; PPC64-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]] -; PPC64-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]] +; PPC64-DAG: stfd 2, [[OFFSET_HI:-?[0-9]+]]([[SP:[0-9]+]]) +; PPC64-DAG: stfd 1, [[OFFSET_LO:-?[0-9]+]]([[SP]]) ; PPC64-DAG: li [[FLIP_BIT:[0-9]+]], 1 ; PPC64-DAG: sldi [[FLIP_BIT]], 
[[FLIP_BIT]], 63 ; PPC64-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]]) @@ -85,8 +81,7 @@ define i128 @test_copysign(ppc_fp128 %x) nounwind { entry: ; PPC64-LABEL: test_copysign: -; PPC64-DAG: stfdx 1, 0, [[ADDR_REG:[0-9]+]] -; PPC64-DAG: addi [[ADDR_REG]], 1, [[OFFSET:-?[0-9]+]] +; PPC64-DAG: stfd 1, [[OFFSET:-?[0-9]+]](1) ; PPC64-DAG: li [[HI_TMP:[0-9]+]], 16399 ; PPC64-DAG: li [[LO_TMP:[0-9]+]], 3019 ; PPC64-NOT: BARRIER Index: llvm/trunk/test/CodeGen/PowerPC/i64-to-float.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/i64-to-float.ll +++ llvm/trunk/test/CodeGen/PowerPC/i64-to-float.ll @@ -20,7 +20,7 @@ ; CHECK-VSX: @foo ; CHECK-VSX: std 3, -; CHECK-VSX: lfdx [[REG:[0-9]+]], +; CHECK-VSX: lfd [[REG:[0-9]+]], ; CHECK-VSX: fcfids 1, [[REG]] ; CHECK-VSX: blr @@ -44,7 +44,7 @@ ; CHECK-VSX: @goo ; CHECK-VSX: std 3, -; CHECK-VSX: lfdx [[REG:[0-9]+]], +; CHECK-VSX: lfd [[REG:[0-9]+]], ; CHECK-VSX: xscvsxddp 1, [[REG]] ; CHECK-VSX: blr @@ -68,7 +68,7 @@ ; CHECK-VSX: @foou ; CHECK-VSX: std 3, -; CHECK-VSX: lfdx [[REG:[0-9]+]], +; CHECK-VSX: lfd [[REG:[0-9]+]], ; CHECK-VSX: fcfidus 1, [[REG]] ; CHECK-VSX: blr @@ -92,7 +92,7 @@ ; CHECK-VSX: @goou ; CHECK-VSX: std 3, -; CHECK-VSX: lfdx [[REG:[0-9]+]], +; CHECK-VSX: lfd [[REG:[0-9]+]], ; CHECK-VSX: xscvuxddp 1, [[REG]] ; CHECK-VSX: blr Index: llvm/trunk/test/CodeGen/PowerPC/lxv-aligned-stack-slots.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/lxv-aligned-stack-slots.ll +++ llvm/trunk/test/CodeGen/PowerPC/lxv-aligned-stack-slots.ll @@ -1,4 +1,4 @@ -; RUN: llc -O3 -o - %s | FileCheck %s +; RUN: llc -O3 -ppc-late-peephole=false -o - %s | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux-gnu" Index: llvm/trunk/test/CodeGen/PowerPC/mcm-12.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/mcm-12.ll +++ 
llvm/trunk/test/CodeGen/PowerPC/mcm-12.ll @@ -26,8 +26,7 @@ ; CHECK-VSX: .quad 4562098671269285104 ; CHECK-VSX-LABEL: test_double_const: ; CHECK-VSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha -; CHECK-VSX: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l -; CHECK-VSX: lfdx {{[0-9]+}}, 0, [[REG1]] +; CHECK-VSX: lfd {{[0-9]+}}, [[VAR]]@toc@l({{[0-9]+}}) ; CHECK-P9: [[VAR:[a-z0-9A-Z_.]+]]: ; CHECK-P9: .quad 4562098671269285104 Index: llvm/trunk/test/CodeGen/PowerPC/mcm-4.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/mcm-4.ll +++ llvm/trunk/test/CodeGen/PowerPC/mcm-4.ll @@ -33,8 +33,7 @@ ; MEDIUM-VSX: .quad 4562098671269285104 ; MEDIUM-VSX-LABEL: test_double_const: ; MEDIUM-VSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha -; MEDIUM-VSX: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l -; MEDIUM-VSX: lfdx {{[0-9]+}}, 0, [[REG2]] +; MEDIUM-VSX: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) ; LARGE: [[VAR:[a-z0-9A-Z_.]+]]: ; LARGE: .quad 4562098671269285104 Index: llvm/trunk/test/CodeGen/PowerPC/ppc64-align-long-double.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/ppc64-align-long-double.ll +++ llvm/trunk/test/CodeGen/PowerPC/ppc64-align-long-double.ll @@ -44,10 +44,8 @@ ; CHECK-VSX-DAG: std 3, 48(1) ; CHECK-VSX-DAG: std 5, -16(1) ; CHECK-VSX-DAG: std 6, -8(1) -; CHECK-VSX-DAG: addi [[REG1:[0-9]+]], 1, -16 -; CHECK-VSX-DAG: addi 3, 1, -8 -; CHECK-VSX: lfdx 1, 0, [[REG1]] -; CHECK-VSX: lfdx 2, 0, 3 +; CHECK-VSX: lfd 1, -16(1) +; CHECK-VSX: lfd 2, -8(1) ; FIXME-VSX: addi 4, 1, 48 ; FIXME-VSX: lxsdx 1, 4, 3 Index: llvm/trunk/test/CodeGen/PowerPC/ppc64le-smallarg.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/ppc64le-smallarg.ll +++ llvm/trunk/test/CodeGen/PowerPC/ppc64le-smallarg.ll @@ -42,8 +42,7 @@ ret float %x } ; CHECK: @callee2 -; CHECK: addi [[TOCREG:[0-9]+]], 1, 136 -; CHECK: lfsx {{[0-9]+}}, {{[0-9]+}}, 
[[TOCREG]] +; CHECK: lfs {{[0-9]+}}, 136(1) ; CHECK: blr define void @caller2() { @@ -53,8 +52,7 @@ ret void } ; CHECK: @caller2 -; CHECK: addi [[TOCOFF:[0-9]+]], {{[0-9]+}}, 136 -; CHECK: stfsx {{[0-9]+}}, 0, [[TOCOFF]] +; CHECK: stfs {{[0-9]+}}, 136({{[0-9]+}}) ; CHECK: bl test2 declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float) Index: llvm/trunk/test/CodeGen/PowerPC/pr25157-peephole.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/pr25157-peephole.ll +++ llvm/trunk/test/CodeGen/PowerPC/pr25157-peephole.ll @@ -57,7 +57,7 @@ } ; CHECK-LABEL: @aercalc_ -; CHECK: lfsx +; CHECK: lfs ; CHECK: xxspltd ; CHECK: stxvd2x ; CHECK-NOT: xxswapd Index: llvm/trunk/test/CodeGen/PowerPC/pr25157.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/pr25157.ll +++ llvm/trunk/test/CodeGen/PowerPC/pr25157.ll @@ -57,6 +57,6 @@ } ; CHECK-LABEL: @aercalc_ -; CHECK: lfsx +; CHECK: lfs ; CHECK-P9-LABEL: @aercalc_ ; CHECK-P9: lfs Index: llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_1.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_1.ll +++ llvm/trunk/test/CodeGen/PowerPC/scalar_vector_test_1.ll @@ -163,16 +163,14 @@ ; P8LE-LABEL: s2v_test_f2: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: addi r3, r3, 8 -; P8LE-NEXT: lfdx f0, 0, r3 +; P8LE-NEXT: lfd f0, 8(r3) ; P8LE-NEXT: xxspltd vs0, vs0, 0 ; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f2: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: addi r3, r3, 8 -; P8BE-NEXT: lfdx f0, 0, r3 +; P8BE-NEXT: lfd f0, 8(r3) ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 ; P8BE-NEXT: blr entry: @@ -238,16 +236,14 @@ ; P8LE-LABEL: s2v_test_f4: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: addi r3, r3, 8 -; P8LE-NEXT: lfdx f0, 0, r3 +; P8LE-NEXT: lfd f0, 8(r3) ; P8LE-NEXT: xxspltd vs0, vs0, 0 ; 
P8LE-NEXT: xxpermdi v2, v2, vs0, 1 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f4: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: addi r3, r3, 8 -; P8BE-NEXT: lfdx f0, 0, r3 +; P8BE-NEXT: lfd f0, 8(r3) ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 ; P8BE-NEXT: blr entry: Index: llvm/trunk/test/CodeGen/PowerPC/select_const.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/select_const.ll +++ llvm/trunk/test/CodeGen/PowerPC/select_const.ll @@ -992,13 +992,11 @@ ; ALL-NEXT: bc 12, 1, .LBB48_2 ; ALL-NEXT: # %bb.1: ; ALL-NEXT: addis 3, 2, .LCPI48_0@toc@ha -; ALL-NEXT: addi 3, 3, .LCPI48_0@toc@l -; ALL-NEXT: lfdx 1, 0, 3 +; ALL-NEXT: lfd 1, .LCPI48_0@toc@l(3) ; ALL-NEXT: blr ; ALL-NEXT: .LBB48_2: ; ALL-NEXT: addis 3, 2, .LCPI48_1@toc@ha -; ALL-NEXT: addi 3, 3, .LCPI48_1@toc@l -; ALL-NEXT: lfsx 1, 0, 3 +; ALL-NEXT: lfs 1, .LCPI48_1@toc@l(3) ; ALL-NEXT: blr %sel = select i1 %cond, double -4.0, double 23.3 %bo = frem double %sel, 5.1 Index: llvm/trunk/test/CodeGen/PowerPC/toc-float.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/toc-float.ll +++ llvm/trunk/test/CodeGen/PowerPC/toc-float.ll @@ -1,24 +1,29 @@ -; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 <%s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 <%s | FileCheck -check-prefix=CHECK-P9 %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 <%s | FileCheck -check-prefix=CHECK-P8 %s ; As the constant could be represented as float, a float is ; loaded from constant pool. 
define double @doubleConstant1() { ret double 1.400000e+01 -} ; CHECK-LABEL: doubleConstant1: -; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha -; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P9: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P8: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +} ; As the constant couldn't be represented as float, a double is ; loaded from constant pool. define double @doubleConstant2() { ret double 2.408904e+01 -} ; CHECK-LABEL: doubleConstant2: -; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha -; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P8: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +} @FArr = hidden local_unnamed_addr global [10 x float] zeroinitializer, align 4 @@ -26,19 +31,24 @@ %1 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* @FArr, i64 0, i64 3), align 4 %2 = fadd float %1, 0x400B333340000000 ret float %2 -} ; CHECK-LABEL: floatConstantArray -; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]] -; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]]) +; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]] +; CHECK-P9: lfs {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]]) +; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P8: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l +; CHECK-P8: lfs {{[0-9]+}}, 12([[REG2]]) +} define float @floatConstant() { ret float 0x400470A3E0000000 -} ; CHECK-LABEL: floatConstant: -; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha -; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK-P9: addis 
[[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P9: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P8: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +} ; llvm put the hidden globals into the TOC table. ; TODO - do some analysis and decide which globals could be put into TOC. @@ -48,11 +58,14 @@ %1 = load double, double* getelementptr inbounds ([200 x double], [200 x double]* @d, i64 0, i64 3), align 8 %2 = fadd double %1, 6.880000e+00 ret double %2 -} ; CHECK-LABEL: doubleConstantArray -; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]] -; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]]) +; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]] +; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]]) +; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P8: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l +; CHECK-P8: lfd {{[0-9]+}}, 24([[REG2]]) +} @arr = hidden local_unnamed_addr global [20000 x double] zeroinitializer, align 8 @@ -60,12 +73,34 @@ %1 = load double, double* getelementptr inbounds ([20000 x double], [20000 x double]* @arr, i64 0, i64 4096), align 8 %2 = fadd double %1, 6.880000e+00 ret double %2 -} -; access element that out of range +; Access an element with an offset that doesn't fit in the displacement field of LFD. 
; CHECK-LABEL: doubleLargeConstantArray -; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha -; CHECK: li [[REG2:[0-9]+]], 0 -; CHECK: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l -; CHECK: ori [[REG4:[0-9]+]], [[REG2]], 32768 -; CHECK: lfdx {{[0-9]+}}, [[REG3]], [[REG4]] +; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P9: li [[REG2:[0-9]+]], 0 +; CHECK-P9: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l +; CHECK-P9: ori [[REG4:[0-9]+]], [[REG2]], 32768 +; CHECK-P9: lfdx {{[0-9]+}}, [[REG3]], [[REG4]] +; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P8: li [[REG2:[0-9]+]], 0 +; CHECK-P8: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l +; CHECK-P8: ori [[REG4:[0-9]+]], [[REG2]], 32768 +; CHECK-P8: lfdx {{[0-9]+}}, [[REG3]], [[REG4]] +} + +@vec_arr = global [10 x <4 x i32>] zeroinitializer, align 16 + +define <4 x i32> @vectorArray() #0 { +entry: + %0 = load <4 x i32>, <4 x i32>* getelementptr inbounds ([10 x <4 x i32>], [10 x <4 x i32>]* @vec_arr, i64 0, i64 2), align 16 + ret <4 x i32> %0 + +; CHECK-LABEL: vectorArray +; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P9: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]]) +; CHECK-P9: lxv {{[0-9]+}}, 32([[REG2]]) +; CHECK-P8: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK-P8: ld [[REG2:[0-9]+]], [[VAR]]@toc@l([[REG1]]) +; CHECK-P8: addi [[REG3:[0-9]+]], [[REG2]], 32 +; CHECK-P8: lvx {{[0-9]+}}, 0, [[REG3]] +} Index: llvm/trunk/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll +++ llvm/trunk/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll @@ -124,7 +124,7 @@ ret void ; CHECK-LABEL: @dblToFloat ; CHECK: lfdx [[REGLD5:[0-9]+]], -; CHECK: stfsx [[REGLD5]], +; CHECK: stfs [[REGLD5]], ; CHECK-P9-LABEL: @dblToFloat ; CHECK-P9: lfd [[REGLD5:[0-9]+]], ; CHECK-P9: stfs 
[[REGLD5]], @@ -140,7 +140,7 @@ ret void ; CHECK-LABEL: @floatToDbl ; CHECK: lfsx [[REGLD5:[0-9]+]], -; CHECK: stfdx [[REGLD5]], +; CHECK: stfd [[REGLD5]], ; CHECK-P9-LABEL: @floatToDbl ; CHECK-P9: lfs [[REGLD5:[0-9]+]], ; CHECK-P9: stfd [[REGLD5]],