Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -6044,28 +6044,37 @@ unsigned FirstOp; unsigned StorageOpcode = N->getMachineOpcode(); + bool RequiresMod4Offset = false; switch (StorageOpcode) { default: continue; + case PPC::LWA: + case PPC::LD: + case PPC::DFLOADf64: + case PPC::DFLOADf32: + RequiresMod4Offset = true; + LLVM_FALLTHROUGH; case PPC::LBZ: case PPC::LBZ8: - case PPC::LD: case PPC::LFD: case PPC::LFS: case PPC::LHA: case PPC::LHA8: case PPC::LHZ: case PPC::LHZ8: - case PPC::LWA: case PPC::LWZ: case PPC::LWZ8: FirstOp = 0; break; + case PPC::STD: + case PPC::DFSTOREf64: + case PPC::DFSTOREf32: + RequiresMod4Offset = true; + LLVM_FALLTHROUGH; case PPC::STB: case PPC::STB8: - case PPC::STD: case PPC::STFD: case PPC::STFS: case PPC::STH: @@ -6112,9 +6121,7 @@ // For these cases, the immediate may not be divisible by 4, in // which case the fold is illegal for DS-form instructions. (The // other cases provide aligned addresses and are always safe.) - if ((StorageOpcode == PPC::LWA || - StorageOpcode == PPC::LD || - StorageOpcode == PPC::STD) && + if (RequiresMod4Offset && (!isa(Base.getOperand(1)) || Base.getConstantOperandVal(1) % 4 != 0)) continue; @@ -6176,8 +6183,7 @@ if (auto *C = dyn_cast(ImmOpnd)) { Offset += C->getSExtValue(); - if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD || - StorageOpcode == PPC::STD) && (Offset % 4) != 0) + if (RequiresMod4Offset && (Offset % 4) != 0) continue; if (!isInt<16>(Offset)) @@ -6209,8 +6215,7 @@ // We can't perform this optimization for data whose alignment // is insufficient for the instruction encoding. 
if (GV->getAlignment() < 4 && - (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD || - StorageOpcode == PPC::LWA || (Offset % 4) != 0)) { + (RequiresMod4Offset || (Offset % 4) != 0)) { LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); continue; } Index: llvm/lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2065,6 +2065,12 @@ return true; } +#ifndef NDEBUG +static bool isAnImmediateOperand(const MachineOperand &MO) { + return MO.isCPI() || MO.isGlobal() || MO.isImm(); +} +#endif + bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { auto &MBB = *MI.getParent(); auto DL = MI.getDebugLoc(); @@ -2087,7 +2093,8 @@ case PPC::DFSTOREf64: { assert(Subtarget.hasP9Vector() && "Invalid D-Form Pseudo-ops on Pre-P9 target."); - assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() && + assert(MI.getOperand(2).isReg() && + isAnImmediateOperand(MI.getOperand(1)) && "D-form op must have register and immediate operands"); return expandVSXMemPseudo(MI); } Index: llvm/test/CodeGen/PowerPC/mcm-12.ll =================================================================== --- llvm/test/CodeGen/PowerPC/mcm-12.ll +++ llvm/test/CodeGen/PowerPC/mcm-12.ll @@ -33,5 +33,4 @@ ; CHECK-P9: .quad 4562098671269285104 ; CHECK-P9-LABEL: test_double_const: ; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha -; CHECK-P9: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l -; CHECK-P9: lfd {{[0-9]+}}, 0([[REG1]]) +; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) Index: llvm/test/CodeGen/PowerPC/toc-float.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/toc-float.ll @@ -0,0 +1,71 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr9 <%s | FileCheck %s + +; As the constant could be represented as float, a float is +; loaded from constant pool. 
+define double @doubleConstant1() { + ret double 1.400000e+01 +} + +; CHECK-LABEL: doubleConstant1: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) + +; As the constant couldn't be represented as a float, a double is +; loaded from the constant pool. +define double @doubleConstant2() { + ret double 2.408904e+01 +} + +; CHECK-LABEL: doubleConstant2: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) + +@FArr = hidden local_unnamed_addr global [10 x float] zeroinitializer, align 4 + +define float @floatConstantArray() local_unnamed_addr { + %1 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* @FArr, i64 0, i64 3), align 4 + %2 = fadd float %1, 0x400B333340000000 + ret float %2 +} + +; CHECK-LABEL: floatConstantArray +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]] +; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]]) + +define float @floatConstant() { + ret float 0x400470A3E0000000 +} + +; CHECK-LABEL: floatConstant: +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) + +; LLVM puts the hidden globals into the TOC table. +; TODO - do some analysis and decide which globals could be put into the TOC. 
+@d = hidden local_unnamed_addr global [200 x double] zeroinitializer, align 8 + +define double @doubleConstantArray() { + %1 = load double, double* getelementptr inbounds ([200 x double], [200 x double]* @d, i64 0, i64 3), align 8 + %2 = fadd double %1, 6.880000e+00 + ret double %2 +} + +; CHECK-LABEL: doubleConstantArray +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]] +; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]]) + +@arr = hidden local_unnamed_addr global [20000 x double] zeroinitializer, align 8 + +define double @doubleLargeConstantArray() { + %1 = load double, double* getelementptr inbounds ([20000 x double], [20000 x double]* @arr, i64 0, i64 4096), align 8 + %2 = fadd double %1, 6.880000e+00 + ret double %2 +} + +; Access an element whose offset is out of range for a 16-bit displacement. +; CHECK-LABEL: doubleLargeConstantArray +; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha +; CHECK: li [[REG2:[0-9]+]], 0 +; CHECK: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l +; CHECK: ori [[REG4:[0-9]+]], [[REG2]], 32768 +; CHECK: lfdx {{[0-9]+}}, [[REG3]], [[REG4]]