Skip to content

Commit 9f0fe9a

Browse files
author
QingShan Zhang
committedJun 19, 2018
If the arch is P9, we select the DFLOADf32/DFLOADf64 pseudo instructions when loading a floating-point value,
and expand them post-RA based on register pressure. However, we fail to perform the add-imm peephole for these pseudo instructions. Differential Revision: https://reviews.llvm.org/D47568 Reviewed By: Nemanjai llvm-svn: 335024
1 parent ec03fbe commit 9f0fe9a

File tree

4 files changed

+95
-13
lines changed

4 files changed

+95
-13
lines changed
 

‎llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

+15-10
Original file line numberDiff line numberDiff line change
@@ -6044,28 +6044,37 @@ void PPCDAGToDAGISel::PeepholePPC64() {
60446044

60456045
unsigned FirstOp;
60466046
unsigned StorageOpcode = N->getMachineOpcode();
6047+
bool RequiresMod4Offset = false;
60476048

60486049
switch (StorageOpcode) {
60496050
default: continue;
60506051

6052+
case PPC::LWA:
6053+
case PPC::LD:
6054+
case PPC::DFLOADf64:
6055+
case PPC::DFLOADf32:
6056+
RequiresMod4Offset = true;
6057+
LLVM_FALLTHROUGH;
60516058
case PPC::LBZ:
60526059
case PPC::LBZ8:
6053-
case PPC::LD:
60546060
case PPC::LFD:
60556061
case PPC::LFS:
60566062
case PPC::LHA:
60576063
case PPC::LHA8:
60586064
case PPC::LHZ:
60596065
case PPC::LHZ8:
6060-
case PPC::LWA:
60616066
case PPC::LWZ:
60626067
case PPC::LWZ8:
60636068
FirstOp = 0;
60646069
break;
60656070

6071+
case PPC::STD:
6072+
case PPC::DFSTOREf64:
6073+
case PPC::DFSTOREf32:
6074+
RequiresMod4Offset = true;
6075+
LLVM_FALLTHROUGH;
60666076
case PPC::STB:
60676077
case PPC::STB8:
6068-
case PPC::STD:
60696078
case PPC::STFD:
60706079
case PPC::STFS:
60716080
case PPC::STH:
@@ -6112,9 +6121,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
61126121
// For these cases, the immediate may not be divisible by 4, in
61136122
// which case the fold is illegal for DS-form instructions. (The
61146123
// other cases provide aligned addresses and are always safe.)
6115-
if ((StorageOpcode == PPC::LWA ||
6116-
StorageOpcode == PPC::LD ||
6117-
StorageOpcode == PPC::STD) &&
6124+
if (RequiresMod4Offset &&
61186125
(!isa<ConstantSDNode>(Base.getOperand(1)) ||
61196126
Base.getConstantOperandVal(1) % 4 != 0))
61206127
continue;
@@ -6176,8 +6183,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
61766183
if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
61776184
Offset += C->getSExtValue();
61786185

6179-
if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD ||
6180-
StorageOpcode == PPC::STD) && (Offset % 4) != 0)
6186+
if (RequiresMod4Offset && (Offset % 4) != 0)
61816187
continue;
61826188

61836189
if (!isInt<16>(Offset))
@@ -6209,8 +6215,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
62096215
// We can't perform this optimization for data whose alignment
62106216
// is insufficient for the instruction encoding.
62116217
if (GV->getAlignment() < 4 &&
6212-
(StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
6213-
StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
6218+
(RequiresMod4Offset || (Offset % 4) != 0)) {
62146219
LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
62156220
continue;
62166221
}

‎llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

+8-1
Original file line numberDiff line numberDiff line change
@@ -2065,6 +2065,12 @@ bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
20652065
return true;
20662066
}
20672067

2068+
#ifndef NDEBUG
2069+
static bool isAnImmediateOperand(const MachineOperand &MO) {
2070+
return MO.isCPI() || MO.isGlobal() || MO.isImm();
2071+
}
2072+
#endif
2073+
20682074
bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
20692075
auto &MBB = *MI.getParent();
20702076
auto DL = MI.getDebugLoc();
@@ -2087,7 +2093,8 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
20872093
case PPC::DFSTOREf64: {
20882094
assert(Subtarget.hasP9Vector() &&
20892095
"Invalid D-Form Pseudo-ops on Pre-P9 target.");
2090-
assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() &&
2096+
assert(MI.getOperand(2).isReg() &&
2097+
isAnImmediateOperand(MI.getOperand(1)) &&
20912098
"D-form op must have register and immediate operands");
20922099
return expandVSXMemPseudo(MI);
20932100
}

‎llvm/test/CodeGen/PowerPC/mcm-12.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,4 @@ entry:
3333
; CHECK-P9: .quad 4562098671269285104
3434
; CHECK-P9-LABEL: test_double_const:
3535
; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
36-
; CHECK-P9: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l
37-
; CHECK-P9: lfd {{[0-9]+}}, 0([[REG1]])
36+
; CHECK-P9: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 <%s | FileCheck %s
2+
3+
; As the constant could be represented as float, a float is
4+
; loaded from constant pool.
5+
define double @doubleConstant1() {
6+
ret double 1.400000e+01
7+
}
8+
9+
; CHECK-LABEL: doubleConstant1:
10+
; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
11+
; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
12+
13+
; As the constant couldn't be represented as float, a double is
14+
; loaded from constant pool.
15+
define double @doubleConstant2() {
16+
ret double 2.408904e+01
17+
}
18+
19+
; CHECK-LABEL: doubleConstant2:
20+
; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
21+
; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
22+
23+
@FArr = hidden local_unnamed_addr global [10 x float] zeroinitializer, align 4
24+
25+
define float @floatConstantArray() local_unnamed_addr {
26+
%1 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* @FArr, i64 0, i64 3), align 4
27+
%2 = fadd float %1, 0x400B333340000000
28+
ret float %2
29+
}
30+
31+
; CHECK-LABEL: floatConstantArray
32+
; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]]
33+
; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]])
34+
35+
define float @floatConstant() {
36+
ret float 0x400470A3E0000000
37+
}
38+
39+
; CHECK-LABEL: floatConstant:
40+
; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
41+
; CHECK: lfs {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
42+
43+
; LLVM puts the hidden globals into the TOC table.
44+
; TODO - do some analysis and decide which globals could be put into TOC.
45+
@d = hidden local_unnamed_addr global [200 x double] zeroinitializer, align 8
46+
47+
define double @doubleConstantArray() {
48+
%1 = load double, double* getelementptr inbounds ([200 x double], [200 x double]* @d, i64 0, i64 3), align 8
49+
%2 = fadd double %1, 6.880000e+00
50+
ret double %2
51+
}
52+
53+
; CHECK-LABEL: doubleConstantArray
54+
; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha+[[REG2:[0-9]+]]
55+
; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l+[[REG2]]([[REG1]])
56+
57+
@arr = hidden local_unnamed_addr global [20000 x double] zeroinitializer, align 8
58+
59+
define double @doubleLargeConstantArray() {
60+
%1 = load double, double* getelementptr inbounds ([20000 x double], [20000 x double]* @arr, i64 0, i64 4096), align 8
61+
%2 = fadd double %1, 6.880000e+00
62+
ret double %2
63+
}
64+
65+
; Access an element whose offset is out of range.
66+
; CHECK-LABEL: doubleLargeConstantArray
67+
; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
68+
; CHECK: li [[REG2:[0-9]+]], 0
69+
; CHECK: addi [[REG3:[0-9]+]], [[REG1]], [[VAR:[a-z0-9A-Z_.]+]]@toc@l
70+
; CHECK: ori [[REG4:[0-9]+]], [[REG2]], 32768
71+
; CHECK: lfdx {{[0-9]+}}, [[REG3]], [[REG4]]

0 commit comments

Comments
 (0)