Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -2029,17 +2029,17 @@ /// or 64-bit immediate, and if the value can be accurately represented as a /// sign extension from a 16-bit value. If so, this returns true and the /// immediate. -static bool isIntS16Immediate(SDNode *N, short &Imm) { +static bool isIntS16Immediate(SDNode *N, int16_t &Imm) { if (!isa(N)) return false; - Imm = (short)cast(N)->getZExtValue(); + Imm = (int16_t)cast(N)->getZExtValue(); if (N->getValueType(0) == MVT::i32) return Imm == (int32_t)cast(N)->getZExtValue(); else return Imm == (int64_t)cast(N)->getZExtValue(); } -static bool isIntS16Immediate(SDValue Op, short &Imm) { +static bool isIntS16Immediate(SDValue Op, int16_t &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } @@ -2049,7 +2049,7 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const { - short imm = 0; + int16_t imm = 0; if (N.getOpcode() == ISD::ADD) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i @@ -2139,7 +2139,7 @@ return false; if (N.getOpcode() == ISD::ADD) { - short imm = 0; + int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); @@ -2163,7 +2163,7 @@ return true; // [&g+r] } } else if (N.getOpcode() == ISD::OR) { - short imm = 0; + int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add @@ -2191,7 +2191,7 @@ // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" - short Imm; + int16_t Imm; if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, @@ -2236,10 +2236,15 @@ if (SelectAddressRegReg(N, Base, Index, DAG)) return true; - // If the operand is an addition, always emit this as [r+r], since this is - // better (for code size, and execution, as the memop does the add for free) - // than emitting an explicit add. - if (N.getOpcode() == ISD::ADD) { + // If the address is the result of an add, we will utilize the fact that the + // address calculation includes an implicit add. However, we can reduce + // register pressure if we do not materialize a constant just for use as the + // index register. We only get rid of the add if it is not an add of a + // value and a 16-bit signed constant and both have a single use. + int16_t imm = 0; + if (N.getOpcode() == ISD::ADD && + (!isIntS16Immediate(N.getOperand(1), imm) || + !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) { Base = N.getOperand(0); Index = N.getOperand(1); return true; Index: test/CodeGen/PowerPC/build-vector-tests.ll =================================================================== --- test/CodeGen/PowerPC/build-vector-tests.ll +++ test/CodeGen/PowerPC/build-vector-tests.ll @@ -1028,7 +1028,7 @@ ; P9LE: vperm ; P9LE: blr ; P8BE: sldi {{r[0-9]+}}, r4, 2 -; P8BE-DAG: lxvw4x {{v[0-9]+}}, r3, +; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3 ; P8BE-DAG: lxvw4x ; P8BE: vperm ; P8BE: blr @@ -2187,7 +2187,7 @@ ; P9LE: vperm ; P9LE: blr ; P8BE-DAG: sldi {{r[0-9]+}}, r4, 2 -; P8BE-DAG: lxvw4x {{v[0-9]+}}, r3 +; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3 ; P8BE-DAG: lxvw4x ; P8BE: vperm ; P8BE: blr Index: test/CodeGen/PowerPC/ppc64le-smallarg.ll =================================================================== --- test/CodeGen/PowerPC/ppc64le-smallarg.ll +++ test/CodeGen/PowerPC/ppc64le-smallarg.ll @@ -53,8 +53,8 @@ ret void } ; CHECK: @caller2 -; CHECK: li [[TOCOFF:[0-9]+]], 136 -; CHECK: stxsspx {{[0-9]+}}, 1, [[TOCOFF]] +; CHECK: addi [[TOCOFF:[0-9]+]], {{[0-9]+}}, 136 +; CHECK: stxsspx {{[0-9]+}}, 0, [[TOCOFF]] ; CHECK: bl test2 declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float) Index: test/CodeGen/PowerPC/select-addrRegRegOnly.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/select-addrRegRegOnly.ll @@ -0,0 +1,37 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown -verify-machineinstrs < %s | FileCheck %s + +; Function Attrs: norecurse nounwind readonly +define float @testSingleAccess(i32* nocapture readonly %arr) local_unnamed_addr #0 { +; CHECK-LABEL: testSingleAccess: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addi 3, 3, 8 +; CHECK-NEXT: lxsiwax 0, 0, 3 +; CHECK-NEXT: xscvsxdsp 1, 0 +; CHECK-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 2 + %0 = load i32, i32* %arrayidx, align 4 + %conv = sitofp i32 %0 to float + ret float %conv +} + +; Function Attrs: norecurse nounwind readonly +define float @testMultipleAccess(i32* nocapture readonly %arr) local_unnamed_addr #0 { +; CHECK-LABEL: testMultipleAccess: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: lwz 4, 8(3) +; CHECK-NEXT: lwz 12, 12(3) +; CHECK-NEXT: add 3, 12, 4 +; CHECK-NEXT: mtvsrwa 0, 3 +; CHECK-NEXT: xscvsxdsp 1, 0 +; CHECK-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %arr, i64 2 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 3 + %1 = load i32, i32* %arrayidx1, align 4 + %add = add nsw i32 %1, %0 + %conv = sitofp i32 %add to float + ret float %conv +} Index: test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll =================================================================== --- test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll +++ test/CodeGen/PowerPC/vsx-partword-int-loads-and-stores.ll @@ -321,8 +321,8 @@ ; CHECK: lxsibzx 34, 0, 3 ; CHECK-NEXT: vspltb 2, 2, 7 ; CHECK-BE-LABEL: vecucus -; CHECK-BE: li [[OFFSET:[0-9]+]], 1 -; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]] +; CHECK-BE: addi [[OFFSET:[0-9]+]], [[OFFSET]], 1 +; CHECK-BE-NEXT: lxsibzx 34, 0, [[OFFSET]] ; CHECK-BE-NEXT: vspltb 2, 2, 7 } @@ -385,8 +385,8 @@ ; CHECK: lxsibzx 34, 0, 3 ; CHECK-NEXT: vspltb 2, 2, 7 ; CHECK-BE-LABEL: vecscus -; CHECK-BE: li [[OFFSET:[0-9]+]], 1 -; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]] +; CHECK-BE: addi [[OFFSET:[0-9]+]], [[OFFSET]], 1 +; CHECK-BE-NEXT: lxsibzx 34, 0, [[OFFSET]] ; CHECK-BE-NEXT: vspltb 2, 2, 7 } @@ -487,8 +487,8 @@ ; CHECK: lxsibzx 34, 0, 3 ; CHECK-NEXT: vspltb 2, 2, 7 ; CHECK-BE-LABEL: vecucss -; CHECK-BE: li [[OFFSET:[0-9]+]], 1 -; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]] +; CHECK-BE: addi [[OFFSET:[0-9]+]], [[OFFSET]], 1 +; CHECK-BE-NEXT: lxsibzx 34, 0, [[OFFSET]] ; CHECK-BE-NEXT: vspltb 2, 2, 7 } @@ -540,8 +540,8 @@ ; CHECK: lxsibzx 34, 0, 3 ; CHECK-NEXT: vspltb 2, 2, 7 ; CHECK-BE-LABEL: vecscss -; CHECK-BE: li [[OFFSET:[0-9]+]], 1 -; CHECK-BE-NEXT: lxsibzx 34, 3, [[OFFSET]] +; CHECK-BE: addi [[OFFSET:[0-9]+]], [[OFFSET]], 1 +; CHECK-BE-NEXT: lxsibzx 34, 0, [[OFFSET]] ; CHECK-BE-NEXT: vspltb 2, 2, 7 }