Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1083,9 +1083,14 @@
       // lowest-order bit.
       unsigned Idx;
 
+      // ConstZero means a bit we need to mask off.
+      // Variable is a bit that comes from an input variable.
+      // VariableKnownToBeZero is also a bit that comes from an input
+      // variable, but it is known to be already zero, so we need not mask it.
       enum Kind {
         ConstZero,
-        Variable
+        Variable,
+        VariableKnownToBeZero
       } K;
 
       ValueBit(SDValue V, unsigned I, Kind K = Variable)
@@ -1094,11 +1099,11 @@
         : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
 
       bool isZero() const {
-        return K == ConstZero;
+        return K == ConstZero || K == VariableKnownToBeZero;
      }
 
       bool hasValue() const {
-        return K == Variable;
+        return K == Variable || K == VariableKnownToBeZero;
      }
 
       SDValue getValue() const {
@@ -1248,8 +1253,14 @@
       for (unsigned i = 0; i < NumBits; ++i)
         if (((Mask >> i) & 1) == 1)
           Bits[i] = (*LHSBits)[i];
-        else
-          Bits[i] = ValueBit(ValueBit::ConstZero);
+        else {
+          // The AND instruction masks this bit. If the input is already zero,
+          // we have nothing to do here. Otherwise, make the bit ConstZero.
+          if ((*LHSBits)[i].isZero())
+            Bits[i] = (*LHSBits)[i];
+          else
+            Bits[i] = ValueBit(ValueBit::ConstZero);
+        }
 
       return std::make_pair(Interesting, &Bits);
     }
@@ -1259,8 +1270,26 @@
       const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
 
       bool AllDisjoint = true;
-      for (unsigned i = 0; i < NumBits; ++i)
-        if (LHSBits[i].isZero())
+      SDValue LastVal = SDValue();
+      unsigned LastIdx = 0;
+      for (unsigned i = 0; i < NumBits; ++i) {
+        if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
+          // If both inputs are known to be zero and one is ConstZero and
+          // the other is VariableKnownToBeZero, we can select whichever
+          // we like. To minimize the number of bit groups, we select
+          // VariableKnownToBeZero if this bit is the next bit of the same
+          // input variable as the previous bit. Otherwise, we select
+          // ConstZero.
+          if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
+              LHSBits[i].getValueBitIndex() == LastIdx + 1)
+            Bits[i] = LHSBits[i];
+          else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
+                   RHSBits[i].getValueBitIndex() == LastIdx + 1)
+            Bits[i] = RHSBits[i];
+          else
+            Bits[i] = ValueBit(ValueBit::ConstZero);
+        }
+        else if (LHSBits[i].isZero())
           Bits[i] = RHSBits[i];
         else if (RHSBits[i].isZero())
           Bits[i] = LHSBits[i];
@@ -1268,6 +1297,16 @@
           AllDisjoint = false;
           break;
         }
+        // We remember the value and bit index of this bit.
+        if (Bits[i].hasValue()) {
+          LastVal = Bits[i].getValue();
+          LastIdx = Bits[i].getValueBitIndex();
+        }
+        else {
+          if (LastVal) LastVal = SDValue();
+          LastIdx = 0;
+        }
+      }
 
       if (!AllDisjoint)
         break;
@@ -1293,6 +1332,44 @@
       return std::make_pair(Interesting, &Bits);
     }
+    case ISD::AssertZext: {
+      // For AssertZext, we look through the operand and
+      // mark the bits known to be zero.
+      const SmallVector<ValueBit, 64> *LHSBits;
+      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
+                                                    NumBits);
+
+      EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
+      const unsigned NumValidBits = FromType.getSizeInBits();
+      for (unsigned i = 0; i < NumValidBits; ++i)
+        Bits[i] = (*LHSBits)[i];
+
+      // These bits are known to be zero.
+      for (unsigned i = NumValidBits; i < NumBits; ++i)
+        Bits[i] = ValueBit((*LHSBits)[i].getValue(),
+                           (*LHSBits)[i].getValueBitIndex(),
+                           ValueBit::VariableKnownToBeZero);
+
+      return std::make_pair(Interesting, &Bits);
+    }
+    case ISD::LOAD:
+      LoadSDNode *LD = cast<LoadSDNode>(V);
+      if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
+        EVT VT = LD->getMemoryVT();
+        const unsigned NumValidBits = VT.getSizeInBits();
+
+        for (unsigned i = 0; i < NumValidBits; ++i)
+          Bits[i] = ValueBit(V, i);
+
+        // These bits are known to be zero.
+        for (unsigned i = NumValidBits; i < NumBits; ++i)
+          Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
+
+        // A zero-extending load itself cannot be optimized, so it is not
+        // interesting by itself even though it gives useful information.
+        return std::make_pair(Interesting = false, &Bits);
+      }
+      break;
     }
 
     for (unsigned i = 0; i < NumBits; ++i)
       Bits[i] = ValueBit(V, i);
@@ -1304,7 +1381,7 @@
   // For each value (except the constant ones), compute the left-rotate amount
   // to get it from its original to final position.
   void computeRotationAmounts() {
-    HasZeros = false;
+    NeedMask = false;
     RLAmt.resize(Bits.size());
     for (unsigned i = 0; i < Bits.size(); ++i)
       if (Bits[i].hasValue()) {
@@ -1314,7 +1391,7 @@
         else
          RLAmt[i] = Bits.size() - (VBI - i);
       } else if (Bits[i].isZero()) {
-        HasZeros = true;
+        NeedMask = true;
         RLAmt[i] = UINT32_MAX;
       } else {
         llvm_unreachable("Unknown value bit type");
       }
@@ -1330,6 +1407,7 @@
     unsigned LastRLAmt = RLAmt[0];
     SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
     unsigned LastGroupStartIdx = 0;
+    bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
     for (unsigned i = 1; i < Bits.size(); ++i) {
       unsigned ThisRLAmt = RLAmt[i];
       SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
@@ -1342,10 +1420,20 @@
         LastGroupStartIdx = 0;
       }
 
+      // If this bit is known to be zero and the current group is a bit group
+      // of zeros, we do not need to terminate the current bit group even if
+      // the Value or RLAmt does not match here. Instead, we terminate this
+      // group when the first non-zero bit appears later.
+      if (IsGroupOfZeros && Bits[i].isZero())
+        continue;
+
       // If this bit has the same underlying value and the same rotate factor as
       // the last one, then they're part of the same group.
       if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
-        continue;
+        // We cannot continue the current group if this bit is not known to
+        // be zero in a bit group of zeros.
+        if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
+          continue;
 
       if (LastValue.getNode())
         BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1353,6 +1441,7 @@
       LastRLAmt = ThisRLAmt;
       LastValue = ThisValue;
       LastGroupStartIdx = i;
+      IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
     }
     if (LastValue.getNode())
       BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
@@ -1698,7 +1787,7 @@
     // If we've not yet selected a 'starting' instruction, and we have no zeros
     // to fill in, select the (Value, RLAmt) with the highest priority (largest
     // number of groups), and start with this rotated value.
-    if ((!HasZeros || LateMask) && !Res) {
+    if ((!NeedMask || LateMask) && !Res) {
       ValueRotInfo &VRI = ValueRotsVec[0];
       if (VRI.RLAmt) {
         if (InstCnt) *InstCnt += 1;
@@ -2077,7 +2166,7 @@
     // If we've not yet selected a 'starting' instruction, and we have no zeros
     // to fill in, select the (Value, RLAmt) with the highest priority (largest
     // number of groups), and start with this rotated value.
-    if ((!HasZeros || LateMask) && !Res) {
+    if ((!NeedMask || LateMask) && !Res) {
       // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
       // groups will come first, and so the VRI representing the largest number
       // of groups might not be first (it might be the first Repl32 groups).
@@ -2230,7 +2319,7 @@
     SmallVector<ValueBit, 64> Bits;
 
-    bool HasZeros;
+    bool NeedMask;
     SmallVector<unsigned, 64> RLAmt;
 
     SmallVector<BitGroup, 16> BitGroups;
@@ -2259,10 +2348,10 @@
                           " selection for: ");
     LLVM_DEBUG(N->dump(CurDAG));
 
-    // Fill it RLAmt and set HasZeros.
+    // Fill in RLAmt and set NeedMask.
     computeRotationAmounts();
 
-    if (!HasZeros)
+    if (!NeedMask)
       return Select(N, false);
 
     // We currently have two techniques for handling results with zeros: early
Index: test/CodeGen/PowerPC/addi-offset-fold.ll
===================================================================
--- test/CodeGen/PowerPC/addi-offset-fold.ll
+++ test/CodeGen/PowerPC/addi-offset-fold.ll
@@ -29,9 +29,8 @@
 ; CHECK-DAG: stb 4, -16(1)
 ; CHECK-DAG: lbz [[REG1:[0-9]+]], -16(1)
 ; CHECK-DAG: lwz [[REG2:[0-9]+]], -20(1)
-; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG1]], 32
-; CHECK-DAG: or [[REG4:[0-9]+]], [[REG2]], [[REG3]]
-; CHECK: rldicl 3, [[REG4]], 33, 57
+; CHECK-DAG: rlwinm 3, [[REG2]], 1, 31, 31
+; CHECK: rlwimi 3, [[REG1]], 1, 25, 30
 ; CHECK: blr
 }
Index: test/CodeGen/PowerPC/bitfieldinsert.ll
===================================================================
--- test/CodeGen/PowerPC/bitfieldinsert.ll
+++ test/CodeGen/PowerPC/bitfieldinsert.ll
@@ -1,6 +1,35 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
 
+; equivalent C code
+; struct s64 {
+;   int a:5;
+;   int b:16;
+;   long c:42;
+; };
+; void bitfieldinsert64(struct s64 *p, unsigned short v) {
+;   p->b = v;
+; }
+
+%struct.s64 = type { i64 }
+
+define void @bitfieldinsert64(%struct.s64* nocapture %p, i16 zeroext %v) {
+; CHECK-LABEL: @bitfieldinsert64
+; CHECK: ld [[REG1:[0-9]+]], 0(3)
+; CHECK-NEXT: rlwimi [[REG1]], 4, 5, 11, 26
+; CHECK-NEXT: std [[REG1]], 0(3)
+; CHECK-NEXT: blr
+entry:
+  %0 = getelementptr inbounds %struct.s64, %struct.s64* %p, i64 0, i32 0
+  %1 = zext i16 %v to i64
+  %bf.load = load i64, i64* %0, align 8
+  %bf.shl = shl nuw nsw i64 %1, 5
+  %bf.clear = and i64 %bf.load, -2097121
+  %bf.set = or i64 %bf.clear, %bf.shl
+  store i64 %bf.set, i64* %0, align 8
+  ret void
+}
+
 ; bitfieldinsert32: Test for rlwimi
 ; equivalent C code
 ; struct s32 {
@@ -17,9 +46,9 @@
 define void @bitfieldinsert32(%struct.s32* nocapture %p, i32 zeroext %v) {
 ; CHECK-LABEL: @bitfieldinsert32
 ; CHECK: lwz [[REG1:[0-9]+]], 0(3)
-; CHECK: rlwimi [[REG1]], 4, 8, 8, 23
-; CHECK: stw [[REG1]], 0(3)
-; CHECK: blr
+; CHECK-NEXT: rlwimi [[REG1]], 4, 8, 8, 23
+; CHECK-NEXT: stw [[REG1]], 0(3)
+; CHECK-NEXT: blr
 entry:
   %0 = getelementptr inbounds %struct.s32, %struct.s32* %p, i64 0, i32 0
   %bf.load = load i32, i32* %0, align 4
Index: test/CodeGen/PowerPC/ppc64le-aggregates.ll
===================================================================
--- test/CodeGen/PowerPC/ppc64le-aggregates.ll
+++ test/CodeGen/PowerPC/ppc64le-aggregates.ll
@@ -236,14 +236,12 @@
 ; CHECK-DAG: stfs 6, [[OFF1:[0-9]+]](1)
 ; CHECK-DAG: stfs 7, [[OFF2:[0-9]+]](1)
 ; CHECK-DAG: stfs 8, [[OFF3:[0-9]+]](1)
-; CHECK-DAG: lwz [[REG0:[0-9]+]], [[OFF0]](1)
+; CHECK-DAG: lwz 9, [[OFF0]](1)
 ; CHECK-DAG: lwz [[REG1:[0-9]+]], [[OFF1]](1)
-; CHECK-DAG: lwz [[REG2:[0-9]+]], [[OFF2]](1)
+; CHECK-DAG: lwz 10, [[OFF2]](1)
 ; CHECK-DAG: lwz [[REG3:[0-9]+]], [[OFF3]](1)
-; CHECK-DAG: sldi [[REG1]], [[REG1]], 32
-; CHECK-DAG: sldi [[REG3]], [[REG3]], 32
-; CHECK-DAG: or 9, [[REG0]], [[REG1]]
-; CHECK-DAG: or 10, [[REG2]], [[REG3]]
+; CHECK-DAG: rldimi 9, [[REG1]], 32, 0
+; CHECK-DAG: rldimi 10, [[REG3]], 32, 0
 ; CHECK: bl test1
 
 declare void @test1([8 x float], [8 x float])
Index: test/CodeGen/PowerPC/rlwimi-dyn-and.ll
===================================================================
--- test/CodeGen/PowerPC/rlwimi-dyn-and.ll
+++ test/CodeGen/PowerPC/rlwimi-dyn-and.ll
@@ -39,7 +39,7 @@
   ret i32 %conv174
 
 ; CHECK-LABEL: @test2
-; CHECK: slwi 3, {{[0-9]+}}, 7
+; CHECK: rlwinm 3, {{[0-9]+}}, 7, 17, 24
 ; CHECK: rlwimi 3, {{[0-9]+}}, 15, 16, 16
 ; CHECK: blr
 }
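Note (illustration, not part of the patch): the ppc64le-aggregates.ll change above covers the case where two zero-extending 32-bit loads are combined into one 64-bit value. A minimal C sketch of that pattern follows; the function and variable names (combine_words, p, lo, hi) are hypothetical, and the expected rldimi is inferred from the updated CHECK lines rather than guaranteed for every surrounding context.

  /* Illustrative sketch only; names are hypothetical. */
  #include <stdint.h>

  uint64_t combine_words(const uint32_t *p) {
    uint64_t lo = p[0];   /* zero-extending lwz: bits 32-63 known to be zero */
    uint64_t hi = p[1];   /* zero-extending lwz: bits 32-63 known to be zero */
    /* With the upper halves tracked as VariableKnownToBeZero, this OR of
       disjoint bit groups should select a single rldimi (insert hi into the
       high 32 bits of lo) instead of sldi followed by or. */
    return lo | (hi << 32);
  }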