diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -2091,7 +2091,10 @@
   }
 
   // Instruction selection for the 32-bit case.
-  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
+  // When From64 is true, N is a 64-bit node that Select has forwarded here (see
+  // mergeBGForSelect32) and each rlwimi result is widened to N's type.
+  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt,
+                   bool From64 = false) {
     SDLoc dl(N);
     SDValue Res;
 
@@ -2137,7 +2140,24 @@
           { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
-              getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
-            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
+            // rlwimi's MB/ME are positions within a 32-bit word. Derive them
+            // from the word width rather than Bits.size(), which is 64 when
+            // From64 is set.
+            getI32Imm(31 - BG.EndIdx, dl),
+            getI32Imm(31 - BG.StartIdx, dl) };
         Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
+        if (From64) {
+          // Widen the 32-bit rlwimi result to N's type by inserting it as the
+          // sub_32 subregister of an IMPLICIT_DEF.
+          // NOTE(review): this leaves the upper word undefined, although
+          // mergeBGForSelect32 only accepts values whose upper 32 bits are
+          // known zero. Confirm that no user of N relies on those zeros.
+          const EVT ResultType = N->getValueType(0);
+          SDValue ImDef(
+              CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType), 0);
+          Res = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
+                                               ResultType, ImDef, Res,
+                                               getI32Imm(PPC::sub_32, dl)),
+                        0);
+        }
       }
     }
 
@@ -2620,6 +2640,69 @@
     return Res.getNode();
   }
 
+  // Some 32-bit logical operations have their truncate removed by
+  // DAGCombineExtBoolTrunc and are thereby turned into 64-bit operations. Some
+  // of these can still be handled by a single rlwimi. Detect that case and set
+  // up BitGroups so that Select32 can process it.
+  //
+  // The accepted shape is: every bit of the upper word is zero, and the lower
+  // word is fully packed by two or three unrotated bit groups. With three
+  // groups, the first and last must share a value and are merged into one
+  // wrap-around group. On success NeedMask is cleared and true is returned; on
+  // failure nothing is modified and false is returned.
+  bool mergeBGForSelect32() {
+    assert(Bits.size() == 64 && "Not on 64 bits path?");
+    const size_t NumGroups = BitGroups.size();
+    if (NumGroups < 2)
+      return false;
+
+    // The first group must start at bit 0 and the last must end at bit 31,
+    // both without rotation.
+    auto IH = BitGroups.begin();
+    if (IH->RLAmt != 0 || IH->StartIdx != 0)
+      return false;
+
+    auto IT = std::prev(BitGroups.end());
+    if (IT->RLAmt != 0 || IT->EndIdx != 31 || IT->StartIdx > IT->EndIdx)
+      return false;
+
+    // Either the two end groups are adjacent and are the only groups, or
+    // exactly one unrotated group sits between them and they share a value.
+    const bool HasMiddle = IT->StartIdx != IH->EndIdx + 1;
+    if (HasMiddle) {
+      if (IH->V != IT->V || NumGroups != 3)
+        return false;
+
+      auto IM = std::next(IH);
+      if (IM->RLAmt != 0 || IM->StartIdx != IH->EndIdx + 1 ||
+          IT->StartIdx != IM->EndIdx + 1)
+        return false;
+    } else if (NumGroups != 2) {
+      return false;
+    }
+
+    // No bit of the lower word may be zero, and every bit of the upper word
+    // must be zero.
+    for (unsigned i = 0; i < 32; ++i)
+      if (Bits[i].isZero())
+        return false;
+    for (unsigned i = 32; i < 64; ++i)
+      if (!Bits[i].isZero())
+        return false;
+
+    NeedMask = false;
+    if (HasMiddle) {
+      LLVM_DEBUG(dbgs() << "\tmergeBGForSelect32 combining final bit group "
+                           "with initial one\n");
+      // Extend the last group so that it wraps around through the first, then
+      // drop the first. IT is not used after the erase.
+      IT->EndIdx = IH->EndIdx;
+      BitGroups.erase(IH);
+    }
+    LLVM_DEBUG(dbgs() << "\tmergeBGForSelect32 fwd BGs to Select32\n");
+    return true;
+  }
+
   SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
     // Fill in BitGroups.
     collectBitGroups(LateMask);
@@ -2630,11 +2713,15 @@
     if (Bits.size() == 64)
       assignRepl32BitGroups();
 
+    // A 64-bit value whose upper word is zero and whose lower word is packed
+    // from unrotated bit groups may be selectable as a single rlwimi.
+    const bool To32 = Bits.size() == 64 && mergeBGForSelect32();
+
     // Fill in ValueRotsVec.
     collectValueRotInfo();
 
-    if (Bits.size() == 32) {
-      return Select32(N, LateMask, InstCnt);
+    if (Bits.size() == 32 || To32) {
+      return Select32(N, LateMask, InstCnt, To32);
     } else {
       assert(Bits.size() == 64 && "Not 64 bits here?");
       return Select64(N, LateMask, InstCnt);
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rlwimi-and-or-and.ll b/llvm/test/CodeGen/PowerPC/ppc64-rlwimi-and-or-and.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc64-rlwimi-and-or-and.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux"
+
+define i32 @test1(i32 %a, i32 %b) {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 3, 4, 0, 16, 27
+; CHECK-NEXT:    blr
+entry:
+  %and = and i32 %a, -65521
+  %and1 = and i32 %b, 65520
+  %or = or i32 %and1, %and
+  ret i32 %or
+}
+
+define i32 @test2(i32 %a, i32 %b) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    rlwimi 3, 4, 0, 24, 31
+; CHECK-NEXT:    blr
+entry:
+  %and = and i32 %a, -256
+  %and1 = and i32 %b, 255
+  %or = or i32 %and1, %and
+  ret i32 %or
+}