Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -633,6 +633,13 @@ // If no shift, we're done. if (!Shift) return Result; + // If Hi word == Lo word, + // we can use rldimi to insert the Lo word into Hi word. + if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { + ++Result; + return Result; + } + // Shift for next step if the upper 32-bits were not zero. if (Imm) ++Result; @@ -731,6 +738,14 @@ // If no shift, we're done. if (!Shift) return Result; + // If Hi word == Lo word, + // we can use rldimi to insert the Lo word into Hi word. + if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { + SDValue Ops[] = + { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)}; + return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); + } + // Shift for next step if the upper 32-bits were not zero. if (Imm) { Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, @@ -1659,9 +1674,12 @@ unsigned NumRLInsts = 0; bool FirstBG = true; + bool MoreBG = false; for (auto &BG : BitGroups) { - if (!MatchingBG(BG)) + if (!MatchingBG(BG)) { + MoreBG = true; continue; + } NumRLInsts += SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, !FirstBG); @@ -1679,7 +1697,10 @@ // because that exposes more opportunities for CSE. if (NumAndInsts > NumRLInsts) continue; - if (Use32BitInsts && NumAndInsts == NumRLInsts) + // When merging multiple bit groups, an 'or' instruction is used. + // But when a rotate is used, rldimi can insert the rotated value into any + // register, so the 'or' instruction can be avoided. 
+ if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) continue; DEBUG(dbgs() << "\t\t\t\tusing masking\n"); Index: test/CodeGen/PowerPC/pr30640.ll =================================================================== --- test/CodeGen/PowerPC/pr30640.ll +++ test/CodeGen/PowerPC/pr30640.ll @@ -0,0 +1,11 @@ +; RUN: llc -O2 -march=ppc64 -mcpu=pwr8 < %s | FileCheck %s + +define i64 @foo() { +entry: + ret i64 -3617008641903833651 + +; CHECK: lis 3, -12851 +; CHECK: ori 3, 3, 52685 +; CHECK: rldimi 3, 3, 32, 0 +} +