Index: lib/Target/PowerPC/PPCMIPeephole.cpp
===================================================================
--- lib/Target/PowerPC/PPCMIPeephole.cpp
+++ lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -47,6 +47,8 @@
 STATISTIC(NumFixedPointIterations,
           "Number of fixed-point iterations converting reg-reg instructions "
           "to reg-imm ones");
+STATISTIC(NumRotatesCollapsed,
+          "Number of pairs of rotate left, clear left/right collapsed");
 
 static cl::opt<bool>
 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -757,6 +759,69 @@
         NumOptADDLIs++;
         break;
       }
+      case PPC::RLDICR: {
+        // We miss the opportunity to emit an RLDIC when lowering jump tables
+        // since ISEL sees only a single basic block. When selecting, the clear
+        // and shift left will be in different blocks.
+        unsigned SrcReg = MI.getOperand(1).getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+          break;
+
+        // getVRegDef yields null if the register has no defining instruction.
+        MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+        if (!SrcMI || SrcMI->getOpcode() != PPC::RLDICL)
+          break;
+        const MachineOperand &MOpSHSrc = SrcMI->getOperand(2);
+        const MachineOperand &MOpMBSrc = SrcMI->getOperand(3);
+        const MachineOperand &MOpSHMI = MI.getOperand(2);
+        const MachineOperand &MOpMEMI = MI.getOperand(3);
+        if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() &&
+              MOpSHMI.isImm() && MOpMEMI.isImm()))
+          break;
+        uint64_t SHSrc = MOpSHSrc.getImm();
+        uint64_t MBSrc = MOpMBSrc.getImm();
+        uint64_t SHMI = MOpSHMI.getImm();
+        uint64_t MEMI = MOpMEMI.getImm();
+        uint64_t NewSH = SHSrc + SHMI;
+        // Unsigned subtraction: SHMI > MBSrc wraps to a huge value, which the
+        // range check below rejects.
+        uint64_t NewMB = MBSrc - SHMI;
+        if (NewMB > 63 || NewSH > 63)
+          break;
+
+        // The bits cleared with RLDICL are [0, MBSrc).
+        // The bits cleared with RLDICR are (MEMI, 63].
+        // After the sequence, the bits cleared are:
+        // [0, MBSrc-SHMI) and (MEMI, 63].
+        //
+        // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
+        if ((63 - NewSH) != MEMI)
+          break;
+
+        // RLDIC keeps MASK(NewMB, 63-NewSH), which wraps around when NewMB is
+        // past 63-NewSH. The original pair clears every bit in that case, so
+        // the two are not equivalent.
+        if (NewMB > MEMI)
+          break;
+
+        LLVM_DEBUG(dbgs() << "Converting pair: ");
+        LLVM_DEBUG(SrcMI->dump());
+        LLVM_DEBUG(MI.dump());
+
+        unsigned NewSrcReg = SrcMI->getOperand(1).getReg();
+        MI.setDesc(TII->get(PPC::RLDIC));
+        MI.getOperand(1).setReg(NewSrcReg);
+        MI.getOperand(2).setImm(NewSH);
+        MI.getOperand(3).setImm(NewMB);
+        // MI now reads the RLDICL's source register, so kill flags recorded on
+        // its other uses (and the one inherited from SrcReg) may be stale.
+        MRI->clearKillFlags(NewSrcReg);
+
+        LLVM_DEBUG(dbgs() << "To: ");
+        LLVM_DEBUG(MI.dump());
+        NumRotatesCollapsed++;
+        break;
+      }
       }
     }
 
Index: test/CodeGen/PowerPC/collapse-rotates.mir
===================================================================
--- test/CodeGen/PowerPC/collapse-rotates.mir
+++ test/CodeGen/PowerPC/collapse-rotates.mir
@@ -0,0 +1,65 @@
+# RUN: llc -mtriple=powerpc64le--linux-gnu -start-before ppc-mi-peepholes %s -o - -verify-machineinstrs | FileCheck %s
+
+--- |
+  ; ModuleID = 'b.ll'
+  source_filename = "b.ll"
+  target datalayout = "e-m:e-i64:64-n32:64"
+
+  define dso_local i64 @test(i64 %l) {
+  entry:
+    %shl = shl i64 %l, 3
+    ret i64 %shl
+  }
+
+...
+---
+name: test
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers:
+  - { id: 0, class: g8rc, preferred-register: '' }
+  - { id: 1, class: g8rc, preferred-register: '' }
+  - { id: 2, class: g8rc, preferred-register: '' }
+liveins:
+  - { reg: '$x3', virtual-reg: '%0' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 0
+  adjustsStack: false
+  hasCalls: false
+  stackProtector: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack: []
+stack: []
+constants: []
+machineFunctionInfo: {}
+body: |
+  bb.0.entry:
+    liveins: $x3
+
+    %0:g8rc = COPY $x3
+    %1:g8rc = RLDICL %0, 2, 32
+    %2:g8rc = RLDICR %1, 3, 58
+    $x3 = COPY %2
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+
+...
+# CHECK: rldic 3, 3, 5, 29
Index: test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
===================================================================
--- test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
+++ test/CodeGen/PowerPC/jump-tables-collapse-rotate.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -o - %s | FileCheck %s
+
+; Function Attrs: nounwind
+define dso_local zeroext i32 @test(i32 signext %l) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    std 0, 16(1)
+; CHECK-NEXT:    stdu 1, -32(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi 3, 3, -1
+; CHECK-NEXT:    cmplwi 3, 5
+; CHECK-NEXT:    bgt 0, .LBB0_3
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addis 4, 2, .LC0@toc@ha
+; CHECK-NEXT:    rldic 3, 3, 2, 30
+; CHECK-NEXT:    ld 4, .LC0@toc@l(4)
+; CHECK-NEXT:    lwax 3, 3, 4
+; CHECK-NEXT:    add 3, 3, 4
+; CHECK-NEXT:    mtctr 3
+; CHECK-NEXT:    bctr
+; CHECK-NEXT:  .LBB0_2: # %sw.bb
+; CHECK-NEXT:    li 3, 2
+; CHECK-NEXT:    bl test1
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_3: # %sw.default
+; CHECK-NEXT:    li 3, 1
+; CHECK-NEXT:    bl test1
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl test3
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_4: # %sw.bb3
+; CHECK-NEXT:    li 3, 3
+; CHECK-NEXT:    b .LBB0_9
+; CHECK-NEXT:  .LBB0_5: # %sw.bb5
+; CHECK-NEXT:    li 3, 4
+; CHECK-NEXT:    bl test2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl test3
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_6: # %sw.bb8
+; CHECK-NEXT:    li 3, 5
+; CHECK-NEXT:    bl test4
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_7: # %sw.bb10
+; CHECK-NEXT:    li 3, 66
+; CHECK-NEXT:    bl test4
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    bl test1
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_8: # %sw.bb13
+; CHECK-NEXT:    li 3, 66
+; CHECK-NEXT:  .LBB0_9: # %return
+; CHECK-NEXT:    bl test2
+; CHECK-NEXT:    nop
+; CHECK-NEXT:  .LBB0_10: # %return
+; CHECK-NEXT:    clrldi 3, 3, 32
+; CHECK-NEXT:    addi 1, 1, 32
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+entry:
+  switch i32 %l, label %sw.default [
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb3
+    i32 3, label %sw.bb5
+    i32 4, label %sw.bb8
+    i32 5, label %sw.bb10
+    i32 6, label %sw.bb13
+  ]
+
+sw.default:                                       ; preds = %entry
+  %call = tail call signext i32 @test1(i32 signext 1)
+  %call1 = tail call signext i32 @test3(i32 signext %call)
+  br label %return
+
+sw.bb:                                            ; preds = %entry
+  %call2 = tail call signext i32 @test1(i32 signext 2)
+  br label %return
+
+sw.bb3:                                           ; preds = %entry
+  %call4 = tail call signext i32 @test2(i32 signext 3)
+  br label %return
+
+sw.bb5:                                           ; preds = %entry
+  %call6 = tail call signext i32 @test2(i32 signext 4)
+  %call7 = tail call signext i32 @test3(i32 signext %call6)
+  br label %return
+
+sw.bb8:                                           ; preds = %entry
+  %call9 = tail call signext i32 @test4(i32 signext 5)
+  br label %return
+
+sw.bb10:                                          ; preds = %entry
+  %call11 = tail call signext i32 @test4(i32 signext 66)
+  %call12 = tail call signext i32 @test1(i32 signext %call11)
+  br label %return
+
+sw.bb13:                                          ; preds = %entry
+  %call14 = tail call signext i32 @test2(i32 signext 66)
+  br label %return
+
+return:                                           ; preds = %sw.bb13, %sw.bb10, %sw.bb8, %sw.bb5, %sw.bb3, %sw.bb, %sw.default
+  %retval.0 = phi i32 [ %call1, %sw.default ], [ %call14, %sw.bb13 ], [ %call12, %sw.bb10 ], [ %call9, %sw.bb8 ], [ %call7, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb ]
+  ret i32 %retval.0
+}
+
+declare signext i32 @test3(i32 signext)
+
+declare signext i32 @test1(i32 signext)
+
+declare signext i32 @test2(i32 signext)
+
+declare signext i32 @test4(i32 signext)