Index: llvm/trunk/lib/Target/PowerPC/P9InstrResources.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/P9InstrResources.td +++ llvm/trunk/lib/Target/PowerPC/P9InstrResources.td @@ -122,7 +122,7 @@ (instrs (instregex "S(L|R)D$"), (instregex "SRAD(I)?$"), - (instregex "EXTSWSLI$"), + (instregex "EXTSWSLI_32_64$"), (instregex "MFV(S)?RD$"), (instregex "MTVSRD$"), (instregex "MTVSRW(A|Z)$"), @@ -158,6 +158,7 @@ XSNEGDP, XSCPSGNDP, MFVSRWZ, + EXTSWSLI, SRADI_32, RLDIC, RFEBB, @@ -1101,8 +1102,9 @@ // The two ops cannot be done in parallel. def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs - (instregex "EXTSWSLIo$"), + (instregex "EXTSWSLI_32_64o$"), (instregex "SRAD(I)?o$"), + EXTSWSLIo, SLDo, SRDo, RLDICo Index: llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td @@ -725,10 +725,17 @@ "sradi", "$rA, $rS, $SH", IIC_IntRotateDI, [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; -defm EXTSWSLI : XSForm_1r<31, 445, (outs g8rc:$rA), (ins gprc:$rS, u6imm:$SH), - "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI, - [(set i64:$rA, (PPCextswsli i32:$rS, (i32 imm:$SH)))]>, - isPPC64, Requires<[IsISA3_0]>; +let Interpretation64Bit = 1, isCodeGenOnly = 1 in +defm EXTSWSLI_32_64 : XSForm_1r<31, 445, (outs g8rc:$rA), + (ins gprc:$rS, u6imm:$SH), + "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI, + [(set i64:$rA, + (PPCextswsli i32:$rS, (i32 imm:$SH)))]>, + isPPC64, Requires<[IsISA3_0]>; + +defm EXTSWSLI : XSForm_1rc<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH), + "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI, + []>, isPPC64, Requires<[IsISA3_0]>; // For fast-isel: let isCodeGenOnly = 1, Defs = [CARRY] in Index: llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp =================================================================== 
--- llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -53,6 +53,8 @@
           "to reg-imm ones");
 STATISTIC(NumRotatesCollapsed,
           "Number of pairs of rotate left, clear left/right collapsed");
+STATISTIC(NumEXTSWAndSLDICombined,
+          "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
 
 static cl::opt<bool>
 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
@@ -101,6 +103,7 @@
   // Perform peepholes.
   bool eliminateRedundantCompare(void);
   bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
+  bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
   bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
   void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                       MachineInstr *MI);
@@ -799,7 +802,8 @@
         break;
       }
       case PPC::RLDICR: {
-        Simplified |= emitRLDICWhenLoweringJumpTables(MI);
+        Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
+                      combineSEXTAndSHL(MI, ToErase);
         break;
       }
       }
@@ -1379,6 +1383,95 @@
   return true;
 }
 
+// Fold an EXTSW whose only use is a shift-left-immediate into one EXTSWSLI.
+//
+// Consider LLVM IR such as:
+// entry:
+//   %iconv = sext i32 %index to i64
+//   br i1 undef, label %true, label %false
+// true:
+//   %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
+//   ...
+// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
+// different BBs when conducting instruction selection. We can do a peephole
+// optimization to combine these two instructions into extswsli after
+// instruction selection.
+//
+// MI is the candidate RLDICR. On success the new instruction is inserted
+// before MI, ToErase is set to &MI (MI itself is not erased here), the
+// defining EXTSW is erased, and true is returned. Otherwise nothing is
+// changed and false is returned.
+bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
+                                      MachineInstr *&ToErase) {
+  if (MI.getOpcode() != PPC::RLDICR)
+    return false;
+
+  if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
+    return false;
+
+  assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
+
+  const MachineOperand &MOpSHMI = MI.getOperand(2);
+  const MachineOperand &MOpMEMI = MI.getOperand(3);
+  if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
+    return false;
+
+  // Only handle RLDICR whose SH and ME immediates sum to 63.
+  uint64_t SHMI = MOpSHMI.getImm();
+  uint64_t MEMI = MOpMEMI.getImm();
+  if (SHMI + MEMI != 63)
+    return false;
+
+  unsigned SrcReg = MI.getOperand(1).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+    return false;
+
+  MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+  if (SrcMI->getOpcode() != PPC::EXTSW &&
+      SrcMI->getOpcode() != PPC::EXTSW_32_64)
+    return false;
+
+  // If the register defined by extsw has more than one use, combination is not
+  // needed.
+  if (!MRI->hasOneNonDBGUse(SrcReg))
+    return false;
+
+  // The extsw input is about to be read at MI instead of at SrcMI, and the two
+  // may sit in different blocks. Only allow that for a virtual register.
+  assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
+  const MachineOperand &MOpExtSrc = SrcMI->getOperand(1);
+  assert(MOpExtSrc.isReg() && "EXTSW's second operand should be a register");
+  unsigned ExtSrcReg = MOpExtSrc.getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(ExtSrcReg))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Combining pair: ");
+  LLVM_DEBUG(SrcMI->dump());
+  LLVM_DEBUG(MI.dump());
+
+  MachineInstr *NewInstr =
+      BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
+              SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
+                                               : TII->get(PPC::EXTSWSLI_32_64),
+              MI.getOperand(0).getReg())
+          .add(MOpExtSrc)
+          .add(MOpSHMI);
+  // NewInstr is otherwise only read inside LLVM_DEBUG.
+  (void)NewInstr;
+
+  // The read of ExtSrcReg moved from SrcMI down to MI, so a kill flag on any
+  // use in between would now be stale.
+  MRI->clearKillFlags(ExtSrcReg);
+
+  LLVM_DEBUG(dbgs() << "TO: ");
+  LLVM_DEBUG(NewInstr->dump());
+  ++NumEXTSWAndSLDICombined;
+  ToErase = &MI;
+  // SrcMI, which is extsw, is of no use now, erase it.
+  SrcMI->eraseFromParent();
+  return true;
+}
+
 } // end default namespace
 
 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
Index: llvm/trunk/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
+++ llvm/trunk/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll
@@ -42,8 +42,7 @@
 ; CHECK-P9-NEXT: andi. 
r5, r5, 1 ; CHECK-P9-NEXT: bc 4, gt, .LBB0_2 ; CHECK-P9-NEXT: # %bb.1: # %true -; CHECK-P9-NEXT: extsw r4, r4 -; CHECK-P9-NEXT: sldi r4, r4, 2 +; CHECK-P9-NEXT: extswsli r4, r4, 2 ; CHECK-P9-NEXT: lwzx r3, r3, r4 ; CHECK-P9-NEXT: blr ; CHECK-P9-NEXT: .LBB0_2: # %false @@ -55,8 +54,7 @@ ; CHECK-P9-BE-NEXT: andi. r5, r5, 1 ; CHECK-P9-BE-NEXT: bc 4, gt, .LBB0_2 ; CHECK-P9-BE-NEXT: # %bb.1: # %true -; CHECK-P9-BE-NEXT: extsw r4, r4 -; CHECK-P9-BE-NEXT: sldi r4, r4, 2 +; CHECK-P9-BE-NEXT: extswsli r4, r4, 2 ; CHECK-P9-BE-NEXT: lwzx r3, r3, r4 ; CHECK-P9-BE-NEXT: blr ; CHECK-P9-BE-NEXT: .LBB0_2: # %false @@ -74,3 +72,131 @@ false: ret i32 %default } + +define dso_local i64 @poc_i64(i64* %base, i32 %index, i1 %flag, i64 %default) { +; CHECK-LABEL: poc_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. r5, r5, 1 +; CHECK-NEXT: bc 4, gt, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: extsw r4, r4 +; CHECK-NEXT: sldi r4, r4, 3 +; CHECK-NEXT: ldx r3, r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_2: # %false +; CHECK-NEXT: mr r3, r6 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: poc_i64: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: andi. r5, r5, 1 +; CHECK-BE-NEXT: bc 4, gt, .LBB1_2 +; CHECK-BE-NEXT: # %bb.1: # %true +; CHECK-BE-NEXT: extsw r4, r4 +; CHECK-BE-NEXT: sldi r4, r4, 3 +; CHECK-BE-NEXT: ldx r3, r3, r4 +; CHECK-BE-NEXT: blr +; CHECK-BE-NEXT: .LBB1_2: # %false +; CHECK-BE-NEXT: mr r3, r6 +; CHECK-BE-NEXT: blr +; +; CHECK-P9-LABEL: poc_i64: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: andi. r5, r5, 1 +; CHECK-P9-NEXT: bc 4, gt, .LBB1_2 +; CHECK-P9-NEXT: # %bb.1: # %true +; CHECK-P9-NEXT: extswsli r4, r4, 3 +; CHECK-P9-NEXT: ldx r3, r3, r4 +; CHECK-P9-NEXT: blr +; CHECK-P9-NEXT: .LBB1_2: # %false +; CHECK-P9-NEXT: mr r3, r6 +; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: poc_i64: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: andi. 
r5, r5, 1 +; CHECK-P9-BE-NEXT: bc 4, gt, .LBB1_2 +; CHECK-P9-BE-NEXT: # %bb.1: # %true +; CHECK-P9-BE-NEXT: extswsli r4, r4, 3 +; CHECK-P9-BE-NEXT: ldx r3, r3, r4 +; CHECK-P9-BE-NEXT: blr +; CHECK-P9-BE-NEXT: .LBB1_2: # %false +; CHECK-P9-BE-NEXT: mr r3, r6 +; CHECK-P9-BE-NEXT: blr +entry: + %iconv = sext i32 %index to i64 + br i1 %flag, label %true, label %false + +true: + %ptr = getelementptr inbounds i64, i64* %base, i64 %iconv + %value = load i64, i64* %ptr, align 8 + ret i64 %value + +false: + ret i64 %default +} + +define dso_local i64 @no_extswsli(i64* %base, i32 %index, i1 %flag) { +; CHECK-LABEL: no_extswsli: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. r5, r5, 1 +; CHECK-NEXT: extsw r4, r4 +; CHECK-NEXT: bc 4, gt, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %true +; CHECK-NEXT: sldi r4, r4, 3 +; CHECK-NEXT: ldx r3, r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB2_2: # %false +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: blr +; +; CHECK-BE-LABEL: no_extswsli: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: andi. r5, r5, 1 +; CHECK-BE-NEXT: extsw r4, r4 +; CHECK-BE-NEXT: bc 4, gt, .LBB2_2 +; CHECK-BE-NEXT: # %bb.1: # %true +; CHECK-BE-NEXT: sldi r4, r4, 3 +; CHECK-BE-NEXT: ldx r3, r3, r4 +; CHECK-BE-NEXT: blr +; CHECK-BE-NEXT: .LBB2_2: # %false +; CHECK-BE-NEXT: mr r3, r4 +; CHECK-BE-NEXT: blr +; +; CHECK-P9-LABEL: no_extswsli: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: extsw r4, r4 +; CHECK-P9-NEXT: andi. r5, r5, 1 +; CHECK-P9-NEXT: bc 4, gt, .LBB2_2 +; CHECK-P9-NEXT: # %bb.1: # %true +; CHECK-P9-NEXT: sldi r4, r4, 3 +; CHECK-P9-NEXT: ldx r3, r3, r4 +; CHECK-P9-NEXT: blr +; CHECK-P9-NEXT: .LBB2_2: # %false +; CHECK-P9-NEXT: mr r3, r4 +; CHECK-P9-NEXT: blr +; +; CHECK-P9-BE-LABEL: no_extswsli: +; CHECK-P9-BE: # %bb.0: # %entry +; CHECK-P9-BE-NEXT: extsw r4, r4 +; CHECK-P9-BE-NEXT: andi. 
r5, r5, 1 +; CHECK-P9-BE-NEXT: bc 4, gt, .LBB2_2 +; CHECK-P9-BE-NEXT: # %bb.1: # %true +; CHECK-P9-BE-NEXT: sldi r4, r4, 3 +; CHECK-P9-BE-NEXT: ldx r3, r3, r4 +; CHECK-P9-BE-NEXT: blr +; CHECK-P9-BE-NEXT: .LBB2_2: # %false +; CHECK-P9-BE-NEXT: mr r3, r4 +; CHECK-P9-BE-NEXT: blr +entry: + %iconv = sext i32 %index to i64 + br i1 %flag, label %true, label %false + +true: + %ptr = getelementptr inbounds i64, i64* %base, i64 %iconv + %value = load i64, i64* %ptr, align 8 + ret i64 %value + +false: + ret i64 %iconv +}