diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -25,13 +25,16 @@
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -964,6 +967,105 @@
   return ShAmt < 4 && ShAmt > 0;
 }
 
+inline static bool isTestRR(MachineInstr &CmpInstr) {
+  unsigned int opcode = CmpInstr.getOpcode();
+  return (opcode == X86::TEST8rr) || (opcode == X86::TEST16rr) ||
+         (opcode == X86::TEST32rr) || (opcode == X86::TEST64rr);
+}
+
+inline static bool isRegDefConvertible(MachineInstr &CmpInstr,
+                                       MachineInstr &SUBREG_TO_REG_MI,
+                                       const MachineRegisterInfo *MRI,
+                                       MachineInstr **DefInstr,
+                                       const TargetRegisterInfo *TRI) {
+
+  // FIXME: Generalize for other cmp instructions.
+  if (!isTestRR(CmpInstr))
+    return false;
+
+  assert((SUBREG_TO_REG_MI.getOpcode() == X86::SUBREG_TO_REG) &&
+         "Should be guaranteed by caller.");
+  assert((CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
+         "This is guaranteed by `X86InstrInfo::analyzeCompare` for `TEST*rr` "
+         "instructions");
+  assert(
+      (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &SUBREG_TO_REG_MI) &&
+      "Should be guaranteed by caller.");
+
+  assert(SUBREG_TO_REG_MI.getNumOperands() == 4);
+
+  MachineOperand MO1 = SUBREG_TO_REG_MI.getOperand(1);
+
+  // FIXME: Handle the case that MO1.isCImm() is true if necessary.
+  assert((MO1.isImm()) && "MO1 expected to be immediate number");
+  if (MO1.getImm() != 0)
+    return false;
+
+  // MO1 is 0, so we get a sequence of instructions like
+  //   %reg = and %reg1 %reg2
+  //   ...
+  //   %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_index
+  //   testrr %extended_reg, %extended_reg, implicit-def $eflags
+
+  MachineOperand MO2 = SUBREG_TO_REG_MI.getOperand(2);
+
+  assert((MO2.isReg()) &&
+         "MO2 should be register by definition of SUBREG_TO_REG");
+
+  if (!MO2.isKill()) {
+    return false;
+  }
+
+  // SUBREG_TO_REG_MI has the last use of the register value.
+  MachineInstr *vregDefInstr = MRI->getVRegDef(MO2.getReg());
+
+  assert(vregDefInstr && "Must have a definition (SSA)");
+
+  // Requires `SUBREG_TO_REG_MI` and `vregDefInstr` are from the same MBB
+  // to simplify the subsequent analysis.
+  //
+  // FIXME: If `vregDefInstr->getParent()` is the only predecessor of
+  // `SUBREG_TO_REG_MI.getParent()`, this could be handled.
+  if (vregDefInstr->getParent() != SUBREG_TO_REG_MI.getParent()) {
+    return false;
+  }
+
+  // By now, for instruction
+  //   %res = SUBREG_TO_REG $imm, %ins, %subidx
+  //
+  // it's guaranteed that
+  // 1) $imm is zero
+  // 2) %ins is defined by `vregDefInstr`.
+  // 3) `vregDefInstr` is from the same basic block as `SUBREG_TO_REG_MI`.
+
+  // {AND, XOR, OR} set the same flag bits in EFLAGS as TEST does.
+  if (X86::isAND(vregDefInstr->getOpcode()) ||
+      X86::isOR(vregDefInstr->getOpcode()) ||
+      X86::isXOR(vregDefInstr->getOpcode())) {
+    for (MachineInstr &Instr :
+         make_range(std::next(MachineBasicBlock::iterator(vregDefInstr)),
+                    MachineBasicBlock::iterator(SUBREG_TO_REG_MI))) {
+      if (Instr.modifiesRegister(X86::EFLAGS, TRI)) {
+        // There are instructions between 'vregDefInstr' and
+        // 'SUBREG_TO_REG_MI' that modify EFLAGS.
+        return false;
+      }
+    }
+
+    assert((vregDefInstr->getNumOperands() > 0) &&
+           "vregDefInstr is one of AND/OR/XOR");
+
+    MachineOperand OutputOperand = vregDefInstr->getOperand(0);
+    if (OutputOperand.isReg()) {
+      if (MRI->hasOneUse(OutputOperand.getReg())) {
+        *DefInstr = vregDefInstr;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
                                   unsigned Opc, bool AllowSP, Register &NewSrc,
                                   bool &isKill, MachineOperand &ImplicitOp,
@@ -4197,6 +4299,28 @@
         MI = &Inst;
         break;
       }
+
+      // `Inst` defines `SrcReg`.
+      //
+      // Look back for the following patterns
+      //
+      //   %reg = and ...          // An instruction that sets the same
+      //                           // eflag bits as test.
+      //   ...                     // EFLAGS not changed.
+      //   %new_reg = subreg_to_reg 0, %reg, %subreg.sub_index
+      //   testrr %new_reg, %new_reg, implicit-def $eflags  // could be
+      //                                                    // removed.
+
+      MachineInstr *DefInstr = nullptr;
+      if (IsCmpZero && (Inst.getOpcode() == X86::SUBREG_TO_REG) &&
+          isRegDefConvertible(CmpInstr, Inst, MRI, &DefInstr, TRI)) {
+        assert(DefInstr != nullptr);
+        // {AND, OR, XOR} update SF; conservatively do not treat OF as cleared.
+        NoSignFlag = false;
+        ClearsOverflowFlag = false;
+        MI = DefInstr;
+        break;
+      }
       // Cannot find other candidates before definition of SrcReg.
       return false;
     }
diff --git a/llvm/test/CodeGen/X86/peephole-test-after-add.mir b/llvm/test/CodeGen/X86/peephole-test-after-add.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/peephole-test-after-add.mir
@@ -0,0 +1,140 @@
+# RUN: llc -o - %s -mtriple=x86_64-unknown-linux-gnu --run-pass=peephole-opt | FileCheck %s
+
+# Check that the TEST64rr in %ir-block.5 is erased, since AND32ri8 sets the same flag bits and no other instruction changes EFLAGS in between.
+--- |
+  ; ModuleID = 'experimental/users/mingmingl/codegen/example.ll'
+  source_filename = "experimental/users/mingmingl/codegen/example.ll"
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+  define void @func(ptr %0, i64 %1, i64 %2) {
+    %4 = icmp slt i64 %1, %2
+    br i1 %4, label %.preheader, label %24
+
+  .preheader:                                       ; preds = %3
+    br label %5
+
+  5:                                                ; preds = %.preheader, %21
+    %6 = phi ptr [ %13, %21 ], [ %0, %.preheader ]
+    %7 = phi i64 [ %22, %21 ], [ %1, %.preheader ]
+    %8 = getelementptr inbounds i64, ptr %6, i64 -1
+    %9 = load i64, ptr %8, align 8
+    %10 = and i64 %9, 3
+    %11 = icmp eq i64 %10, 0
+    %12 = select i1 %11, i64 %9, i64 %10
+    %13 = getelementptr inbounds i64, ptr %6, i64 %12
+    %14 = icmp slt i64 %7, 0
+    br i1 %14, label %21, label %15
+
+  15:                                               ; preds = %5
+    %16 = icmp eq i64 %10, 0
+    %17 = select i1 %16, i64 0, i64 %7
+    %18 = getelementptr i64, ptr %13, i64 %17
+    %19 = load i64, ptr %18, align 8
+    %20 = add i64 %19, %7
+    store i64 %20, ptr %18, align 8
+    br label %21
+
+  21:                                               ; preds = %15, %5
+    %22 = phi i64 [ %20, %15 ], [ %7, %5 ]
+    %23 = icmp slt i64 %22, %2
+    br i1 %23, label %5, label %24
+
+  24:                                               ; preds = %21, %3
+    ret void
+  }
+
+...
+---
+name:            func
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gr64, preferred-register: '' }
+  - { id: 1, class: gr64, preferred-register: '' }
+  - { id: 2, class: gr64, preferred-register: '' }
+  - { id: 3, class: gr64, preferred-register: '' }
+  - { id: 4, class: gr64, preferred-register: '' }
+  - { id: 5, class: gr64, preferred-register: '' }
+  - { id: 6, class: gr64, preferred-register: '' }
+  - { id: 7, class: gr64, preferred-register: '' }
+  - { id: 8, class: gr64, preferred-register: '' }
+  - { id: 9, class: gr64, preferred-register: '' }
+  - { id: 10, class: gr64, preferred-register: '' }
+  - { id: 11, class: gr32, preferred-register: '' }
+  - { id: 12, class: gr32, preferred-register: '' }
+  - { id: 13, class: gr64_nosp, preferred-register: '' }
+  - { id: 14, class: gr64_nosp, preferred-register: '' }
+  - { id: 15, class: gr64, preferred-register: '' }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%6' }
+  - { reg: '$rsi', virtual-reg: '%7' }
+  - { reg: '$rdx', virtual-reg: '%8' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.3):
+    successors: %bb.1(0x40000000), %bb.5(0x40000000)
+    liveins: $rdi, $rsi, $rdx
+
+    %8:gr64 = COPY $rdx
+    %7:gr64 = COPY $rsi
+    %6:gr64 = COPY $rdi
+    %9:gr64 = SUB64rr %7, %8, implicit-def $eflags
+    JCC_1 %bb.5, 13, implicit $eflags
+    JMP_1 %bb.1
+
+  bb.1..preheader:
+    successors: %bb.2(0x80000000)
+
+
+  bb.2 (%ir-block.5):
+    successors: %bb.4(0x30000000), %bb.3(0x50000000)
+
+    %0:gr64 = PHI %6, %bb.1, %3, %bb.4
+    %1:gr64 = PHI %7, %bb.1, %5, %bb.4
+    %10:gr64 = MOV64rm %0, 1, $noreg, -8, $noreg :: (load (s64) from %ir.8)
+    %11:gr32 = COPY %10.sub_32bit
+
+    ; CHECK: %12:gr32 = AND32ri8 %11, 3, implicit-def $eflags
+    ; CHECK-NEXT: %2:gr64 = SUBREG_TO_REG 0, killed %12, %subreg.sub_32bit
+    ; CHECK-NOT: TEST64rr %2, %2, implicit-def $eflags
+    ; CHECK-NEXT: %13:gr64_nosp = CMOV64rr %2, %10, 4, implicit $eflags
+
+    %12:gr32 = AND32ri8 %11, 3, implicit-def dead $eflags
+    %2:gr64 = SUBREG_TO_REG 0, killed %12, %subreg.sub_32bit
+    TEST64rr %2, %2, implicit-def $eflags
+    %13:gr64_nosp = CMOV64rr %2, %10, 4, implicit $eflags
+    %3:gr64 = LEA64r %0, 8, killed %13, 0, $noreg
+    TEST64rr %1, %1, implicit-def $eflags
+    JCC_1 %bb.4, 8, implicit $eflags
+    JMP_1 %bb.3
+
+  bb.3 (%ir-block.15):
+    successors: %bb.4(0x80000000)
+
+    TEST64rr %2, %2, implicit-def $eflags
+    %14:gr64_nosp = CMOV64rr %1, %2, 4, implicit $eflags
+    %4:gr64 = ADD64rm %1, %3, 8, %14, 0, $noreg, implicit-def dead $eflags :: (load (s64) from %ir.18)
+    MOV64mr %3, 8, %14, 0, $noreg, %4 :: (store (s64) into %ir.18)
+
+  bb.4 (%ir-block.21):
+    successors: %bb.2(0x7c000000), %bb.5(0x04000000)
+
+    %5:gr64 = PHI %1, %bb.2, %4, %bb.3
+    %15:gr64 = SUB64rr %5, %8, implicit-def $eflags
+    JCC_1 %bb.2, 12, implicit $eflags
+    JMP_1 %bb.5
+
+  bb.5 (%ir-block.24):
+    RET 0
+
+...
diff --git a/llvm/test/CodeGen/X86/pr-placeholder.mir b/llvm/test/CodeGen/X86/pr-placeholder.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr-placeholder.mir
@@ -0,0 +1,155 @@
+# RUN: llc -o - %s -mtriple=x86_64-unknown-linux-gnu --run-pass=peephole-opt | FileCheck %s
+
+# This is a companion test of peephole-test-after-add.mir.
+# In %ir-block.5, the following sequence of instructions exists:
+#
+#   %res = and ...
+#   xor ...
+#   test %res, %res
+#
+# Since EFLAGS are set by the xor in the middle, the test instruction is retained.
+
+--- |
+  ; ModuleID = 'interleave.ll'
+  source_filename = "interleave.ll"
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+  define void @func(ptr %0, i64 %1, i64 %2) {
+    %4 = icmp slt i64 %1, %2
+    br i1 %4, label %.preheader, label %27
+
+  .preheader:                                       ; preds = %3
+    br label %5
+
+  5:                                                ; preds = %.preheader, %24
+    %6 = phi ptr [ %16, %24 ], [ %0, %.preheader ]
+    %7 = phi i64 [ %25, %24 ], [ %1, %.preheader ]
+    %8 = getelementptr inbounds i64, ptr %6, i64 -1
+    %9 = load i64, ptr %8, align 8
+    %10 = and i64 %9, 3
+    %11 = xor i64 %10, 5
+    %12 = icmp eq i64 %10, 0
+    %13 = getelementptr inbounds i64, ptr %6, i64 %10
+    %14 = getelementptr inbounds i64, ptr %6, i64 %9
+    %15 = getelementptr inbounds i64, ptr %14, i64 %11
+    %16 = select i1 %12, ptr %15, ptr %13
+    %17 = icmp slt i64 %7, 0
+    br i1 %17, label %24, label %18
+
+  18:                                               ; preds = %5
+    %19 = icmp eq i64 %10, 0
+    %20 = select i1 %19, i64 0, i64 %7
+    %21 = getelementptr i64, ptr %16, i64 %20
+    %22 = load i64, ptr %21, align 8
+    %23 = add i64 %22, %7
+    store i64 %23, ptr %21, align 8
+    br label %24
+
+  24:                                               ; preds = %18, %5
+    %25 = phi i64 [ %23, %18 ], [ %7, %5 ]
+    %26 = icmp slt i64 %25, %2
+    br i1 %26, label %5, label %27
+
+  27:                                               ; preds = %24, %3
+    ret void
+  }
+
+...
+---
+name:            func
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gr64, preferred-register: '' }
+  - { id: 1, class: gr64, preferred-register: '' }
+  - { id: 2, class: gr64_nosp, preferred-register: '' }
+  - { id: 3, class: gr64, preferred-register: '' }
+  - { id: 4, class: gr64, preferred-register: '' }
+  - { id: 5, class: gr64, preferred-register: '' }
+  - { id: 6, class: gr64, preferred-register: '' }
+  - { id: 7, class: gr64, preferred-register: '' }
+  - { id: 8, class: gr64, preferred-register: '' }
+  - { id: 9, class: gr64, preferred-register: '' }
+  - { id: 10, class: gr64_nosp, preferred-register: '' }
+  - { id: 11, class: gr32, preferred-register: '' }
+  - { id: 12, class: gr32, preferred-register: '' }
+  - { id: 13, class: gr64_nosp, preferred-register: '' }
+  - { id: 14, class: gr64, preferred-register: '' }
+  - { id: 15, class: gr64, preferred-register: '' }
+  - { id: 16, class: gr64, preferred-register: '' }
+  - { id: 17, class: gr64_nosp, preferred-register: '' }
+  - { id: 18, class: gr64, preferred-register: '' }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%6' }
+  - { reg: '$rsi', virtual-reg: '%7' }
+  - { reg: '$rdx', virtual-reg: '%8' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.3):
+    successors: %bb.1, %bb.5
+    liveins: $rdi, $rsi, $rdx
+
+    %8:gr64 = COPY $rdx
+    %7:gr64 = COPY $rsi
+    %6:gr64 = COPY $rdi
+    %9:gr64 = SUB64rr %7, %8, implicit-def $eflags
+    JCC_1 %bb.5, 13, implicit $eflags
+    JMP_1 %bb.1
+
+  bb.1..preheader:
+
+
+  bb.2 (%ir-block.5):
+    successors: %bb.4(0x30000000), %bb.3(0x50000000)
+
+    %0:gr64 = PHI %6, %bb.1, %3, %bb.4
+    %1:gr64 = PHI %7, %bb.1, %5, %bb.4
+    %10:gr64_nosp = MOV64rm %0, 1, $noreg, -8, $noreg :: (load (s64) from %ir.8)
+    %11:gr32 = COPY %10.sub_32bit
+
+    ; CHECK: %12:gr32 = AND32ri8 %11, 3, implicit-def dead $eflags
+    ; CHECK: %2:gr64_nosp = SUBREG_TO_REG 0, killed %12, %subreg.sub_32bit
+    ; CHECK: %13:gr64_nosp = XOR64ri8 %2, 5, implicit-def dead $eflags
+    ; CHECK: TEST64rr %2, %2, implicit-def $eflags
+
+    %12:gr32 = AND32ri8 %11, 3, implicit-def dead $eflags
+    %2:gr64_nosp = SUBREG_TO_REG 0, killed %12, %subreg.sub_32bit
+    %13:gr64_nosp = XOR64ri8 %2, 5, implicit-def dead $eflags
+    TEST64rr %2, %2, implicit-def $eflags
+    %14:gr64 = LEA64r %0, 8, %2, 0, $noreg
+    %15:gr64 = LEA64r %0, 8, %10, 0, $noreg
+    %16:gr64 = LEA64r killed %15, 8, killed %13, 0, $noreg
+    %3:gr64 = CMOV64rr %14, killed %16, 4, implicit $eflags
+    TEST64rr %1, %1, implicit-def $eflags
+    JCC_1 %bb.4, 8, implicit $eflags
+    JMP_1 %bb.3
+
+  bb.3 (%ir-block.18):
+
+    TEST64rr %2, %2, implicit-def $eflags
+    %17:gr64_nosp = CMOV64rr %1, %2, 4, implicit $eflags
+    %4:gr64 = ADD64rm %1, %3, 8, %17, 0, $noreg, implicit-def dead $eflags :: (load (s64) from %ir.21)
+    MOV64mr %3, 8, %17, 0, $noreg, %4 :: (store (s64) into %ir.21)
+
+  bb.4 (%ir-block.24):
+    successors: %bb.2(0x7c000000), %bb.5(0x04000000)
+
+    %5:gr64 = PHI %1, %bb.2, %4, %bb.3
+    %18:gr64 = SUB64rr %5, %8, implicit-def $eflags
+    JCC_1 %bb.2, 12, implicit $eflags
+    JMP_1 %bb.5
+
+  bb.5 (%ir-block.27):
+    RET 0
+
+...