diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -25,13 +25,16 @@
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -964,6 +967,118 @@
   return ShAmt < 4 && ShAmt > 0;
 }
 
+// Returns true if SF is known to be zero after AndInstr executes.
+inline static bool isSFZero(const MachineInstr &AndInstr) {
+  assert(X86::isAND(AndInstr.getOpcode()) && "Caller guaranteed");
+
+  int64_t Mask = 0, Imm = 0;
+  const MachineOperand &MO2 = AndInstr.getOperand(2);
+  // Only AND32* opcodes are handled in the switch below;
+  // that is, {AND8*, AND16*, AND64*} are not handled.
+  //
+  // The rationale is that the caller knows AndInstr produces a 32-bit
+  // value that is zero-extended into a 64-bit register.
+  switch (AndInstr.getOpcode()) {
+  default:
+    return false;
+  case X86::AND32ri8:
+  case X86::AND32mi8:
+    // The imm8 is sign-extended to 32 bits, so SF is zero iff bit 7 of
+    // the immediate is zero.
+    Mask = 0x80;
+    assert(MO2.isImm() && "AndInstr is AND32ri8 or AND32mi8");
+    Imm = MO2.getImm();
+    break;
+  case X86::AND32ri:
+  case X86::AND32i32: // AND32i32 is the EAX, imm32 form.
+  case X86::AND32mi:
+    Mask = 0x80000000;
+    assert(MO2.isImm() && "AndInstr is AND32ri, AND32i32 or AND32mi");
+    Imm = MO2.getImm();
+    break;
+  }
+  // SF mirrors the sign bit of the result; an AND whose immediate has a
+  // clear sign bit always produces a result with a clear sign bit.
+  return (Mask & Imm) == 0;
+}
+
+inline static bool isRedundantFlagInstr(MachineInstr &CmpInstr,
+                                        MachineInstr &CmpValDefInstr,
+                                        const MachineRegisterInfo *MRI,
+                                        MachineInstr **AndInstr,
+                                        const TargetRegisterInfo *TRI) {
+  // FIXME: Handle the case when CmpValDefInstr.getOpcode() ==
+  // X86::INSERT_SUBREG.
+  if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG)
+    return false;
+
+  if (CmpInstr.getOpcode() != X86::TEST64rr)
+    return false;
+
+  // CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare`
+  // guarantees that it's analyzable only if the two register operands are
+  // identical.
+  assert(
+      (CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
+      "CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` "
+      "requires that the two register operands are the same.");
+
+  assert(
+      (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
+      "Caller guaranteed.");
+
+  assert((CmpValDefInstr.getNumOperands() == 4) &&
+         "Guaranteed by the definition of SUBREG_TO_REG.");
+
+  const MachineOperand &MO1 = CmpValDefInstr.getOperand(1);
+
+  assert(MO1.isImm() && "MO1 should be an immediate");
+  // As seen in X86 td files, MO1.getImm() is typically 0.
+  if (MO1.getImm() != 0)
+    return false;
+
+  const MachineOperand &MO3 = CmpValDefInstr.getOperand(3);
+  assert(MO3.isImm() &&
+         "MO3 should be an immediate representing a subregister index");
+  // As seen in X86 td files, MO3 is typically sub_32bit or sub_xmm.
+  if (MO3.getImm() != X86::sub_32bit)
+    return false;
+
+  const MachineOperand &MO2 = CmpValDefInstr.getOperand(2);
+  assert(MO2.isReg() &&
+         "MO2 should be a register by the definition of SUBREG_TO_REG");
+  MachineInstr *VregDefInstr = MRI->getVRegDef(MO2.getReg());
+
+  assert(VregDefInstr && "Must have a definition (SSA)");
+
+  // Require `CmpValDefInstr` and `VregDefInstr` to be in the same MBB
+  // to simplify the subsequent analysis.
+  //
+  // FIXME: If `VregDefInstr->getParent()` is the only predecessor of
+  // `CmpValDefInstr.getParent()`, this could be handled.
+  if (VregDefInstr->getParent() != CmpValDefInstr.getParent())
+    return false;
+
+  if (X86::isAND(VregDefInstr->getOpcode()) && isSFZero(*VregDefInstr)) {
+    // Get a sequence of instructions like
+    //   %reg = and32* %reg1, %reg2
+    //   ...                          // EFLAGS not changed.
+    //   %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
+    //   test64rr %extended_reg, %extended_reg, implicit-def $eflags
+    //
+    // The TEST64rr can be erased since and32* is known to set SF to zero.
+    for (MachineInstr &Instr :
+         make_range(std::next(MachineBasicBlock::iterator(VregDefInstr)),
+                    MachineBasicBlock::iterator(CmpValDefInstr))) {
+      if (Instr.modifiesRegister(X86::EFLAGS, TRI)) {
+        // An instruction between `VregDefInstr` and `CmpValDefInstr`
+        // modifies EFLAGS; the TEST64rr cannot be removed.
+        return false;
+      }
+    }
+
+    *AndInstr = VregDefInstr;
+    return true;
+  }
+  return false;
+}
+
 bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
                                   unsigned Opc, bool AllowSP, Register &NewSrc,
                                   bool &isKill, MachineOperand &ImplicitOp,
@@ -4197,6 +4312,27 @@
       MI = &Inst;
       break;
     }
+
+    // Look back for an instruction that sets EFLAGS to the same value as
+    // CmpInstr would.
+    //
+    // Example:
+    //   %reg = and32ri %in_reg, 5
+    //   ...                          // EFLAGS not changed.
+    //   %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
+    //   test64rr %src_reg, %src_reg, implicit-def $eflags
+    //
+    // The test64rr instruction can be erased.
+    MachineInstr *AndInstr = nullptr;
+    if (IsCmpZero &&
+        isRedundantFlagInstr(CmpInstr, Inst, MRI, &AndInstr, TRI)) {
+      assert(AndInstr != nullptr && X86::isAND(AndInstr->getOpcode()));
+      // The AND is known to set SF to zero here (see isSFZero) and clears OF.
+      NoSignFlag = false;
+      ClearsOverflowFlag = true;
+      MI = AndInstr;
+      break;
+    }
     // Cannot find other candidates before definition of SrcReg.
     return false;
   }
diff --git a/llvm/test/CodeGen/X86/peephole-test-after-add.mir b/llvm/test/CodeGen/X86/peephole-test-after-add.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/peephole-test-after-add.mir
@@ -0,0 +1,140 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - %s -mtriple=x86_64-unknown-linux-gnu --run-pass=peephole-opt | FileCheck %s
+
+# Test that TEST64rr is erased in `test_erased` and kept in
+# `test_not_erased_when_eflags_change`.
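+#
+# At the assembly level, the rewrite checked in `test_erased` corresponds to
+# the following sketch (illustrative only, not compiler output):
+#
+#   andl $3, %eax      # ZF reflects the result; SF = 0 because bit 31 of
+#                      # the immediate is clear
+#   testq %rax, %rax   # redundant: the zero-extended value yields the same
+#                      # ZF/SF
+#   cmoveq %rsi, %rax
+#
+# becoming
+#
+#   andl $3, %eax
+#   cmoveq %rsi, %rax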
+--- | + ; ModuleID = 'tmp.ll' + source_filename = "tmp.ll" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + + define i64 @test_erased(ptr %0, i64 %1, i64 %2) { + %4 = load i64, ptr %0, align 8 + %5 = and i64 %4, 3 + %6 = icmp eq i64 %5, 0 + %7 = select i1 %6, i64 %1, i64 %5 + store i64 %7, ptr %0, align 8 + ret i64 %5 + } + + define void @test_not_erased_when_eflags_change(ptr %0, i64 %1, i64 %2, i64 %3, ptr %4) { + %6 = load i64, ptr %0, align 8 + %7 = and i64 %6, 3 + %8 = xor i64 %3, 5 + %9 = icmp eq i64 %7, 0 + %10 = select i1 %9, i64 %1, i64 %7 + store i64 %10, ptr %0, align 8 + store i64 %8, ptr %4, align 8 + ret void + } + +... +--- +name: test_erased +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64 } + - { id: 1, class: gr64 } + - { id: 2, class: gr64 } + - { id: 3, class: gr64 } + - { id: 4, class: gr32 } + - { id: 5, class: gr32 } + - { id: 6, class: gr64 } + - { id: 7, class: gr64 } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.3): + liveins: $rdi, $rsi + + ; CHECK-LABEL: name: test_erased + ; CHECK: liveins: $rdi, $rsi + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.0) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY [[MOV64rm]].sub_32bit + ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[COPY2]], 3, implicit-def $eflags + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[AND32ri8_]], %subreg.sub_32bit + ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[SUBREG_TO_REG]], [[COPY]], 4, implicit $eflags + ; CHECK-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[CMOV64rr]] :: (store (s64) into %ir.0) + ; CHECK-NEXT: $rax = COPY [[SUBREG_TO_REG]] + ; CHECK-NEXT: RET 0, $rax + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + %3:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load (s64) from %ir.0) + %4:gr32 = COPY %3.sub_32bit + %5:gr32 = AND32ri8 %4, 3, implicit-def dead $eflags + %6:gr64 = SUBREG_TO_REG 0, killed %5, %subreg.sub_32bit + TEST64rr %6, %6, implicit-def $eflags + %7:gr64 = CMOV64rr %6, %1, 4, implicit $eflags + MOV64mr %0, 1, $noreg, 0, $noreg, killed %7 :: (store (s64) into %ir.0) + $rax = COPY %6 + RET 0, $rax + +... 
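+# In `test_not_erased_when_eflags_change` below, the XOR64ri8 sits between
+# the AND32ri8 and the TEST64rr and clobbers EFLAGS, so the backward walk
+# from the SUBREG_TO_REG to the AND sees an intervening EFLAGS def and the
+# TEST64rr must be kept.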
+--- +name: test_not_erased_when_eflags_change +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64 } + - { id: 1, class: gr64 } + - { id: 2, class: gr64 } + - { id: 3, class: gr64 } + - { id: 4, class: gr64 } + - { id: 5, class: gr64 } + - { id: 6, class: gr32 } + - { id: 7, class: gr32 } + - { id: 8, class: gr64 } + - { id: 9, class: gr64 } + - { id: 10, class: gr64 } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%1' } + - { reg: '$rcx', virtual-reg: '%3' } + - { reg: '$r8', virtual-reg: '%4' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.5): + liveins: $rdi, $rsi, $rcx, $r8 + + ; CHECK-LABEL: name: test_not_erased_when_eflags_change + ; CHECK: liveins: $rdi, $rsi, $rcx, $r8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $r8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rcx + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY3]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.0) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[MOV64rm]].sub_32bit + ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[COPY4]], 3, implicit-def dead $eflags + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[AND32ri8_]], %subreg.sub_32bit + ; CHECK-NEXT: [[XOR64ri8_:%[0-9]+]]:gr64 = XOR64ri8 [[COPY1]], 5, implicit-def dead $eflags + ; CHECK-NEXT: TEST64rr [[SUBREG_TO_REG]], [[SUBREG_TO_REG]], implicit-def $eflags + ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[SUBREG_TO_REG]], [[COPY2]], 4, implicit $eflags + ; CHECK-NEXT: MOV64mr [[COPY3]], 1, $noreg, 0, $noreg, killed [[CMOV64rr]] :: (store (s64) into %ir.0) + ; CHECK-NEXT: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, killed [[XOR64ri8_]] :: (store (s64) into %ir.4) + ; CHECK-NEXT: RET 0 + %4:gr64 = COPY $r8 + %3:gr64 = COPY $rcx + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + %5:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load (s64) from %ir.0) + %6:gr32 = COPY %5.sub_32bit + %7:gr32 = AND32ri8 %6, 3, implicit-def dead $eflags + %8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit + %9:gr64 = XOR64ri8 %3, 5, implicit-def dead $eflags + TEST64rr %8, %8, implicit-def $eflags + %10:gr64 = CMOV64rr %8, %1, 4, implicit $eflags + MOV64mr %0, 1, $noreg, 0, $noreg, killed %10 :: (store (s64) into %ir.0) + MOV64mr %4, 1, $noreg, 0, $noreg, killed %9 :: (store (s64) into %ir.4) + RET 0 + +...
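For readers who want to sanity-check the sign-flag argument outside the backend, here is a minimal standalone model of the `isSFZero` reasoning. The helper name and the `main` driver are hypothetical, written only for this note; they are not LLVM API.

```cpp
#include <cassert>
#include <cstdint>

// Model of the x86 semantics the patch relies on: after `AND r32, imm`,
// SF equals bit 31 of the 32-bit result, and that result is implicitly
// zero-extended when placed in a 64-bit register. TEST64rr on the
// zero-extended value computes SF from bit 63 (always 0) and ZF from the
// same bits, so the TEST is redundant exactly when the AND already
// guarantees SF == 0.
static bool sfKnownZeroAfterAnd32(int64_t Imm, bool IsImm8Form) {
  // The imm8 forms sign-extend the encoded byte to 32 bits, so bit 7 of
  // the byte decides bit 31 of the effective mask.
  const int64_t SignBitMask = IsImm8Form ? 0x80 : 0x80000000;
  return (Imm & SignBitMask) == 0;
}

int main() {
  assert(sfKnownZeroAfterAnd32(3, /*IsImm8Form=*/true));   // and $3: SF = 0
  assert(!sfKnownZeroAfterAnd32(-1, /*IsImm8Form=*/true)); // 0xff sign-extends
  assert(!sfKnownZeroAfterAnd32(INT64_C(0x80000000),
                                /*IsImm8Form=*/false));    // bit 31 set
  return 0;
}
```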