diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -971,18 +971,19 @@ MachineInstr **AndInstr, const TargetRegisterInfo *TRI, bool &NoSignFlag, bool &ClearsOverflowFlag) { - if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG) + if (!(CmpValDefInstr.getOpcode() == X86::SUBREG_TO_REG && + CmpInstr.getOpcode() == X86::TEST64rr) && + !(CmpValDefInstr.getOpcode() == X86::COPY && + CmpInstr.getOpcode() == X86::TEST16rr)) return false; - if (CmpInstr.getOpcode() != X86::TEST64rr) - return false; - - // CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare` - // guarantees that it's analyzable only if two registers are identical. - assert( - (CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) && - "CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` " - "requires two reg operands are the same."); + // CmpInstr is a TEST16rr/TEST64rr instruction, and + // `X86InstrInfo::analyzeCompare` guarantees that it's analyzable only if two + // registers are identical. + assert((CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) && + "CmpInstr is an analyzable TEST16rr/TEST64rr, and " + "`X86InstrInfo::analyzeCompare` requires two reg operands are the" + "same."); // Caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that // `CmpValDefInstr` defines the value that's used by `CmpInstr`; in this case @@ -990,20 +991,37 @@ // redundant. assert( (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) && - "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG."); + "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG or TEST16rr " + "is a user of COPY sub16bit."); + MachineInstr *VregDefInstr = nullptr; + if (CmpValDefInstr.getOpcode() == X86::COPY && + CmpInstr.getOpcode() == X86::TEST16rr) { + VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(1).getReg()); + if (!VregDefInstr) + return false; + // We can only remove test when AND32ri or AND64ri32 whose imm can fit 16bit + // size, others 32/64 bit ops would test higher bits which test16rr don't + // want to. + if (!((VregDefInstr->getOpcode() == X86::AND32ri || + VregDefInstr->getOpcode() == X86::AND64ri32) && + isInt<16>(VregDefInstr->getOperand(2).getImm()))) + return false; + } - // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is typically - // 0. - if (CmpValDefInstr.getOperand(1).getImm() != 0) - return false; + if (CmpValDefInstr.getOpcode() == X86::SUBREG_TO_REG && + CmpInstr.getOpcode() == X86::TEST64rr) { + // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is + // typically 0. + if (CmpValDefInstr.getOperand(1).getImm() != 0) + return false; - // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically - // sub_32bit or sub_xmm. - if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit) - return false; + // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically + // sub_32bit or sub_xmm. + if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit) + return false; - MachineInstr *VregDefInstr = - MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg()); + VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg()); + } assert(VregDefInstr && "Must have a definition (SSA)"); @@ -1021,6 +1039,11 @@ // ... // EFLAGS not changed // %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit // test64rr %extended_reg, %extended_reg, implicit-def $eflags + // or + // %reg = and32* ... + // ... // EFLAGS not changed. + // %src_reg = copy %reg.sub_16bit:gr32 + // test16rr %src_reg, %src_reg, implicit-def $eflags // // If subsequent readers use a subset of bits that don't change // after `and*` instructions, it's likely that the test64rr could @@ -4421,10 +4444,15 @@ break; } - // Look back for the following pattern, in which case the test64rr - // instruction could be erased. + // Look back for the following pattern, in which case the + // test16rr/test64rr instruction could be erased. // - // Example: + // Example for test16rr: + // %reg = and32ri %in_reg, 5 + // ... // EFLAGS not changed. + // %src_reg = copy %reg.sub_16bit:gr32 + // test16rr %src_reg, %src_reg, implicit-def $eflags + // Example for test64rr: // %reg = and32ri %in_reg, 5 // ... // EFLAGS not changed. // %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index diff --git a/llvm/test/CodeGen/X86/peephole-test-after-add.mir b/llvm/test/CodeGen/X86/peephole-test-after-add.mir --- a/llvm/test/CodeGen/X86/peephole-test-after-add.mir +++ b/llvm/test/CodeGen/X86/peephole-test-after-add.mir @@ -38,6 +38,38 @@ ret void } + define i16 @erase_test16(i16 %0, i16 %1, ptr nocapture %2) { + entry: + %3 = icmp ne i16 %0, 0 + %4 = and i16 %1, 123 + %5 = icmp eq i16 %4, 0 + %6 = select i1 %3, i1 %5, i1 false + br i1 %6, label %if.then, label %if.end + + if.then: ; preds = %entry + store i16 %0, ptr %2, align 4 + br label %if.end + + if.end: ; preds = %if.then, %entry + ret i16 0 + } + + define i16 @erase_test16_bigimm(i16 %0, i32 %1, ptr nocapture %2) { + entry: + %3 = icmp ne i16 %0, 0 + %4 = and i32 %1, 123456 + %truc = trunc i32 %4 to i16 + %5 = icmp eq i16 %truc, 0 + %6 = select i1 %3, i1 %5, i1 false + br i1 %6, label %if.then, label %if.end + + if.then: ; preds = %entry + store i16 %0, ptr %2, align 4 + br label %if.end + + if.end: ; preds = %if.then, %entry + ret i16 0 + } ... --- name: test_erased @@ -194,3 +226,254 @@ RET 0 ... +--- +name: erase_test16 +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: true +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: gr32, preferred-register: '' } + - { id: 1, class: gr32, preferred-register: '' } + - { id: 2, class: gr64, preferred-register: '' } + - { id: 3, class: gr16, preferred-register: '' } + - { id: 4, class: gr16, preferred-register: '' } + - { id: 5, class: gr32, preferred-register: '' } + - { id: 6, class: gr32, preferred-register: '' } + - { id: 7, class: gr16, preferred-register: '' } +liveins: + - { reg: '$edi', virtual-reg: '%0' } + - { reg: '$esi', virtual-reg: '%1' } + - { reg: '$rdx', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: erase_test16 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x60000000), %bb.3(0x20000000) + ; CHECK-NEXT: liveins: $edi, $esi, $rdx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdx + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit + ; CHECK-NEXT: TEST16rr [[COPY3]], [[COPY3]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.3, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.entry: + ; CHECK-NEXT: successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 123, implicit-def $eflags + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY [[AND32ri]].sub_16bit + ; CHECK-NEXT: JCC_1 %bb.3, 5, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: MOV16mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s16) into %ir.2, align 4) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.end: + ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr16 = COPY [[MOV32r0_]].sub_16bit + ; CHECK-NEXT: $ax = COPY [[COPY5]] + ; CHECK-NEXT: RET 0, $ax + bb.0.entry: + successors: %bb.3(0x60000000), %bb.2(0x20000000) + liveins: $edi, $esi, $rdx + + %2:gr64 = COPY $rdx + %1:gr32 = COPY $esi + %0:gr32 = COPY $edi + %3:gr16 = COPY %0.sub_16bit + TEST16rr %3, %3, implicit-def $eflags + JCC_1 %bb.2, 4, implicit $eflags + JMP_1 %bb.3 + + bb.3.entry: + successors: %bb.1(0x55555555), %bb.2(0x2aaaaaab) + + %5:gr32 = AND32ri %1, 123, implicit-def dead $eflags + %4:gr16 = COPY %5.sub_16bit + TEST16rr %4, %4, implicit-def $eflags + JCC_1 %bb.2, 5, implicit $eflags + JMP_1 %bb.1 + + bb.1.if.then: + successors: %bb.2(0x80000000) + + MOV16mr %2, 1, $noreg, 0, $noreg, %3 :: (store (s16) into %ir.2, align 4) + + bb.2.if.end: + %6:gr32 = MOV32r0 implicit-def dead $eflags + %7:gr16 = COPY %6.sub_16bit + $ax = COPY %7 + RET 0, $ax + +... +--- +name: erase_test16_bigimm +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: true +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: gr32, preferred-register: '' } + - { id: 1, class: gr32, preferred-register: '' } + - { id: 2, class: gr64, preferred-register: '' } + - { id: 3, class: gr16, preferred-register: '' } + - { id: 4, class: gr16, preferred-register: '' } + - { id: 5, class: gr32, preferred-register: '' } + - { id: 6, class: gr32, preferred-register: '' } + - { id: 7, class: gr16, preferred-register: '' } +liveins: + - { reg: '$edi', virtual-reg: '%0' } + - { reg: '$esi', virtual-reg: '%1' } + - { reg: '$rdx', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: erase_test16_bigimm + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x60000000), %bb.3(0x20000000) + ; CHECK-NEXT: liveins: $edi, $esi, $rdx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rdx + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY [[COPY2]].sub_16bit + ; CHECK-NEXT: TEST16rr [[COPY3]], [[COPY3]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.3, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.entry: + ; CHECK-NEXT: successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[COPY1]], 57920, implicit-def dead $eflags + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY [[AND32ri]].sub_16bit + ; CHECK-NEXT: TEST16rr [[COPY4]], [[COPY4]], implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.3, 5, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: MOV16mr [[COPY]], 1, $noreg, 0, $noreg, [[COPY3]] :: (store (s16) into %ir.2, align 4) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.end: + ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr16 = COPY [[MOV32r0_]].sub_16bit + ; CHECK-NEXT: $ax = COPY [[COPY5]] + ; CHECK-NEXT: RET 0, $ax + bb.0.entry: + successors: %bb.3(0x60000000), %bb.2(0x20000000) + liveins: $edi, $esi, $rdx + + %2:gr64 = COPY $rdx + %1:gr32 = COPY $esi + %0:gr32 = COPY $edi + %3:gr16 = COPY %0.sub_16bit + TEST16rr %3, %3, implicit-def $eflags + JCC_1 %bb.2, 4, implicit $eflags + JMP_1 %bb.3 + + bb.3.entry: + successors: %bb.1(0x55555555), %bb.2(0x2aaaaaab) + + %5:gr32 = AND32ri %1, 57920, implicit-def dead $eflags + %4:gr16 = COPY %5.sub_16bit + TEST16rr %4, %4, implicit-def $eflags + JCC_1 %bb.2, 5, implicit $eflags + JMP_1 %bb.1 + + bb.1.if.then: + successors: %bb.2(0x80000000) + + MOV16mr %2, 1, $noreg, 0, $noreg, %3 :: (store (s16) into %ir.2, align 4) + + bb.2.if.end: + %6:gr32 = MOV32r0 implicit-def dead $eflags + %7:gr16 = COPY %6.sub_16bit + $ax = COPY %7 + RET 0, $ax + +...