Index: llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h +++ llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h @@ -561,9 +561,14 @@ } /// hasOneNonDBGUse - Return true if there is exactly one non-Debug - /// instruction using the specified register. + /// use of the specified register. bool hasOneNonDBGUse(unsigned RegNo) const; + /// hasOneNonDBGUser - Return true if there is exactly one non-Debug + /// instruction using the specified register. Said instruction may have + /// multiple uses. + bool hasOneNonDBGUser(unsigned RegNo) const; + /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. Index: llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp +++ llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp @@ -423,6 +423,13 @@ return ++UI == use_nodbg_end(); } +bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const { + use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo); + if (UI == use_instr_nodbg_end()) + return false; + return ++UI == use_instr_nodbg_end(); +} + /// clearKillFlags - Iterate over all the uses of the given register and /// clear the kill flag from the MachineOperand. This function is used by /// optimization passes which extend register lifetimes and need only Index: llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp =================================================================== --- llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp +++ llvm/trunk/lib/CodeGen/PeepholeOptimizer.cpp @@ -1306,7 +1306,7 @@ /// Check whether MI is a candidate for folding into a later instruction. 
/// We only fold loads to virtual registers and the virtual register defined -/// has a single use. +/// has a single user. bool PeepholeOptimizer::isLoadFoldable( MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { if (!MI.canFoldAsLoad() || !MI.mayLoad()) @@ -1316,12 +1316,12 @@ return false; unsigned Reg = MI.getOperand(0).getReg(); - // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting + // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting // loads. It should be checked when processing uses of the load, since // uses can be removed during peephole. if (!MI.getOperand(0).getSubReg() && TargetRegisterInfo::isVirtualRegister(Reg) && - MRI->hasOneNonDBGUse(Reg)) { + MRI->hasOneNonDBGUser(Reg)) { FoldAsLoadDefCandidates.insert(Reg); return true; } Index: llvm/trunk/test/CodeGen/X86/addr-mode-matcher-2.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/addr-mode-matcher-2.ll +++ llvm/trunk/test/CodeGen/X86/addr-mode-matcher-2.ll @@ -24,8 +24,7 @@ define void @foo(i1 zeroext, i32) nounwind { ; X86-LABEL: foo: ; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: testb %al, %al +; X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) ; X86-NEXT: je .LBB0_1 ; X86-NEXT: # %bb.3: ; X86-NEXT: retl Index: llvm/trunk/test/CodeGen/X86/or-branch.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/or-branch.ll +++ llvm/trunk/test/CodeGen/X86/or-branch.ll @@ -8,8 +8,7 @@ ; JUMP2-NEXT: cmpl $5, {{[0-9]+}}(%esp) ; JUMP2-NEXT: jl .LBB0_3 ; JUMP2-NEXT: # %bb.1: # %entry -; JUMP2-NEXT: movl {{[0-9]+}}(%esp), %eax -; JUMP2-NEXT: testl %eax, %eax +; JUMP2-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; JUMP2-NEXT: je .LBB0_3 ; JUMP2-NEXT: # %bb.2: # %UnifiedReturnBlock ; JUMP2-NEXT: retl Index: llvm/trunk/test/CodeGen/X86/peephole-fold-testrr.mir =================================================================== --- 
llvm/trunk/test/CodeGen/X86/peephole-fold-testrr.mir +++ llvm/trunk/test/CodeGen/X86/peephole-fold-testrr.mir @@ -0,0 +1,88 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=peephole-opt -mtriple=x86_64-- %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + define i32 @atomic(i8** %arg) { + %load = load atomic i8*, i8** %arg unordered, align 8 + %cmp = icmp eq i8* %load, null + %zext = zext i1 %cmp to i32 + ret i32 %zext + } + + define i32 @nonatomic_unoptimized(i8** %arg) { + %load = load i8*, i8** %arg, align 8 + %cmp = icmp eq i8* %load, null + %zext = zext i1 %cmp to i32 + ret i32 %zext + } + +... +--- +name: atomic +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64 } + - { id: 1, class: gr64 } + - { id: 2, class: gr8 } + - { id: 3, class: gr32 } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $rdi + + ; CHECK-LABEL: name: atomic + ; CHECK: liveins: $rdi + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load unordered 8 from %ir.arg) + ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags + ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]] + ; CHECK: $eax = COPY [[MOVZX32rr8_]] + ; CHECK: RET 0, $eax + %0:gr64 = COPY $rdi + %1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load unordered 8 from %ir.arg) + TEST64rr %1, %1, implicit-def $eflags + %2:gr8 = SETCCr 4, implicit $eflags + %3:gr32 = MOVZX32rr8 killed %2 + $eax = COPY %3 + RET 0, $eax + +... 
+--- +name: nonatomic_unoptimized +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gr64 } + - { id: 1, class: gr64 } + - { id: 2, class: gr8 } + - { id: 3, class: gr32 } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } +machineFunctionInfo: {} +body: | + bb.0 (%ir-block.0): + liveins: $rdi + + ; CHECK-LABEL: name: nonatomic_unoptimized + ; CHECK: liveins: $rdi + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: CMP64mi8 [[COPY]], 1, $noreg, 0, $noreg, 0, implicit-def $eflags :: (load 8 from %ir.arg) + ; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags + ; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]] + ; CHECK: $eax = COPY [[MOVZX32rr8_]] + ; CHECK: RET 0, $eax + %0:gr64 = COPY $rdi + %1:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load 8 from %ir.arg) + TEST64rr %1, %1, implicit-def $eflags + %2:gr8 = SETCCr 4, implicit $eflags + %3:gr32 = MOVZX32rr8 killed %2 + $eax = COPY %3 + RET 0, $eax + +... Index: llvm/trunk/test/CodeGen/X86/sibcall.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sibcall.ll +++ llvm/trunk/test/CodeGen/X86/sibcall.ll @@ -264,8 +264,7 @@ define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp { ; X86-LABEL: t11: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: testl %eax, %eax +; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; X86-NEXT: je .LBB11_1 ; X86-NEXT: # %bb.2: # %bb ; X86-NEXT: jmp foo5 # TAILCALL @@ -311,8 +310,7 @@ define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp { ; X86-LABEL: t12: ; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: testl %eax, %eax +; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; X86-NEXT: je .LBB12_1 ; X86-NEXT: # %bb.2: # %bb ; X86-NEXT: jmp foo6 # TAILCALL Index: llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- 
llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll +++ llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1386,8 +1386,7 @@ ; ENABLE-NEXT: jmp LBB16_1 ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax -; ENABLE-NEXT: movl (%rax), %eax -; ENABLE-NEXT: testl %eax, %eax +; ENABLE-NEXT: cmpl $0, (%rax) ; ENABLE-NEXT: je LBB16_3 ; ENABLE-NEXT: ## %bb.4: ## %for.body4.i ; ENABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax @@ -1430,8 +1429,7 @@ ; DISABLE-NEXT: jmp LBB16_1 ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@{{.*}}(%rip), %rax -; DISABLE-NEXT: movl (%rax), %eax -; DISABLE-NEXT: testl %eax, %eax +; DISABLE-NEXT: cmpl $0, (%rax) ; DISABLE-NEXT: je LBB16_3 ; DISABLE-NEXT: ## %bb.4: ## %for.body4.i ; DISABLE-NEXT: movq _irreducibleCFGa@{{.*}}(%rip), %rax