Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -8007,6 +8007,16 @@ case X86::SQRTSDm: case X86::SQRTSDr_Int: case X86::SQRTSDm_Int: + // GPR + case X86::POPCNT16rm: + case X86::POPCNT32rm: + case X86::POPCNT64rm: + case X86::LZCNT16rm: + case X86::LZCNT32rm: + case X86::LZCNT64rm: + case X86::TZCNT16rm: + case X86::TZCNT32rm: + case X86::TZCNT64rm: return true; } @@ -8220,6 +8230,25 @@ .addReg(XReg, RegState::Undef) .addReg(Reg, RegState::ImplicitDefine); MI.addRegisterKilled(Reg, TRI, true); + } else if (X86::GR64RegClass.contains(Reg)) { + // Using XOR32rr because it has a shorter encoding and zeroes the upper 32 + // bits of the 64-bit register as well. + unsigned XReg = TRI->getSubReg(Reg, X86::sub_32bit); + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), XReg) + .addReg(XReg, RegState::Undef) + .addReg(XReg, RegState::Undef) + .addReg(Reg, RegState::ImplicitDefine); + MI.addRegisterKilled(Reg, TRI, true); + } else if (X86::GR32RegClass.contains(Reg)) { + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR32rr), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MI.addRegisterKilled(Reg, TRI, true); + } else if (X86::GR16RegClass.contains(Reg)) { + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(X86::XOR16rr), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + MI.addRegisterKilled(Reg, TRI, true); } } Index: test/CodeGen/X86/bug3389.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/bug3389.ll @@ -0,0 +1,68 @@ +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7 -mattr=+bmi,+lzcnt | FileCheck %s + +declare i32 @llvm.ctpop.i32(i32) +declare i32 @llvm.cttz.i32(i32, i1) +declare i32 @llvm.ctlz.i32(i32, i1) + +define i32 @loopdep_popcnt(i32* nocapture %x, double* nocapture %y) nounwind { +entry: + %vx = load i32, i32* %x + br label %loop +loop: + %i = phi i32 [ 1, %entry ], [ %inc, %loop ] + %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ] + %j = tail call i32 @llvm.ctpop.i32(i32 %i) + tail call void asm sideeffect 
"", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"() + %s2 = add i32 %s1, %j + %inc = add nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, 156250000 + br i1 %exitcond, label %ret, label %loop +ret: + ret i32 %s2 + +;CHECK-LABEL:@loopdep_popcnt +;CHECK: xorl [[GPR0:%e[a-d]x]], [[GPR0]] +;CHECK-NEXT: popcntl {{.*}}, [[GPR0]] +} + +define i32 @loopdep_tzct(i32* nocapture %x, double* nocapture %y) nounwind { +entry: + %vx = load i32, i32* %x + br label %loop +loop: + %i = phi i32 [ 1, %entry ], [ %inc, %loop ] + %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ] + %j = call i32 @llvm.cttz.i32( i32 %i, i1 true ) + tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"() + %s2 = add i32 %s1, %j + %inc = add nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, 156250000 + br i1 %exitcond, label %ret, label %loop +ret: + ret i32 %s2 + +;CHECK-LABEL:@loopdep_tzct +;CHECK: xorl [[GPR0:%e[a-d]x]], [[GPR0]] +;CHECK-NEXT: tzcntl {{.*}}, [[GPR0]] +} + +define i32 @loopdep_lzct(i32* nocapture %x, double* nocapture %y) nounwind { +entry: + %vx = load i32, i32* %x + br label %loop +loop: + %i = phi i32 [ 1, %entry ], [ %inc, %loop ] + %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ] + %j = call i32 @llvm.ctlz.i32( i32 %i, i1 true ) + tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"() + %s2 = add i32 %s1, %j + %inc = add nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, 156250000 + br i1 %exitcond, label %ret, label %loop +ret: + ret i32 %s2 + +;CHECK-LABEL:@loopdep_lzct +;CHECK: xorl [[GPR0:%e[a-d]x]], [[GPR0]] +;CHECK-NEXT: lzcntl {{.*}}, [[GPR0]] +} Index: test/CodeGen/X86/clz.ll =================================================================== --- test/CodeGen/X86/clz.ll +++ test/CodeGen/X86/clz.ll @@ -121,6 +121,7 @@ ; X32-CLZ-NEXT: testl %eax, %eax ; X32-CLZ-NEXT: jne .LBB3_1 ; X32-CLZ-NEXT: # BB#2: +; X32-CLZ-NEXT: 
xorl %eax, %eax ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: addl $32, %eax ; X32-CLZ-NEXT: xorl %edx, %edx @@ -258,6 +259,7 @@ ; X32-CLZ-NEXT: testl %eax, %eax ; X32-CLZ-NEXT: jne .LBB7_1 ; X32-CLZ-NEXT: # BB#2: +; X32-CLZ-NEXT: xorl %eax, %eax ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: addl $32, %eax ; X32-CLZ-NEXT: xorl %edx, %edx @@ -453,6 +455,7 @@ ; X32-CLZ-NEXT: testl %eax, %eax ; X32-CLZ-NEXT: jne .LBB11_1 ; X32-CLZ-NEXT: # BB#2: +; X32-CLZ-NEXT: xorl %eax, %eax ; X32-CLZ-NEXT: lzcntl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: addl $32, %eax ; X32-CLZ-NEXT: xorl %edx, %edx @@ -635,6 +638,7 @@ ; X32-CLZ-NEXT: testl %eax, %eax ; X32-CLZ-NEXT: jne .LBB15_1 ; X32-CLZ-NEXT: # BB#2: +; X32-CLZ-NEXT: xorl %eax, %eax ; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax ; X32-CLZ-NEXT: addl $32, %eax ; X32-CLZ-NEXT: xorl %edx, %edx