Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -58,6 +58,17 @@
 ReMatPICStubLoad("remat-pic-stub-load",
                  cl::desc("Re-materialize load from stub in PIC mode"),
                  cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+PartialRegUpdateClearance("partial-reg-update-clearance",
+                          cl::desc("Clearance between two register writes "
+                                   "for inserting XOR to avoid partial "
+                                   "register update"),
+                          cl::init(64), cl::Hidden);
+static cl::opt<unsigned>
+UndefRegClearance("undef-reg-clearance",
+                  cl::desc("How many idle instructions we would like before "
+                           "certain undef register reads"),
+                  cl::init(64), cl::Hidden);
 
 enum {
   // Select which memory operand is being unfolded.
@@ -5972,10 +5983,10 @@
     return 0;
   }
 
-  // If any of the preceding 16 instructions are reading Reg, insert a
-  // dependency breaking instruction. The magic number is based on a few
-  // Nehalem experiments.
-  return 16;
+  // If any instructions in the clearance range are reading Reg, insert a
+  // dependency breaking instruction, which is inexpensive and is likely to
+  // be hidden in other instructions' cycles.
+  return PartialRegUpdateClearance;
 }
 
 // Return true for any instruction the copies the high bits of the first source
@@ -6060,8 +6071,7 @@
   const MachineOperand &MO = MI->getOperand(OpNum);
   if (MO.isUndef() &&
       TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
-    // Use the same magic number as getPartialRegUpdateClearance.
-    return 16;
+    return UndefRegClearance;
   }
   return 0;
 }
Index: test/CodeGen/X86/vec_int_to_fp.ll
===================================================================
--- test/CodeGen/X86/vec_int_to_fp.ll
+++ test/CodeGen/X86/vec_int_to_fp.ll
@@ -1580,6 +1580,7 @@
 ; AVX1-NEXT:  .LBB45_10:
 ; AVX1-NEXT:    shrq %rax
 ; AVX1-NEXT:    orq %rax, %rcx
+; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm0
 ; AVX1-NEXT:    vaddss %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
@@ -1647,6 +1648,7 @@
 ; AVX2-NEXT:  .LBB45_10:
 ; AVX2-NEXT:    shrq %rax
 ; AVX2-NEXT:    orq %rax, %rcx
+; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm0
 ; AVX2-NEXT:    vaddss %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
@@ -2773,6 +2775,7 @@
 ; AVX1-NEXT:  .LBB74_10:
 ; AVX1-NEXT:    shrq %rax
 ; AVX1-NEXT:    orq %rax, %rcx
+; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm0
 ; AVX1-NEXT:    vaddss %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
@@ -2841,6 +2844,7 @@
 ; AVX2-NEXT:  .LBB74_10:
 ; AVX2-NEXT:    shrq %rax
 ; AVX2-NEXT:    orq %rax, %rcx
+; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm0
 ; AVX2-NEXT:    vaddss %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
@@ -2993,6 +2997,7 @@
 ; SSE-NEXT:  .LBB78_10:
 ; SSE-NEXT:    shrq %rax
 ; SSE-NEXT:    orq %rax, %rcx
+; SSE-NEXT:    xorps %xmm5, %xmm5
 ; SSE-NEXT:    cvtsi2ssq %rcx, %xmm5
 ; SSE-NEXT:    addss %xmm5, %xmm5
 ; SSE-NEXT:  .LBB78_12:
@@ -3016,11 +3021,13 @@
 ; SSE-NEXT:    testq %rax, %rax
 ; SSE-NEXT:    js .LBB78_16
 ; SSE-NEXT:  # BB#17:
+; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm1
 ; SSE-NEXT:    jmp .LBB78_18
 ; SSE-NEXT:  .LBB78_16:
 ; SSE-NEXT:    shrq %rax
 ; SSE-NEXT:    orq %rax, %rcx
+; SSE-NEXT:    xorps %xmm1, %xmm1
 ; SSE-NEXT:    cvtsi2ssq %rcx, %xmm1
 ; SSE-NEXT:    addss %xmm1, %xmm1
 ; SSE-NEXT:  .LBB78_18:
@@ -3165,11 +3172,13 @@
 ; AVX1-NEXT:    testq %rax, %rax
 ; AVX1-NEXT:    js .LBB78_19
 ; AVX1-NEXT:  # BB#20:
+; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm5
 ; AVX1-NEXT:    jmp .LBB78_21
 ; AVX1-NEXT:  .LBB78_19:
 ; AVX1-NEXT:    shrq %rax
 ; AVX1-NEXT:    orq %rax, %rcx
+; AVX1-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm0
 ; AVX1-NEXT:    vaddss %xmm0, %xmm0, %xmm5
 ; AVX1-NEXT:  .LBB78_21:
@@ -3292,11 +3301,13 @@
 ; AVX2-NEXT:    testq %rax, %rax
 ; AVX2-NEXT:    js .LBB78_19
 ; AVX2-NEXT:  # BB#20:
+; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm5
 ; AVX2-NEXT:    jmp .LBB78_21
 ; AVX2-NEXT:  .LBB78_19:
 ; AVX2-NEXT:    shrq %rax
 ; AVX2-NEXT:    orq %rax, %rcx
+; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; AVX2-NEXT:    vcvtsi2ssq %rcx, %xmm0, %xmm0
 ; AVX2-NEXT:    vaddss %xmm0, %xmm0, %xmm5
 ; AVX2-NEXT:  .LBB78_21:
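
Background on the new (v)xorps lines in the test output: (v)cvtsi2ss writes only the low 32 bits of its xmm destination and takes the remaining bits from the old destination value (SSE) or from its first source operand (AVX), so without a prior zeroing the convert carries a false dependence on whatever last wrote that register. An xor-with-self is a zero idiom that the register renamer treats as dependency-breaking, which is why inserting it is essentially free.

Since both new options are ordinary cl::opt flags, they can be set on the llc command line. A hypothetical invocation for A/B testing, mirroring the test file's RUN-line style (the actual RUN lines fall outside the hunks shown above, so the triple and attributes here are assumptions):

    llc < test/CodeGen/X86/vec_int_to_fp.ll -mtriple=x86_64-unknown-unknown -mattr=+avx \
        -partial-reg-update-clearance=16 -undef-reg-clearance=16

Passing 16 for both restores the previous hard-coded clearance, giving a convenient comparison point against the new default of 64.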