Index: include/llvm/Target/TargetInstrInfo.h
===================================================================
--- include/llvm/Target/TargetInstrInfo.h
+++ include/llvm/Target/TargetInstrInfo.h
@@ -1431,6 +1431,16 @@
   virtual void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum,
                                          const TargetRegisterInfo *TRI) const {}
 
+  /// If the instruction `MI` is a dependency-breaking instruction, return
+  /// the register number whose dependencies it breaks.
+  /// This function may conservatively return an empty Optional even if `MI`
+  /// is dependency breaking (resulting in, at worst, an unnecessary
+  /// dependency break insertion), but it must always return an empty
+  /// Optional when `MI` is not dependency breaking.
+  virtual Optional<unsigned> getDependencyBreakReg(MachineInstr &MI) const {
+    return Optional<unsigned>{};
+  }
+
   /// Create machine specific model for scheduling.
   virtual DFAPacketizer *
   CreateTargetScheduleState(const TargetSubtargetInfo &) const {
Index: lib/CodeGen/ExecutionDepsFix.cpp
===================================================================
--- lib/CodeGen/ExecutionDepsFix.cpp
+++ lib/CodeGen/ExecutionDepsFix.cpp
@@ -619,6 +619,14 @@
       kill(rx);
     }
   }
+  Optional<unsigned> DepReg = TII->getDependencyBreakReg(*MI);
+  if (DepReg) {
+    for (int rx : regIndices(DepReg.getValue())) {
+      // This instruction is a pre-existing dependency break, so there are
+      // no clearance issues; reset the def to the distant past.
+      LiveRegs[rx].Def = -(1 << 20);
+    }
+  }
   ++CurInstr;
 }
 
Index: lib/Target/X86/X86InstrInfo.h
===================================================================
--- lib/Target/X86/X86InstrInfo.h
+++ lib/Target/X86/X86InstrInfo.h
@@ -481,6 +481,7 @@
                                     const TargetRegisterInfo *TRI) const override;
   void breakPartialRegDependency(MachineInstr &MI, unsigned OpNum,
                                  const TargetRegisterInfo *TRI) const override;
+  Optional<unsigned> getDependencyBreakReg(MachineInstr &MI) const override;
 
   MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                       unsigned OpNum,
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -8253,6 +8253,54 @@
   }
 }
 
+Optional<unsigned> X86InstrInfo::getDependencyBreakReg(MachineInstr &MI) const {
+  unsigned Opc = MI.getOpcode();
+  switch (Opc) {
+  default:
+    break;
+  // See the Intel Architecture Optimization Reference Manual,
+  // Section 3.5.1.8, "Clearing Registers and Dependency Breaking Idioms".
+  case X86::XOR8rr:
+  case X86::XOR16rr:
+  case X86::XOR32rr:
+  case X86::XOR64rr:
+  case X86::SUB8rr:
+  case X86::SUB16rr:
+  case X86::SUB32rr:
+  case X86::SUB64rr:
+  case X86::XORPSrr:
+  case X86::XORPDrr:
+  case X86::PXORrr:
+  case X86::SUBPSrr:
+  case X86::SUBPDrr:
+  case X86::PSUBBrr:
+  case X86::PSUBWrr:
+  case X86::PSUBDrr:
+  case X86::PSUBQrr:
+  case X86::VXORPSrr:
+  case X86::VXORPDrr:
+  case X86::VPXORrr:
+  case X86::VSUBPSrr:
+  case X86::VSUBPDrr:
+  case X86::VPSUBBrr:
+  case X86::VPSUBWrr:
+  case X86::VPSUBDrr:
+  case X86::VPSUBQrr: {
+    // The idiom must read and write one and the same register. Look only at
+    // the explicit operands; the integer forms also carry an implicit EFLAGS
+    // def, which would otherwise always defeat this check.
+    unsigned Reg = X86::NoRegister;
+    for (const MachineOperand &MO : MI.explicit_operands()) {
+      if (!MO.isReg() || (Reg != X86::NoRegister && MO.getReg() != Reg))
+        return Optional<unsigned>{};
+      Reg = MO.getReg();
+    }
+    return Optional<unsigned>{Reg};
+  }
+  }
+  return Optional<unsigned>{};
+}
+
 MachineInstr *
 X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
                                     ArrayRef<unsigned> Ops,
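A note on the `-(1 << 20)` sentinel written by the ExecutionDepsFix hunk above:
the pass decides whether to insert a dependency break by comparing the distance
from a register's most recent def against the clearance the next instruction
prefers, so pushing the def far into the past makes every such comparison
succeed. Below is a minimal standalone sketch of that clearance test; the names
LiveReg, Def, and shouldBreakDependence mirror the pass, but the sketch itself
is illustrative, not the actual pass code.

#include <cstdio>

// Mirrors ExecutionDepsFix's per-register state: the instruction number of
// the most recent def of this register.
struct LiveReg {
  int Def;
};

// True when the last def is too close, i.e. the pass should insert a
// dependency-breaking instruction (e.g. an xorps) before instruction CurInstr.
static bool shouldBreakDependence(const LiveReg &LR, int CurInstr, int Pref) {
  int Clearance = CurInstr - LR.Def;
  return Pref > Clearance;
}

int main() {
  int CurInstr = 100;
  int Pref = 16;                 // A typical clearance preference on x86.
  LiveReg Recent = {95};         // Defined five instructions ago.
  LiveReg Broken = {-(1 << 20)}; // Sentinel stored by the hunk above.
  std::printf("recent def:  %d\n", shouldBreakDependence(Recent, CurInstr, Pref)); // 1
  std::printf("after idiom: %d\n", shouldBreakDependence(Broken, CurInstr, Pref)); // 0
  return 0;
}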
Index: test/CodeGen/X86/break-false-dep.ll
===================================================================
--- test/CodeGen/X86/break-false-dep.ll
+++ test/CodeGen/X86/break-false-dep.ll
@@ -334,3 +334,25 @@
 loopdone:
   ret void
 }
+
+; Make sure we recognize pre-existing dependency-breaking instructions and
+; reuse them. In `fcmp ult double %x, 0.0`, the `0.0` constant gets
+; materialized as a vxorps.
+define double @recognize_existing(double %x, i64 %arg) {
+;AVX-LABEL:@recognize_existing
+  tail call void asm sideeffect "", "~{xmm1},~{xmm2},~{xmm3},~{dirflag},~{fpsr},~{flags}"()
+  tail call void asm sideeffect "", "~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"()
+  tail call void asm sideeffect "", "~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{dirflag},~{fpsr},~{flags}"()
+  tail call void asm sideeffect "", "~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"()
+;AVX: vxorps [[XMM1:%xmm1]], [[XMM1]], [[XMM1]]
+;AVX: vucomisd [[XMM1]], %xmm0
+  %1 = fcmp ult double %x, 0.0
+  br i1 %1, label %main, label %fake
+main:
+;AVX-NOT: vxorps
+;AVX: vcvtsi2sdq {{.*}}, [[XMM1]], {{%xmm[0-9]+}}
+  %tmp1 = sitofp i64 %arg to double
+  ret double %tmp1
+fake:
+  ret double 0.0
+}
Index: test/CodeGen/X86/known-bits-vector.ll
===================================================================
--- test/CodeGen/X86/known-bits-vector.ll
+++ test/CodeGen/X86/known-bits-vector.ll
@@ -42,7 +42,7 @@
 ; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
 ; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
+; X64-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm0
 ; X64-NEXT:    retq
   %1 = and <2 x i64> %a0,
   %2 = extractelement <2 x i64> %1, i32 0
Index: test/CodeGen/X86/vec_int_to_fp.ll
===================================================================
--- test/CodeGen/X86/vec_int_to_fp.ll
+++ test/CodeGen/X86/vec_int_to_fp.ll
@@ -1665,7 +1665,7 @@
 ; VEX-NEXT:    testq %rax, %rax
 ; VEX-NEXT:    js .LBB39_8
 ; VEX-NEXT:  # BB#7:
-; VEX-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm1
+; VEX-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
 ; VEX-NEXT:  .LBB39_8:
 ; VEX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
 ; VEX-NEXT:    retq
@@ -1824,7 +1824,6 @@
 ; SSE-NEXT:    xorps %xmm2, %xmm2
 ; SSE-NEXT:    js .LBB41_2
 ; SSE-NEXT:  # BB#1:
-; SSE-NEXT:    xorps %xmm2, %xmm2
 ; SSE-NEXT:    cvtsi2ssq %rax, %xmm2
 ; SSE-NEXT:  .LBB41_2:
 ; SSE-NEXT:    movd %xmm1, %rax
@@ -1900,7 +1899,7 @@
 ; VEX-NEXT:    testq %rax, %rax
 ; VEX-NEXT:    js .LBB41_8
 ; VEX-NEXT:  # BB#7:
-; VEX-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm1
+; VEX-NEXT:    vcvtsi2ssq %rax, %xmm1, %xmm1
 ; VEX-NEXT:  .LBB41_8:
 ; VEX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
 ; VEX-NEXT:    retq
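The new break-false-dep.ll test above hinges on getDependencyBreakReg treating
`vxorps %xmm1, %xmm1, %xmm1` as dependency breaking because all of its register
operands name the same register. A self-contained model of that operand check
follows; it uses std::optional in place of llvm::Optional, and the function
name depBreakReg and the register numbers are stand-ins for illustration, not
real LLVM APIs or X86 register encodings.

#include <cstdio>
#include <optional>
#include <vector>

// Model of the operand check in getDependencyBreakReg: an instruction is a
// zeroing idiom only if every register operand names the same register.
// 0 plays the role of X86::NoRegister.
static std::optional<unsigned> depBreakReg(const std::vector<unsigned> &RegOps) {
  unsigned Reg = 0;
  for (unsigned R : RegOps) {
    if (Reg != 0 && R != Reg)
      return std::nullopt; // Operands differ: this computes a real value.
    Reg = R;
  }
  return Reg;
}

int main() {
  // vxorps %xmm1, %xmm1, %xmm1 -> zeroing idiom, breaks deps on %xmm1.
  std::optional<unsigned> Same = depBreakReg({18, 18, 18});
  // vxorps %xmm2, %xmm1, %xmm1 -> real computation, not dependency breaking.
  std::optional<unsigned> Mixed = depBreakReg({18, 18, 19});
  std::printf("same operands:  %s\n", Same ? "recognized" : "not recognized");
  std::printf("mixed operands: %s\n", Mixed ? "recognized" : "not recognized");
  return 0;
}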