
Commit f8c7637

Committed Jun 7, 2018
[X86] Block UndefRegUpdate
Summary: Prevent folding of operations with memory loads when one of the sources has an undefined register update.

Reviewers: craig.topper

Subscribers: llvm-commits, mike.dvoretsky, ashlykov

Differential Revision: https://reviews.llvm.org/D47621

llvm-svn: 334175
1 parent 1a83d06 commit f8c7637
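For illustration, the test updates in this commit show the effect on AVX code generation: a load that used to be folded into a convert instruction on the hasUndefRegUpdate list, whose first source comes from an IMPLICIT_DEF, is now kept as a separate load followed by the register form of the convert. The before/after lines below are taken from the updated fast-isel-int-float-conversion-x86-64.ll checks:

  # before this commit: the load is folded into the convert
  vcvtsi2sdq (%rdi), %xmm0, %xmm0

  # after this commit: the load stays a separate instruction
  movq (%rdi), %rax
  vcvtsi2sdq %rax, %xmm0, %xmm0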

4 files changed (+46, -22 lines)

 

llvm/lib/Target/X86/X86InstrInfo.cpp  (+22 -10)
@@ -8479,6 +8479,19 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
   return nullptr;
 }
 
+static bool shouldPreventUndefRegUpdateMemFold(MachineFunction &MF, MachineInstr &MI) {
+  if (MF.getFunction().optForSize() || !hasUndefRegUpdate(MI.getOpcode()) ||
+      !MI.getOperand(1).isReg())
+    return false;
+
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  MachineInstr *VRegDef = RegInfo.getUniqueVRegDef(MI.getOperand(1).getReg());
+  if (VRegDef == nullptr)
+    return false;
+  return VRegDef->isImplicitDef();
+}
+
+
 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
     MachineFunction &MF, MachineInstr &MI, unsigned OpNum,
     ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
@@ -8497,10 +8510,10 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
       MI.getOpcode() == X86::PUSH64r))
     return nullptr;
 
-  // Avoid partial register update stalls unless optimizing for size.
-  // TODO: we should block undef reg update as well.
+  // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      hasPartialRegUpdate(MI.getOpcode(), Subtarget))
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+       shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 
   unsigned NumOps = MI.getDesc().getNumOperands();
@@ -8674,11 +8687,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
   if (NoFusing)
     return nullptr;
 
-  // Unless optimizing for size, don't fold to avoid partial
-  // register update stalls
-  // TODO: we should block undef reg update as well.
+  // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      hasPartialRegUpdate(MI.getOpcode(), Subtarget))
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+       shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 
   // Don't fold subreg spills, or reloads that use a high subreg.
@@ -8875,10 +8887,10 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   // Check switch flag
   if (NoFusing) return nullptr;
 
-  // Avoid partial register update stalls unless optimizing for size.
-  // TODO: we should block undef reg update as well.
+  // Avoid partial and undef register update stalls unless optimizing for size.
   if (!MF.getFunction().optForSize() &&
-      hasPartialRegUpdate(MI.getOpcode(), Subtarget))
+      (hasPartialRegUpdate(MI.getOpcode(), Subtarget) ||
+       shouldPreventUndefRegUpdateMemFold(MF, MI)))
     return nullptr;
 
   // Determine the alignment of the load.
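Restated outside the diff, the folding gate now behaves roughly like the sketch below. This is a simplified standalone paraphrase of the condition added above, not the literal LLVM code: the boolean parameters stand in for MF.getFunction().optForSize(), hasPartialRegUpdate(...), hasUndefRegUpdate(...), and the getUniqueVRegDef(MI.getOperand(1).getReg())->isImplicitDef() check.

  // Simplified paraphrase of the memory-fold gate after this commit (not the
  // literal code in X86InstrInfo.cpp; the booleans stand in for the LLVM
  // queries named above).
  static bool mayFoldMemoryOperand(bool OptForSize, bool HasPartialRegUpdate,
                                   bool HasUndefRegUpdate, bool Src1IsImplicitDef) {
    if (OptForSize)
      return true;  // optimizing for size keeps folding enabled, as before
    if (HasPartialRegUpdate)
      return false; // existing partial-register-update stall avoidance
    if (HasUndefRegUpdate && Src1IsImplicitDef)
      return false; // new: block the fold when source 1 is only an IMPLICIT_DEF
    return true;
  }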

llvm/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll  (+4 -2)
@@ -27,7 +27,8 @@ define double @long_to_double_rm(i64* %a) {
 ;
 ; AVX-LABEL: long_to_double_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    movq (%rdi), %rax
+; AVX-NEXT:    vcvtsi2sdq %rax, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
   %0 = load i64, i64* %a
@@ -75,7 +76,8 @@ define float @long_to_float_rm(i64* %a) {
 ;
 ; AVX-LABEL: long_to_float_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    movq (%rdi), %rax
+; AVX-NEXT:    vcvtsi2ssq %rax, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
   %0 = load i64, i64* %a

llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll  (+8 -4)
@@ -43,7 +43,8 @@ define double @int_to_double_rr(i32 %a) {
 ; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
 ; AVX_X86-NEXT:    andl $-8, %esp
 ; AVX_X86-NEXT:    subl $8, %esp
-; AVX_X86-NEXT:    vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
+; AVX_X86-NEXT:    movl 8(%ebp), %eax
+; AVX_X86-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
 ; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
 ; AVX_X86-NEXT:    fldl (%esp)
 ; AVX_X86-NEXT:    movl %ebp, %esp
@@ -64,7 +65,8 @@ define double @int_to_double_rm(i32* %a) {
 ;
 ; AVX-LABEL: int_to_double_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    movl (%rdi), %eax
+; AVX-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; SSE2_X86-LABEL: int_to_double_rm:
@@ -187,7 +189,8 @@ define float @int_to_float_rr(i32 %a) {
 ; AVX_X86:       # %bb.0: # %entry
 ; AVX_X86-NEXT:    pushl %eax
 ; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
-; AVX_X86-NEXT:    vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
+; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
 ; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
 ; AVX_X86-NEXT:    flds (%esp)
 ; AVX_X86-NEXT:    popl %eax
@@ -207,7 +210,8 @@ define float @int_to_float_rm(i32* %a) {
 ;
 ; AVX-LABEL: int_to_float_rm:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    movl (%rdi), %eax
+; AVX-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; SSE2_X86-LABEL: int_to_float_rm:

llvm/test/CodeGen/X86/vector-sqrt.ll  (+12 -6)
@@ -5,8 +5,10 @@
 define <2 x double> @sqrtd2(double* nocapture readonly %v) local_unnamed_addr #0 {
 ; CHECK-LABEL: sqrtd2:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsqrtsd (%rdi), %xmm0, %xmm0
-; CHECK-NEXT:    vsqrtsd 8(%rdi), %xmm1, %xmm1
+; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1
 ; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
 entry:
@@ -27,10 +29,14 @@ declare double @sqrt(double) local_unnamed_addr #1
 define <4 x float> @sqrtf4(float* nocapture readonly %v) local_unnamed_addr #0 {
 ; CHECK-LABEL: sqrtf4:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsqrtss (%rdi), %xmm0, %xmm0
-; CHECK-NEXT:    vsqrtss 4(%rdi), %xmm1, %xmm1
-; CHECK-NEXT:    vsqrtss 8(%rdi), %xmm2, %xmm2
-; CHECK-NEXT:    vsqrtss 12(%rdi), %xmm3, %xmm3
+; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT:    vsqrtss %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT:    vsqrtss %xmm3, %xmm3, %xmm3
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
