Index: lib/Target/X86/X86SchedBroadwell.td =================================================================== --- lib/Target/X86/X86SchedBroadwell.td +++ lib/Target/X86/X86SchedBroadwell.td @@ -156,7 +156,10 @@ // Idioms that clear a register, like xorps %xmm0, %xmm0. // These can often bypass execution ports completely. -def : WriteRes; +def : WriteRes { + let Latency = 0; + let NumMicroOps = 0; +} // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -111,7 +111,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -def : WriteRes; +def : WriteRes { + let Latency = 0; + let NumMicroOps = 0; +} defm : HWWriteResPair; defm : HWWriteResPair; Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -100,7 +100,10 @@ def : WriteRes; def : WriteRes { let Latency = 5; } def : WriteRes; -def : WriteRes; +def : WriteRes { + let Latency = 0; + let NumMicroOps = 0; +} defm : SBWriteResPair; defm : SBWriteResPair; Index: lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- lib/Target/X86/X86SchedSkylakeClient.td +++ lib/Target/X86/X86SchedSkylakeClient.td @@ -154,7 +154,10 @@ // Idioms that clear a register, like xorps %xmm0, %xmm0. // These can often bypass execution ports completely. -def : WriteRes; +def : WriteRes { + let Latency = 0; + let NumMicroOps = 0; +} // Branches don't produce values, so they have no latency, but they still // consume resources. Indirect branches can fold loads. Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -154,7 +154,10 @@ // Idioms that clear a register, like xorps %xmm0, %xmm0. // These can often bypass execution ports completely. -def : WriteRes; +def : WriteRes { + let Latency = 0; + let NumMicroOps = 0; +} // Branches don't produce values, so they have no latency, but they still // consume resources. Indirect branches can fold loads. Index: lib/Target/X86/X86ScheduleAtom.td =================================================================== --- lib/Target/X86/X86ScheduleAtom.td +++ lib/Target/X86/X86ScheduleAtom.td @@ -159,7 +159,10 @@ // These can often bypass execution ports completely. //////////////////////////////////////////////////////////////////////////////// -def : WriteRes; +def : WriteRes { + let Latency = 0; + let NumMicroOps = 0; +} //////////////////////////////////////////////////////////////////////////////// // Branches don't produce values, so they have no latency, but they still Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -246,7 +246,10 @@ // These can often bypass execution ports completely. //////////////////////////////////////////////////////////////////////////////// -def : WriteRes; +def : WriteRes { + let Latency = 0; + let NumMicroOps = 0; +} //////////////////////////////////////////////////////////////////////////////// // Branches don't produce values, so they have no latency, but they still Index: lib/Target/X86/X86ScheduleSLM.td =================================================================== --- lib/Target/X86/X86ScheduleSLM.td +++ lib/Target/X86/X86ScheduleSLM.td @@ -83,7 +83,10 @@ def : WriteRes; def : WriteRes { let Latency = 3; } def : WriteRes; -def : WriteRes; +def : WriteRes { + let Latency = 0; + let NumMicroOps = 0; +} // Load/store MXCSR. // FIXME: These are probably wrong. They are copy pasted from WriteStore/Load. Index: lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- lib/Target/X86/X86ScheduleZnver1.td +++ lib/Target/X86/X86ScheduleZnver1.td @@ -148,7 +148,10 @@ def : WriteRes; def : WriteRes { let Latency = 8; } -def : WriteRes; +def : WriteRes { + let Latency = 0; + let NumMicroOps = 0; +} def : WriteRes; defm : ZnWriteResPair; defm : ZnWriteResPair; Index: test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -3148,20 +3148,20 @@ ; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k2 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k3 ; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k4 -; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k5 ; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpinsrw $0, %eax, %xmm2, %xmm2 ; CHECK-NEXT: kmovw %k1, %eax -; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 +; CHECK-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; CHECK-NEXT: kmovw %k2, %eax -; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 +; CHECK-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; CHECK-NEXT: kmovw %k3, %eax -; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; CHECK-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; CHECK-NEXT: kmovw %k4, %eax -; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k5, %eax -; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 +; CHECK-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 +; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: vpinsrw $6, %eax, %xmm2, %xmm0 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] ; CHECK-NEXT: retq @@ -3238,20 +3238,20 @@ ; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k2 ; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k3 ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k4 -; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k5 ; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vpinsrw $0, %eax, %xmm2, %xmm2 ; CHECK-NEXT: kmovw %k1, %eax -; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 +; CHECK-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; CHECK-NEXT: kmovw %k2, %eax -; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 +; CHECK-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; CHECK-NEXT: kmovw %k3, %eax -; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; CHECK-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; CHECK-NEXT: kmovw %k4, %eax -; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 -; CHECK-NEXT: kmovw %k5, %eax -; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 +; CHECK-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 +; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: vpinsrw $6, %eax, %xmm2, %xmm0 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7] ; CHECK-NEXT: retq Index: test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics.ll +++ test/CodeGen/X86/avx512vl-intrinsics.ll @@ -128,9 +128,9 @@ ; CHECK-NEXT: movzbl (%rax), %eax ## encoding: [0x0f,0xb6,0x00] ; CHECK-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; CHECK-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xef,0xc9] ; CHECK-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x44,0x24,0xd8] -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0] -; CHECK-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x44,0x24,0xe8] +; CHECK-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x4c,0x24,0xe8] ; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] entry: Index: test/CodeGen/X86/schedule-x86_64.ll =================================================================== --- test/CodeGen/X86/schedule-x86_64.ll +++ test/CodeGen/X86/schedule-x86_64.ll @@ -3465,7 +3465,7 @@ ; GENERIC-LABEL: test_clc_cld_cmc: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: clc # sched: [1:?] +; GENERIC-NEXT: clc # sched: [0:?] ; GENERIC-NEXT: cld # sched: [1:0.33] ; GENERIC-NEXT: cmc # sched: [1:0.33] ; GENERIC-NEXT: #NO_APP @@ -3492,7 +3492,7 @@ ; SANDY-LABEL: test_clc_cld_cmc: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: clc # sched: [1:?] +; SANDY-NEXT: clc # sched: [0:?] ; SANDY-NEXT: cld # sched: [1:0.33] ; SANDY-NEXT: cmc # sched: [1:0.33] ; SANDY-NEXT: #NO_APP @@ -3501,7 +3501,7 @@ ; HASWELL-LABEL: test_clc_cld_cmc: ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP -; HASWELL-NEXT: clc # sched: [1:?] +; HASWELL-NEXT: clc # sched: [0:?] ; HASWELL-NEXT: cld # sched: [3:1.00] ; HASWELL-NEXT: cmc # sched: [1:0.25] ; HASWELL-NEXT: #NO_APP @@ -3510,7 +3510,7 @@ ; BROADWELL-LABEL: test_clc_cld_cmc: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: clc # sched: [1:?] +; BROADWELL-NEXT: clc # sched: [0:?] ; BROADWELL-NEXT: cld # sched: [3:1.00] ; BROADWELL-NEXT: cmc # sched: [1:0.25] ; BROADWELL-NEXT: #NO_APP @@ -3519,7 +3519,7 @@ ; SKYLAKE-LABEL: test_clc_cld_cmc: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: clc # sched: [1:?] +; SKYLAKE-NEXT: clc # sched: [0:?] ; SKYLAKE-NEXT: cld # sched: [3:1.00] ; SKYLAKE-NEXT: cmc # sched: [1:0.25] ; SKYLAKE-NEXT: #NO_APP @@ -3528,7 +3528,7 @@ ; SKX-LABEL: test_clc_cld_cmc: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: clc # sched: [1:?] +; SKX-NEXT: clc # sched: [0:?] ; SKX-NEXT: cld # sched: [3:1.00] ; SKX-NEXT: cmc # sched: [1:0.25] ; SKX-NEXT: #NO_APP Index: test/CodeGen/X86/sttni.ll =================================================================== --- test/CodeGen/X86/sttni.ll +++ test/CodeGen/X86/sttni.ll @@ -1223,8 +1223,8 @@ ; X32-NEXT: movdqa %xmm0, %xmm2 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: pcmpistrm $24, %xmm1, %xmm0 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: pcmpistri $24, %xmm1, %xmm2 ; X32-NEXT: setb %bl @@ -1265,10 +1265,10 @@ ; X32-NEXT: movdqa %xmm0, %xmm1 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movdqu (%ecx), %xmm2 ; X32-NEXT: pcmpistrm $24, %xmm2, %xmm0 +; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: pcmpistri $24, %xmm2, %xmm1 ; X32-NEXT: setb %bl @@ -1281,10 +1281,10 @@ ; ; X64-LABEL: pcmpistr_mask_index_flag_load: ; X64: # %bb.0: # %entry -; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movdqa %xmm0, %xmm1 ; X64-NEXT: movdqu (%rdi), %xmm2 ; X64-NEXT: pcmpistrm $24, %xmm2, %xmm0 +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: xorl %edi, %edi ; X64-NEXT: pcmpistri $24, %xmm2, %xmm1 ; X64-NEXT: setb %dil Index: test/CodeGen/X86/vec_cast.ll =================================================================== --- test/CodeGen/X86/vec_cast.ll +++ test/CodeGen/X86/vec_cast.ll @@ -104,11 +104,11 @@ ; ; CHECK-WIN-LABEL: e: ; CHECK-WIN: # %bb.0: -; CHECK-WIN-NEXT: # kill: def $r8w killed $r8w def $r8d ; CHECK-WIN-NEXT: # kill: def $dx killed $dx def $edx ; CHECK-WIN-NEXT: # kill: def $cx killed $cx def $ecx ; CHECK-WIN-NEXT: movd %ecx, %xmm0 ; CHECK-WIN-NEXT: pinsrw $1, %edx, %xmm0 +; CHECK-WIN-NEXT: # kill: def $r8w killed $r8w def $r8d ; CHECK-WIN-NEXT: pinsrw $2, %r8d, %xmm0 ; CHECK-WIN-NEXT: pxor %xmm1, %xmm1 ; CHECK-WIN-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] Index: test/CodeGen/X86/x87-schedule.ll =================================================================== --- test/CodeGen/X86/x87-schedule.ll +++ test/CodeGen/X86/x87-schedule.ll @@ -2849,33 +2849,33 @@ ; SLM-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: fld1 # sched: [1:?] +; SLM-NEXT: fld1 # sched: [0:?] ; SLM-NEXT: fldl2e # sched: [100:1.00] ; SLM-NEXT: fldl2t # sched: [100:1.00] ; SLM-NEXT: fldlg2 # sched: [100:1.00] ; SLM-NEXT: fldln2 # sched: [100:1.00] ; SLM-NEXT: fldpi # sched: [100:1.00] -; SLM-NEXT: fldz # sched: [1:?] +; SLM-NEXT: fldz # sched: [0:?] ; SLM-NEXT: #NO_APP ; SLM-NEXT: retl # sched: [4:1.00] ; ; SANDY-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: fld1 # sched: [1:?] +; SANDY-NEXT: fld1 # sched: [0:?] ; SANDY-NEXT: fldl2e # sched: [100:0.33] ; SANDY-NEXT: fldl2t # sched: [100:0.33] ; SANDY-NEXT: fldlg2 # sched: [100:0.33] ; SANDY-NEXT: fldln2 # sched: [100:0.33] ; SANDY-NEXT: fldpi # sched: [100:0.33] -; SANDY-NEXT: fldz # sched: [1:?] +; SANDY-NEXT: fldz # sched: [0:?] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retl # sched: [6:1.00] ; ; HASWELL-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP -; HASWELL-NEXT: fld1 # sched: [1:?] +; HASWELL-NEXT: fld1 # sched: [0:?] ; HASWELL-NEXT: fldl2e # sched: [1:0.50] ; HASWELL-NEXT: fldl2t # sched: [1:0.50] ; HASWELL-NEXT: fldlg2 # sched: [1:0.50] @@ -2888,52 +2888,52 @@ ; BROADWELL-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: fld1 # sched: [1:?] +; BROADWELL-NEXT: fld1 # sched: [0:?] ; BROADWELL-NEXT: fldl2e # sched: [100:0.25] ; BROADWELL-NEXT: fldl2t # sched: [100:0.25] ; BROADWELL-NEXT: fldlg2 # sched: [100:0.25] ; BROADWELL-NEXT: fldln2 # sched: [100:0.25] ; BROADWELL-NEXT: fldpi # sched: [100:0.25] -; BROADWELL-NEXT: fldz # sched: [1:?] +; BROADWELL-NEXT: fldz # sched: [0:?] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retl # sched: [6:0.50] ; ; SKYLAKE-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: fld1 # sched: [1:?] +; SKYLAKE-NEXT: fld1 # sched: [0:?] ; SKYLAKE-NEXT: fldl2e # sched: [100:0.25] ; SKYLAKE-NEXT: fldl2t # sched: [100:0.25] ; SKYLAKE-NEXT: fldlg2 # sched: [100:0.25] ; SKYLAKE-NEXT: fldln2 # sched: [100:0.25] ; SKYLAKE-NEXT: fldpi # sched: [100:0.25] -; SKYLAKE-NEXT: fldz # sched: [1:?] +; SKYLAKE-NEXT: fldz # sched: [0:?] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retl # sched: [6:0.50] ; ; SKX-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: fld1 # sched: [1:?] +; SKX-NEXT: fld1 # sched: [0:?] ; SKX-NEXT: fldl2e # sched: [100:0.25] ; SKX-NEXT: fldl2t # sched: [100:0.25] ; SKX-NEXT: fldlg2 # sched: [100:0.25] ; SKX-NEXT: fldln2 # sched: [100:0.25] ; SKX-NEXT: fldpi # sched: [100:0.25] -; SKX-NEXT: fldz # sched: [1:?] +; SKX-NEXT: fldz # sched: [0:?] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retl # sched: [6:0.50] ; ; BTVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: fld1 # sched: [1:?] +; BTVER2-NEXT: fld1 # sched: [0:?] ; BTVER2-NEXT: fldl2e # sched: [100:0.50] ; BTVER2-NEXT: fldl2t # sched: [100:0.50] ; BTVER2-NEXT: fldlg2 # sched: [100:0.50] ; BTVER2-NEXT: fldln2 # sched: [100:0.50] ; BTVER2-NEXT: fldpi # sched: [100:0.50] -; BTVER2-NEXT: fldz # sched: [1:?] +; BTVER2-NEXT: fldz # sched: [0:?] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retl # sched: [4:1.00] ; Index: test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll =================================================================== --- test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll +++ test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll @@ -536,15 +536,15 @@ ; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: addl {{[0-9]+}}(%esp), %edx +; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xorl %esi, %esi ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB5_1: # %for.body82.us ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movzbl (%edx,%esi,4), %ebx -; X32-NEXT: movb %bl, (%ecx,%esi) +; X32-NEXT: movzbl (%ecx,%esi,4), %ebx +; X32-NEXT: movb %bl, (%edx,%esi) ; X32-NEXT: incl %esi ; X32-NEXT: cmpl %esi, %eax ; X32-NEXT: jne .LBB5_1 Index: test/tools/llvm-mca/X86/Broadwell/resources-x86_64.s =================================================================== --- test/tools/llvm-mca/X86/Broadwell/resources-x86_64.s +++ test/tools/llvm-mca/X86/Broadwell/resources-x86_64.s @@ -747,7 +747,7 @@ # CHECK-NEXT: 4 6 1.00 * * btcq $7, (%rax) # CHECK-NEXT: 4 6 1.00 * * btrq $7, (%rax) # CHECK-NEXT: 4 6 1.00 * * btsq $7, (%rax) -# CHECK-NEXT: 1 1 - * clc +# CHECK-NEXT: 0 0 - * clc # CHECK-NEXT: 1 1 0.25 decb %dil # CHECK-NEXT: 3 7 1.00 * * decb (%rax) # CHECK-NEXT: 1 1 0.25 decw %di Index: test/tools/llvm-mca/X86/Broadwell/resources-x87.s =================================================================== --- test/tools/llvm-mca/X86/Broadwell/resources-x87.s +++ test/tools/llvm-mca/X86/Broadwell/resources-x87.s @@ -281,13 +281,13 @@ # CHECK-NEXT: 1 6 0.50 * * fldt (%eax) # CHECK-NEXT: 3 7 1.00 * * fldcw (%eax) # CHECK-NEXT: 64 60 14.00 * fldenv (%eax) -# CHECK-NEXT: 1 1 - * fld1 +# CHECK-NEXT: 0 0 - * fld1 # CHECK-NEXT: 1 100 0.25 * fldl2e # CHECK-NEXT: 1 100 0.25 * fldl2t # CHECK-NEXT: 1 100 0.25 * fldlg2 # CHECK-NEXT: 1 100 0.25 * fldln2 # CHECK-NEXT: 1 100 0.25 * fldpi -# CHECK-NEXT: 1 1 - * fldz +# CHECK-NEXT: 0 0 - * fldz # CHECK-NEXT: 1 5 1.00 * fmul %st(0), %st(1) # CHECK-NEXT: 1 5 1.00 * fmul %st(2) # CHECK-NEXT: 2 11 1.00 * * fmuls (%ecx) Index: test/tools/llvm-mca/X86/BtVer2/resources-x87.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-x87.s +++ test/tools/llvm-mca/X86/BtVer2/resources-x87.s @@ -281,13 +281,13 @@ # CHECK-NEXT: 1 5 1.00 * * fldt (%eax) # CHECK-NEXT: 1 5 1.00 * * fldcw (%eax) # CHECK-NEXT: 1 100 0.50 * fldenv (%eax) -# CHECK-NEXT: 1 1 - * fld1 +# CHECK-NEXT: 0 0 - * fld1 # CHECK-NEXT: 1 100 0.50 * fldl2e # CHECK-NEXT: 1 100 0.50 * fldl2t # CHECK-NEXT: 1 100 0.50 * fldlg2 # CHECK-NEXT: 1 100 0.50 * fldln2 # CHECK-NEXT: 1 100 0.50 * fldpi -# CHECK-NEXT: 1 1 - * fldz +# CHECK-NEXT: 0 0 - * fldz # CHECK-NEXT: 1 2 1.00 * fmul %st(0), %st(1) # CHECK-NEXT: 1 2 1.00 * fmul %st(2) # CHECK-NEXT: 1 7 1.00 * * fmuls (%ecx) Index: test/tools/llvm-mca/X86/Haswell/resources-x86_64.s =================================================================== --- test/tools/llvm-mca/X86/Haswell/resources-x86_64.s +++ test/tools/llvm-mca/X86/Haswell/resources-x86_64.s @@ -747,7 +747,7 @@ # CHECK-NEXT: 4 7 1.00 * * btcq $7, (%rax) # CHECK-NEXT: 4 7 1.00 * * btrq $7, (%rax) # CHECK-NEXT: 4 7 1.00 * * btsq $7, (%rax) -# CHECK-NEXT: 1 1 - * clc +# CHECK-NEXT: 0 0 - * clc # CHECK-NEXT: 1 1 0.25 decb %dil # CHECK-NEXT: 3 7 1.00 * * decb (%rax) # CHECK-NEXT: 1 1 0.25 decw %di Index: test/tools/llvm-mca/X86/Haswell/resources-x87.s =================================================================== --- test/tools/llvm-mca/X86/Haswell/resources-x87.s +++ test/tools/llvm-mca/X86/Haswell/resources-x87.s @@ -281,7 +281,7 @@ # CHECK-NEXT: 1 7 0.50 * * fldt (%eax) # CHECK-NEXT: 3 7 1.00 * * fldcw (%eax) # CHECK-NEXT: 64 61 14.00 * fldenv (%eax) -# CHECK-NEXT: 1 1 - * fld1 +# CHECK-NEXT: 0 0 - * fld1 # CHECK-NEXT: 2 1 0.50 * fldl2e # CHECK-NEXT: 2 1 0.50 * fldl2t # CHECK-NEXT: 2 1 0.50 * fldlg2 Index: test/tools/llvm-mca/X86/SLM/resources-x87.s =================================================================== --- test/tools/llvm-mca/X86/SLM/resources-x87.s +++ test/tools/llvm-mca/X86/SLM/resources-x87.s @@ -281,13 +281,13 @@ # CHECK-NEXT: 1 3 1.00 * * fldt (%eax) # CHECK-NEXT: 1 3 1.00 * * fldcw (%eax) # CHECK-NEXT: 1 100 1.00 * fldenv (%eax) -# CHECK-NEXT: 1 1 - * fld1 +# CHECK-NEXT: 0 0 - * fld1 # CHECK-NEXT: 1 100 1.00 * fldl2e # CHECK-NEXT: 1 100 1.00 * fldl2t # CHECK-NEXT: 1 100 1.00 * fldlg2 # CHECK-NEXT: 1 100 1.00 * fldln2 # CHECK-NEXT: 1 100 1.00 * fldpi -# CHECK-NEXT: 1 1 - * fldz +# CHECK-NEXT: 0 0 - * fldz # CHECK-NEXT: 1 5 2.00 * fmul %st(0), %st(1) # CHECK-NEXT: 1 5 2.00 * fmul %st(2) # CHECK-NEXT: 1 8 2.00 * * fmuls (%ecx) Index: test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s =================================================================== --- test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s +++ test/tools/llvm-mca/X86/SandyBridge/resources-x86_64.s @@ -747,7 +747,7 @@ # CHECK-NEXT: 4 7 1.00 * * btcq $7, (%rax) # CHECK-NEXT: 4 7 1.00 * * btrq $7, (%rax) # CHECK-NEXT: 4 7 1.00 * * btsq $7, (%rax) -# CHECK-NEXT: 1 1 - * clc +# CHECK-NEXT: 0 0 - * clc # CHECK-NEXT: 1 1 0.33 decb %dil # CHECK-NEXT: 3 7 1.00 * * decb (%rax) # CHECK-NEXT: 1 1 0.33 decw %di Index: test/tools/llvm-mca/X86/SandyBridge/resources-x87.s =================================================================== --- test/tools/llvm-mca/X86/SandyBridge/resources-x87.s +++ test/tools/llvm-mca/X86/SandyBridge/resources-x87.s @@ -281,13 +281,13 @@ # CHECK-NEXT: 3 9 1.00 * * fldt (%eax) # CHECK-NEXT: 5 8 2.00 * * fldcw (%eax) # CHECK-NEXT: 1 100 0.33 * fldenv (%eax) -# CHECK-NEXT: 1 1 - * fld1 +# CHECK-NEXT: 0 0 - * fld1 # CHECK-NEXT: 1 100 0.33 * fldl2e # CHECK-NEXT: 1 100 0.33 * fldl2t # CHECK-NEXT: 1 100 0.33 * fldlg2 # CHECK-NEXT: 1 100 0.33 * fldln2 # CHECK-NEXT: 1 100 0.33 * fldpi -# CHECK-NEXT: 1 1 - * fldz +# CHECK-NEXT: 0 0 - * fldz # CHECK-NEXT: 1 5 1.00 * fmul %st(0), %st(1) # CHECK-NEXT: 1 5 1.00 * fmul %st(2) # CHECK-NEXT: 2 12 1.00 * * fmuls (%ecx) Index: test/tools/llvm-mca/X86/SkylakeClient/resources-x86_64.s =================================================================== --- test/tools/llvm-mca/X86/SkylakeClient/resources-x86_64.s +++ test/tools/llvm-mca/X86/SkylakeClient/resources-x86_64.s @@ -747,7 +747,7 @@ # CHECK-NEXT: 4 6 1.00 * * btcq $7, (%rax) # CHECK-NEXT: 4 6 1.00 * * btrq $7, (%rax) # CHECK-NEXT: 4 6 1.00 * * btsq $7, (%rax) -# CHECK-NEXT: 1 1 - * clc +# CHECK-NEXT: 0 0 - * clc # CHECK-NEXT: 1 1 0.25 decb %dil # CHECK-NEXT: 3 7 1.00 * * decb (%rax) # CHECK-NEXT: 1 1 0.25 decw %di Index: test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s =================================================================== --- test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s +++ test/tools/llvm-mca/X86/SkylakeClient/resources-x87.s @@ -281,13 +281,13 @@ # CHECK-NEXT: 1 7 0.50 * * fldt (%eax) # CHECK-NEXT: 3 7 1.00 * * fldcw (%eax) # CHECK-NEXT: 64 62 14.00 * fldenv (%eax) -# CHECK-NEXT: 1 1 - * fld1 +# CHECK-NEXT: 0 0 - * fld1 # CHECK-NEXT: 1 100 0.25 * fldl2e # CHECK-NEXT: 1 100 0.25 * fldl2t # CHECK-NEXT: 1 100 0.25 * fldlg2 # CHECK-NEXT: 1 100 0.25 * fldln2 # CHECK-NEXT: 1 100 0.25 * fldpi -# CHECK-NEXT: 1 1 - * fldz +# CHECK-NEXT: 0 0 - * fldz # CHECK-NEXT: 1 4 1.00 * fmul %st(0), %st(1) # CHECK-NEXT: 1 4 1.00 * fmul %st(2) # CHECK-NEXT: 2 11 1.00 * * fmuls (%ecx) Index: test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s =================================================================== --- test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s +++ test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s @@ -747,7 +747,7 @@ # CHECK-NEXT: 4 6 1.00 * * btcq $7, (%rax) # CHECK-NEXT: 4 6 1.00 * * btrq $7, (%rax) # CHECK-NEXT: 4 6 1.00 * * btsq $7, (%rax) -# CHECK-NEXT: 1 1 - * clc +# CHECK-NEXT: 0 0 - * clc # CHECK-NEXT: 1 1 0.25 decb %dil # CHECK-NEXT: 3 7 1.00 * * decb (%rax) # CHECK-NEXT: 1 1 0.25 decw %di Index: test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s =================================================================== --- test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s +++ test/tools/llvm-mca/X86/SkylakeServer/resources-x87.s @@ -281,13 +281,13 @@ # CHECK-NEXT: 1 7 0.50 * * fldt (%eax) # CHECK-NEXT: 3 7 1.00 * * fldcw (%eax) # CHECK-NEXT: 64 62 14.00 * fldenv (%eax) -# CHECK-NEXT: 1 1 - * fld1 +# CHECK-NEXT: 0 0 - * fld1 # CHECK-NEXT: 1 100 0.25 * fldl2e # CHECK-NEXT: 1 100 0.25 * fldl2t # CHECK-NEXT: 1 100 0.25 * fldlg2 # CHECK-NEXT: 1 100 0.25 * fldln2 # CHECK-NEXT: 1 100 0.25 * fldpi -# CHECK-NEXT: 1 1 - * fldz +# CHECK-NEXT: 0 0 - * fldz # CHECK-NEXT: 1 4 1.00 * fmul %st(0), %st(1) # CHECK-NEXT: 1 4 1.00 * fmul %st(2) # CHECK-NEXT: 2 11 1.00 * * fmuls (%ecx)