Index: lib/CodeGen/TwoAddressInstructionPass.cpp =================================================================== --- lib/CodeGen/TwoAddressInstructionPass.cpp +++ lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1205,6 +1205,7 @@ if (!MI->isCommutable()) return false; + bool MadeChange = false; unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); unsigned OpsNum = MI->getDesc().getNumOperands(); @@ -1223,8 +1224,8 @@ // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp // operands. This makes the live ranges of DstOp and OtherOp joinable. - bool DoCommute = - !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); + bool OtherOpKilled = isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); + bool DoCommute = !BaseOpKilled && OtherOpKilled; if (!DoCommute && isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) { @@ -1238,10 +1239,15 @@ ++NumCommuted; if (AggressiveCommute) ++NumAggrCommuted; - return true; + // There might be more than two commutable operands, update BaseOp and + // continue scanning. 
+ BaseOpReg = OtherOpReg; + BaseOpKilled = OtherOpKilled; + MadeChange = true; + continue; } } - return false; + return MadeChange; } /// For the case where an instruction has a single pair of tied register Index: test/CodeGen/X86/avx512-vpternlog-commute.ll =================================================================== --- test/CodeGen/X86/avx512-vpternlog-commute.ll +++ test/CodeGen/X86/avx512-vpternlog-commute.ll @@ -27,8 +27,7 @@ define <16 x i32> @vpternlog_v16i32_210(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { ; CHECK-LABEL: vpternlog_v16i32_210: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpternlogd $78, %zmm0, %zmm2, %zmm1 -; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 +; CHECK-NEXT: vpternlogd $92, %zmm1, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 -1) ret <16 x i32> %res @@ -434,8 +433,7 @@ ; CHECK-LABEL: vpternlog_v16i32_210_maskz: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpternlogd $78, %zmm0, %zmm2, %zmm1 {%k1} {z} -; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 +; CHECK-NEXT: vpternlogd $92, %zmm1, %zmm2, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x2, <16 x i32> %x1, <16 x i32> %x0, i32 114, i16 %mask) ret <16 x i32> %res Index: test/CodeGen/X86/recip-fastmath.ll =================================================================== --- test/CodeGen/X86/recip-fastmath.ll +++ test/CodeGen/X86/recip-fastmath.ll @@ -390,10 +390,11 @@ ; ; HASWELL-LABEL: v4f32_one_step: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vrcpps %xmm0, %xmm2 # sched: [5:1.00] +; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50] +; 
HASWELL-NEXT: vfnmadd231ps %xmm0, %xmm2, %xmm1 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %xmm2, %xmm2, %xmm1 # sched: [5:0.50] +; HASWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step: @@ -408,10 +409,11 @@ ; ; KNL-LABEL: v4f32_one_step: ; KNL: # %bb.0: -; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vrcpps %xmm0, %xmm2 # sched: [5:1.00] +; KNL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50] +; KNL-NEXT: vfnmadd231ps %xmm0, %xmm2, %xmm1 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %xmm2, %xmm2, %xmm1 # sched: [5:0.50] +; KNL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v4f32_one_step: @@ -665,10 +667,11 @@ ; ; HASWELL-LABEL: v8f32_one_step: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00] +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50] +; HASWELL-NEXT: vfnmadd231ps %ymm0, %ymm2, %ymm1 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %ymm2, %ymm2, %ymm1 # sched: [5:0.50] +; HASWELL-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step: @@ -683,10 +686,11 @@ ; ; KNL-LABEL: v8f32_one_step: ; KNL: # %bb.0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %ymm1, 
%ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00] +; KNL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50] +; KNL-NEXT: vfnmadd231ps %ymm0, %ymm2, %ymm1 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %ymm2, %ymm2, %ymm1 # sched: [5:0.50] +; KNL-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v8f32_one_step: Index: test/CodeGen/X86/recip-fastmath2.ll =================================================================== --- test/CodeGen/X86/recip-fastmath2.ll +++ test/CodeGen/X86/recip-fastmath2.ll @@ -451,9 +451,9 @@ ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50] +; HASWELL-NEXT: vfnmadd231ps %xmm0, %xmm1, %xmm2 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm2 # sched: [5:0.50] +; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm2, %xmm0 # sched: [11:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step2: @@ -471,9 +471,9 @@ ; KNL: # %bb.0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50] +; KNL-NEXT: vfnmadd231ps %xmm0, %xmm1, %xmm2 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm2 # sched: [5:0.50] +; KNL-NEXT: vmulps {{.*}}(%rip), %xmm2, %xmm0 # sched: [11:0.50] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v4f32_one_step2: @@ -550,10 +550,10 @@ ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = 
[1,1,1,1] sched: [6:0.50] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50] -; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd231ps %xmm0, %xmm1, %xmm2 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm2 # sched: [5:0.50] +; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm2, %xmm0 # sched: [11:0.50] +; HASWELL-NEXT: vmulps %xmm2, %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs: @@ -572,10 +572,10 @@ ; KNL: # %bb.0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50] -; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfnmadd231ps %xmm0, %xmm1, %xmm2 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm2 # sched: [5:0.50] +; KNL-NEXT: vmulps {{.*}}(%rip), %xmm2, %xmm0 # sched: [11:0.50] +; KNL-NEXT: vmulps %xmm2, %xmm0, %xmm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v4f32_one_step_2_divs: @@ -787,9 +787,9 @@ ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] +; HASWELL-NEXT: vfnmadd231ps %ymm0, %ymm1, %ymm2 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm2 # sched: [5:0.50] +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm2, %ymm0 # sched: [12:0.50] ; HASWELL-NEXT: retq # sched: 
[7:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step2: @@ -807,9 +807,9 @@ ; KNL: # %bb.0: ; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50] +; KNL-NEXT: vfnmadd231ps %ymm0, %ymm1, %ymm2 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm2 # sched: [5:0.50] +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm2, %ymm0 # sched: [12:0.50] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v8f32_one_step2: @@ -895,10 +895,10 @@ ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50] -; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd231ps %ymm0, %ymm1, %ymm2 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm2 # sched: [5:0.50] +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm2, %ymm0 # sched: [12:0.50] +; HASWELL-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs: @@ -917,10 +917,10 @@ ; KNL: # %bb.0: ; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50] -; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfnmadd231ps %ymm0, %ymm1, %ymm2 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm2 # sched: 
[5:0.50] +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm2, %ymm0 # sched: [12:0.50] +; KNL-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50] ; KNL-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: v8f32_one_step_2_divs: