diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1334,6 +1334,30 @@
         return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
       }
     }
+
+    // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
+    // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
+    // predicate, or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
+    // comparisons with the same predicate. This enables optimizations such as
+    // the following:
+    //   CMP(A,C) || CMP(B,C) => CMP(MIN/MAX(A,B), C)
+    //   CMP(A,C) && CMP(B,C) => CMP(MIN/MAX(A,B), C)
+    if (Opc == ISD::AND || Opc == ISD::OR) {
+      if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
+          N01->getOpcode() == ISD::SETCC) {
+        ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
+        ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
+        ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
+        if (CC1 == CC00 && CC1 != CC01) {
+          SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
+          return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
+        }
+        if (CC1 == CC01 && CC1 != CC00) {
+          SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
+          return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
+        }
+      }
+    }
   }
 
   return SDValue();
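Note that the reassociation above does not fold any compares by itself; it only regroups an AND/OR tree so that the two SETCC operands sharing a predicate become siblings, where existing combines such as CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C) can then fire. A minimal IR sketch of the shape that benefits (the function and value names here are illustrative only, not taken from the tests below):

define i1 @reassoc_or_sketch(i32 %a, i32 %b, i32 %x, i32 %c) {
  %cmp1 = icmp ult i32 %a, %c   ; same predicate as %cmp3
  %cmp2 = icmp ugt i32 %x, %c   ; different predicate
  %cmp3 = icmp ult i32 %b, %c
  %or1 = or i1 %cmp1, %cmp2
  %or2 = or i1 %or1, %cmp3      ; (or (or %cmp1, %cmp2), %cmp3)
  ret i1 %or2
}

During DAG combining, (or (or %cmp1, %cmp2), %cmp3) is regrouped into (or (or %cmp1, %cmp3), %cmp2), so %cmp1 and %cmp3 can be merged into a single compare of umin(%a, %b) against %c (a <u c || b <u c is equivalent to umin(a, b) <u c). The updated AMDGPU checks below reflect this, with v_min/v_max instructions feeding fewer v_cmp instructions.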
diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
--- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
@@ -1722,14 +1722,12 @@
 ; CHECK-LABEL: test103:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_max_u32_e32 v0, v0, v1
-; CHECK-NEXT: v_max_u32_e32 v1, v2, v3
-; CHECK-NEXT: v_max_u32_e32 v2, v4, v5
-; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v6
-; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v1, v6
-; CHECK-NEXT: v_cmp_lt_u32_e64 s1, v2, v6
-; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0
-; CHECK-NEXT: s_or_b32 s0, s0, s1
+; CHECK-NEXT: v_max_u32_e32 v4, v4, v5
+; CHECK-NEXT: v_max_u32_e32 v2, v2, v3
+; CHECK-NEXT: v_maxmin_u32 v0, v0, v1, v4
+; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v2, v6
+; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v0, v6
+; CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
 %cmp1 = icmp ult i32 %arg1, %C
@@ -1751,20 +1749,18 @@
 ; CHECK-LABEL: test104:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT: v_min_u32_e32 v0, v0, v1
-; CHECK-NEXT: v_max_u32_e32 v1, v2, v3
-; CHECK-NEXT: v_min_u32_e32 v2, v4, v5
-; CHECK-NEXT: v_max_u32_e32 v3, v6, v7
-; CHECK-NEXT: v_min_u32_e32 v4, v8, v9
-; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v10
-; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v1, v10
-; CHECK-NEXT: v_cmp_lt_u32_e64 s1, v2, v10
-; CHECK-NEXT: v_cmp_gt_u32_e64 s2, v3, v10
-; CHECK-NEXT: v_cmp_lt_u32_e64 s3, v4, v10
-; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0
-; CHECK-NEXT: s_or_b32 s1, s1, s2
-; CHECK-NEXT: s_or_b32 s0, s3, s0
-; CHECK-NEXT: s_or_b32 s0, s1, s0
+; CHECK-NEXT: v_min_u32_e32 v8, v8, v9
+; CHECK-NEXT: v_max_u32_e32 v2, v2, v3
+; CHECK-NEXT: v_min_u32_e32 v3, v4, v5
+; CHECK-NEXT: v_max_u32_e32 v4, v6, v7
+; CHECK-NEXT: v_min3_u32 v0, v0, v1, v8
+; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v2, v10
+; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v3, v10
+; CHECK-NEXT: v_cmp_gt_u32_e64 s1, v4, v10
+; CHECK-NEXT: v_cmp_lt_u32_e64 s2, v0, v10
+; CHECK-NEXT: s_or_b32 s0, s0, s1
+; CHECK-NEXT: s_or_b32 s1, s2, vcc_lo
+; CHECK-NEXT: s_or_b32 s0, s0, s1
 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
 %cmp1 = icmp ult i32 %arg1, %C
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -1476,9 +1476,9 @@
 ; GFX1032-NEXT: s_load_dword s2, s[0:1], 0x2c
 ; GFX1032-NEXT: s_load_dword s3, s[0:1], 0x24
 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: v_cmp_ngt_f32_e64 s0, s2, 0
-; GFX1032-NEXT: v_cmp_nlt_f32_e64 s1, s2, 1.0
-; GFX1032-NEXT: v_cmp_nlt_f32_e64 s2, s3, 1.0
+; GFX1032-NEXT: v_cmp_nlt_f32_e64 s0, s2, 1.0
+; GFX1032-NEXT: v_cmp_nlt_f32_e64 s1, s3, 1.0
+; GFX1032-NEXT: v_cmp_ngt_f32_e64 s2, s2, 0
 ; GFX1032-NEXT: s_or_b32 s0, s0, s1
 ; GFX1032-NEXT: s_or_b32 s0, s0, s2
 ; GFX1032-NEXT: s_and_b32 vcc_lo, exec_lo, s0
@@ -1493,12 +1493,12 @@
 ; GFX1064-LABEL: test_preserve_condition_undef_flag:
 ; GFX1064: ; %bb.0: ; %bb0
 ; GFX1064-NEXT: s_clause 0x1
-; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x2c
-; GFX1064-NEXT: s_load_dword s4, s[0:1], 0x24
+; GFX1064-NEXT: s_load_dword s4, s[0:1], 0x2c
+; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x24
 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[0:1], s2, 0
+; GFX1064-NEXT: v_cmp_nlt_f32_e64 s[0:1], s4, 1.0
 ; GFX1064-NEXT: v_cmp_nlt_f32_e64 s[2:3], s2, 1.0
-; GFX1064-NEXT: v_cmp_nlt_f32_e64 s[4:5], s4, 1.0
+; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[4:5], s4, 0
 ; GFX1064-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
 ; GFX1064-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
 ; GFX1064-NEXT: s_and_b64 vcc, exec, s[0:1]
diff --git a/llvm/test/CodeGen/Hexagon/isel/logical.ll b/llvm/test/CodeGen/Hexagon/isel/logical.ll
--- a/llvm/test/CodeGen/Hexagon/isel/logical.ll
+++ b/llvm/test/CodeGen/Hexagon/isel/logical.ll
@@ -1250,13 +1250,16 @@
 ; CHECK-NEXT: p0 = vcmph.eq(r1:0,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p1 = vcmph.eq(r3:2,r7:6)
+; CHECK-NEXT: p1 = vcmph.eq(r5:4,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p2 = vcmph.eq(r5:4,r7:6)
+; CHECK-NEXT: p2 = vcmph.eq(r3:2,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p0 = and(p2,and(p0,!p1))
+; CHECK-NEXT: p0 = and(p0,p1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = and(p0,!p2)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
 ; CHECK-NEXT: r1:0 = mask(p0)
@@ -1364,13 +1367,16 @@
 ; CHECK-NEXT: p0 = vcmph.eq(r1:0,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p1 = vcmph.eq(r3:2,r7:6)
+; CHECK-NEXT: p1 = vcmph.eq(r5:4,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p2 = vcmph.eq(r5:4,r7:6)
+; CHECK-NEXT: p2 = vcmph.eq(r3:2,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p0 = or(p2,or(p0,!p1))
+; CHECK-NEXT: p0 = or(p0,p1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = or(p0,!p2)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
 ; CHECK-NEXT: r1:0 = mask(p0)
@@ -1712,13 +1718,16 @@
 ; CHECK-NEXT: p0 = vcmpb.eq(r1:0,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p1 = vcmpb.eq(r3:2,r7:6)
+; CHECK-NEXT: p1 = vcmpb.eq(r5:4,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p2 = vcmpb.eq(r5:4,r7:6)
+; CHECK-NEXT: p2 = vcmpb.eq(r3:2,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p0 = and(p2,and(p0,!p1))
+; CHECK-NEXT: p0 = and(p0,p1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = and(p0,!p2)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
 ; CHECK-NEXT: r1:0 = mask(p0)
@@ -1826,13 +1835,16 @@
 ; CHECK-NEXT: p0 = vcmpb.eq(r1:0,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p1 = vcmpb.eq(r3:2,r7:6)
+; CHECK-NEXT: p1 = vcmpb.eq(r5:4,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p2 = vcmpb.eq(r5:4,r7:6)
+; CHECK-NEXT: p2 = vcmpb.eq(r3:2,r7:6)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
-; CHECK-NEXT: p0 = or(p2,or(p0,!p1))
+; CHECK-NEXT: p0 = or(p0,p1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: p0 = or(p0,!p2)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
 ; CHECK-NEXT: r1:0 = mask(p0)
diff --git a/llvm/test/CodeGen/X86/v8i1-masks.ll b/llvm/test/CodeGen/X86/v8i1-masks.ll
--- a/llvm/test/CodeGen/X86/v8i1-masks.ll
+++ b/llvm/test/CodeGen/X86/v8i1-masks.ll
@@ -1402,9 +1402,9 @@
 ; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
 ; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
 ; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X86-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
-; X86-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm4
+; X86-NEXT: vandps %ymm4, %ymm3, %ymm3
+; X86-NEXT: vandps %ymm2, %ymm3, %ymm2
 ; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
 ; X86-NEXT: vxorps %ymm1, %ymm3, %ymm1
 ; X86-NEXT: vxorps %ymm2, %ymm1, %ymm1
@@ -1419,9 +1419,9 @@
 ; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
 ; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3
 ; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X64-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
-; X64-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm4
+; X64-NEXT: vandps %ymm4, %ymm3, %ymm3
+; X64-NEXT: vandps %ymm2, %ymm3, %ymm2
 ; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
 ; X64-NEXT: vxorps %ymm1, %ymm3, %ymm1
 ; X64-NEXT: vxorps %ymm2, %ymm1, %ymm1
@@ -1437,10 +1437,10 @@
 ; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
 ; X86-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
 ; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X86-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm4
+; X86-AVX2-NEXT: vandps %ymm4, %ymm3, %ymm3
+; X86-AVX2-NEXT: vandps %ymm2, %ymm3, %ymm2
 ; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
 ; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
 ; X86-AVX2-NEXT: vxorps %ymm1, %ymm3, %ymm1
@@ -1458,10 +1458,10 @@
 ; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
 ; X64-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
 ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X64-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
-; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
-; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
-; X64-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
+; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
+; X64-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm4
+; X64-AVX2-NEXT: vandps %ymm4, %ymm3, %ymm3
+; X64-AVX2-NEXT: vandps %ymm2, %ymm3, %ymm2
 ; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
 ; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
 ; X64-AVX2-NEXT: vxorps %ymm1, %ymm3, %ymm1
@@ -1476,8 +1476,8 @@
 ; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
 ; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
-; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
 ; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
+; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
 ; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k2
 ; X86-AVX512-NEXT: kxorw %k0, %k2, %k0
 ; X86-AVX512-NEXT: kxorw %k1, %k0, %k0
@@ -1492,8 +1492,8 @@
 ; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
 ; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
-; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
 ; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
+; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
 ; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k2
 ; X64-AVX512-NEXT: kxorw %k0, %k2, %k0
 ; X64-AVX512-NEXT: kxorw %k1, %k0, %k0