Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1094,6 +1094,20 @@ return N00; } + if (TLI.isReassocProfitable(DAG, N0, N1)) { + // Reassociate if (op N00, N1) already exists + if (N1 != N01) + if (SDNode *ExistNode = + DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) + return DAG.getNode(Opc, DL, VT, SDValue(ExistNode, 0), N01); + + // Reassociate if (op N01, N1) already exists + if (N1 != N00) + if (SDNode *ExistNode = + DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) + return DAG.getNode(Opc, DL, VT, SDValue(ExistNode, 0), N00); + } + return SDValue(); } Index: llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll +++ llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll @@ -105,20 +105,17 @@ ; CHECK-NEXT: rdvl x8, #1 ; CHECK-NEXT: mov w9, #67108864 ; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: mov z1.d, x1 ; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: mov w9, #33554432 -; CHECK-NEXT: index z2.d, #0, x9 -; CHECK-NEXT: mov z3.d, x8 -; CHECK-NEXT: add z3.d, z2.d, z3.d -; CHECK-NEXT: add z2.d, z2.d, z1.d -; CHECK-NEXT: add z1.d, z3.d, z1.d -; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: st1b { z3.d }, p1, [x0, z2.d] -; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d] +; CHECK-NEXT: index z1.d, x1, x9 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: st1b { z3.d }, p1, [x0, z1.d] +; CHECK-NEXT: add z2.d, z1.d, z2.d +; CHECK-NEXT: st1b { z0.d }, p0, [x0, z2.d] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -140,20 +137,17 @@ ; CHECK-NEXT: mov x9, #-2 ; CHECK-NEXT: 
lsr x8, x8, #4 ; CHECK-NEXT: movk x9, #64511, lsl #16 -; CHECK-NEXT: mov z1.d, x1 ; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: mov x9, #-33554433 ; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: index z2.d, #0, x9 -; CHECK-NEXT: mov z3.d, x8 -; CHECK-NEXT: add z3.d, z2.d, z3.d -; CHECK-NEXT: add z2.d, z2.d, z1.d -; CHECK-NEXT: add z1.d, z3.d, z1.d -; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: st1b { z3.d }, p1, [x0, z2.d] -; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d] +; CHECK-NEXT: index z1.d, x1, x9 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: st1b { z3.d }, p1, [x0, z1.d] +; CHECK-NEXT: add z2.d, z1.d, z2.d +; CHECK-NEXT: st1b { z0.d }, p0, [x0, z2.d] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -174,20 +168,17 @@ ; CHECK-NEXT: rdvl x8, #1 ; CHECK-NEXT: mov x9, #-9223372036854775808 ; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: mov z1.d, x1 ; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: mov x9, #4611686018427387904 -; CHECK-NEXT: index z2.d, #0, x9 -; CHECK-NEXT: mov z3.d, x8 -; CHECK-NEXT: add z3.d, z2.d, z3.d -; CHECK-NEXT: add z2.d, z2.d, z1.d -; CHECK-NEXT: add z1.d, z3.d, z1.d -; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: st1b { z3.d }, p1, [x0, z2.d] -; CHECK-NEXT: st1b { z0.d }, p0, [x0, z1.d] +; CHECK-NEXT: index z1.d, x1, x9 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: st1b { z3.d }, p1, [x0, z1.d] +; CHECK-NEXT: add z2.d, z1.d, z2.d +; CHECK-NEXT: st1b { z0.d }, p0, [x0, z2.d] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer Index: llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll +++ 
llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll @@ -1,10 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; This test used to crash -; GCN-LABEL: {{^}}xor3_i1_const: -; GCN: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], -1 -; GCN: s_xor_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], -1 define amdgpu_ps float @xor3_i1_const(float inreg %arg1, i32 inreg %arg2) { +; GCN-LABEL: xor3_i1_const: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: s_mov_b32 m0, s1 +; GCN-NEXT: v_mov_b32_e32 v1, 0x42640000 +; GCN-NEXT: v_cmp_nlt_f32_e64 s[2:3], s0, 0 +; GCN-NEXT: v_interp_p2_f32 v0, v0, attr0.x +; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v1 +; GCN-NEXT: v_cmp_gt_f32_e64 s[0:1], 0, v0 +; GCN-NEXT: s_or_b64 s[2:3], s[2:3], vcc +; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GCN-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3] +; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s[0:1] +; GCN-NEXT: ; return to shader part epilog main_body: %tmp26 = fcmp nsz olt float %arg1, 0.000000e+00 %tmp28 = call nsz float @llvm.amdgcn.interp.p2(float undef, float undef, i32 0, i32 0, i32 %arg2) Index: llvm/test/CodeGen/X86/ctpop-combine.ll =================================================================== --- llvm/test/CodeGen/X86/ctpop-combine.ll +++ llvm/test/CodeGen/X86/ctpop-combine.ll @@ -162,33 +162,29 @@ define i1 @ctpop_trunc_non_power2(i255 %x) nounwind { ; CHECK-LABEL: ctpop_trunc_non_power2: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movabsq $9223372036854775807, %r8 # imm = 0x7FFFFFFFFFFFFFFF ; CHECK-NEXT: movq %rcx, %r9 ; CHECK-NEXT: andq %r8, %r9 -; CHECK-NEXT: movq %rdi, %r11 -; CHECK-NEXT: addq $-1, %r11 -; CHECK-NEXT: movq %rsi, %r10 -; CHECK-NEXT: adcq $-1, %r10 -; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movq %rdi, %r10 +; CHECK-NEXT: addq $-1, %r10 +; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: adcq $-1, %rax -; CHECK-NEXT: 
movq %rcx, %rbx -; CHECK-NEXT: adcq %r8, %rbx -; CHECK-NEXT: andq %rdi, %r11 -; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: movq %rdx, %r11 +; CHECK-NEXT: adcq $-1, %r11 +; CHECK-NEXT: adcq %r8, %rcx +; CHECK-NEXT: andq %rdi, %r10 +; CHECK-NEXT: andq %rdx, %r11 +; CHECK-NEXT: orq %r10, %r11 +; CHECK-NEXT: andq %r9, %rcx +; CHECK-NEXT: andq %rsi, %rax +; CHECK-NEXT: orq %rcx, %rax ; CHECK-NEXT: orq %r11, %rax -; CHECK-NEXT: andq %rsi, %r10 -; CHECK-NEXT: andq %r8, %rbx -; CHECK-NEXT: andq %rcx, %rbx -; CHECK-NEXT: orq %r10, %rbx -; CHECK-NEXT: orq %rax, %rbx ; CHECK-NEXT: sete %cl ; CHECK-NEXT: orq %rdx, %rdi ; CHECK-NEXT: orq %rsi, %r9 ; CHECK-NEXT: orq %rdi, %r9 ; CHECK-NEXT: setne %al ; CHECK-NEXT: andb %cl, %al -; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq %a = call i255 @llvm.ctpop.i255(i255 %x) %b = trunc i255 %a to i8 ; largest value from ctpop is 255, fits in 8 bits. Index: llvm/test/CodeGen/X86/masked-iv-safe.ll =================================================================== --- llvm/test/CodeGen/X86/masked-iv-safe.ll +++ llvm/test/CodeGen/X86/masked-iv-safe.ll @@ -263,9 +263,10 @@ ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero ; CHECK-NEXT: mulsd %xmm2, %xmm3 ; CHECK-NEXT: movsd %xmm3, (%rdx) +; CHECK-NEXT: addq $2040, %rax # imm = 0x7F8 ; CHECK-NEXT: addq $-8, %rdx ; CHECK-NEXT: addq $134217720, %rcx # imm = 0x7FFFFF8 -; CHECK-NEXT: addq $2040, %rax # imm = 0x7F8 +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: jne .LBB5_1 ; CHECK-NEXT: # %bb.2: # %return ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/masked-iv-unsafe.ll =================================================================== --- llvm/test/CodeGen/X86/masked-iv-unsafe.ll +++ llvm/test/CodeGen/X86/masked-iv-unsafe.ll @@ -481,9 +481,10 @@ ; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero ; CHECK-NEXT: mulsd %xmm2, %xmm3 ; CHECK-NEXT: movsd %xmm3, (%rdx) +; CHECK-NEXT: addq $2040, %rax # imm = 0x7F8 ; CHECK-NEXT: addq $-8, %rdx ; CHECK-NEXT: addq $134217720, %rcx # imm = 0x7FFFFF8 -; CHECK-NEXT: 
addq $2040, %rax # imm = 0x7F8 +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: jne .LBB8_1 ; CHECK-NEXT: # %bb.2: # %return ; CHECK-NEXT: retq