diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5326,6 +5326,21 @@
     return DAG.getNode(HandOpcode, DL, VT, Logic);
   }
 
+  // For funnel shifts FSHL/FSHR:
+  // logic_op (OP x, x1, s), (OP y, y1, s) -->
+  //   OP (logic_op x, y), (logic_op x1, y1), s
+  if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
+      N0.getOperand(2) == N1.getOperand(2)) {
+    if (!N0.hasOneUse() || !N1.hasOneUse())
+      return SDValue();
+    SDValue X1 = N0.getOperand(1);
+    SDValue Y1 = N1.getOperand(1);
+    SDValue S = N0.getOperand(2);
+    SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
+    SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
+    return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
+  }
+
   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
   // Only perform this optimization up until type legalization, before
   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
diff --git a/llvm/test/CodeGen/X86/funnel-shift-logic-fold.ll b/llvm/test/CodeGen/X86/funnel-shift-logic-fold.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/funnel-shift-logic-fold.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
+
+declare i64 @llvm.fshl.i64(i64, i64, i64) nounwind readnone
+declare i64 @llvm.fshr.i64(i64, i64, i64) nounwind readnone
+
+define i64 @hoist_fshl_from_or(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
+; X64-LABEL: hoist_fshl_from_or:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    orq %rcx, %rsi
+; X64-NEXT:    orq %rdx, %rax
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shldq %cl, %rsi, %rax
+; X64-NEXT:    retq
+  %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s)
+  %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s)
+  %res = or i64 %fshl.0, %fshl.1
+  ret i64 %res
+}
+
+define i64 @hoist_fshl_from_and(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
+; X64-LABEL: hoist_fshl_from_and:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    andq %rcx, %rsi
+; X64-NEXT:    andq %rdx, %rax
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shldq %cl, %rsi, %rax
+; X64-NEXT:    retq
+  %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s)
+  %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s)
+  %res = and i64 %fshl.0, %fshl.1
+  ret i64 %res
+}
+
+define i64 @hoist_fshl_from_xor(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
+; X64-LABEL: hoist_fshl_from_xor:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    xorq %rcx, %rsi
+; X64-NEXT:    xorq %rdx, %rax
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shldq %cl, %rsi, %rax
+; X64-NEXT:    retq
+  %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s)
+  %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s)
+  %res = xor i64 %fshl.0, %fshl.1
+  ret i64 %res
+}
+
+define i64 @fshl_or_with_different_shift_value(i64 %a, i64 %b, i64 %c, i64 %d) nounwind {
+; X64-LABEL: fshl_or_with_different_shift_value:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    shldq $12, %rsi, %rdi
+; X64-NEXT:    shldq $13, %rcx, %rax
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    retq
+  %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 12)
+  %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 13)
+  %res = or i64 %fshl.0, %fshl.1
+  ret i64 %res
+}
+
+define i64 @hoist_fshl_from_or_const_shift(i64 %a, i64 %b, i64 %c, i64 %d) nounwind {
+; X64-LABEL: hoist_fshl_from_or_const_shift:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    orq %rcx, %rsi
+; X64-NEXT:    orq %rdx, %rax
+; X64-NEXT:    shldq $15, %rsi, %rax
+; X64-NEXT:    retq
+  %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 15)
+  %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 15)
+  %res = or i64 %fshl.0, %fshl.1
+  ret i64 %res
+}
+
+define i64 @hoist_fshr_from_or(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
+; X64-LABEL: hoist_fshr_from_or:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    orq %rdx, %rdi
+; X64-NEXT:    orq %rcx, %rax
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shrdq %cl, %rdi, %rax
+; X64-NEXT:    retq
+  %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s)
+  %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s)
+  %res = or i64 %fshr.0, %fshr.1
+  ret i64 %res
+}
+
+define i64 @hoist_fshr_from_and(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
+; X64-LABEL: hoist_fshr_from_and:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    andq %rdx, %rdi
+; X64-NEXT:    andq %rcx, %rax
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shrdq %cl, %rdi, %rax
+; X64-NEXT:    retq
+  %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s)
+  %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s)
+  %res = and i64 %fshr.0, %fshr.1
+  ret i64 %res
+}
+
+define i64 @hoist_fshr_from_xor(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
+; X64-LABEL: hoist_fshr_from_xor:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    xorq %rdx, %rdi
+; X64-NEXT:    xorq %rcx, %rax
+; X64-NEXT:    movl %r8d, %ecx
+; X64-NEXT:    shrdq %cl, %rdi, %rax
+; X64-NEXT:    retq
+  %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s)
+  %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s)
+  %res = xor i64 %fshr.0, %fshr.1
+  ret i64 %res
+}
+
+define i64 @fshr_or_with_different_shift_value(i64 %a, i64 %b, i64 %c, i64 %d) nounwind {
+; X64-LABEL: fshr_or_with_different_shift_value:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    shldq $52, %rsi, %rdi
+; X64-NEXT:    shldq $51, %rcx, %rax
+; X64-NEXT:    orq %rdi, %rax
+; X64-NEXT:    retq
+  %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 12)
+  %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 13)
+  %res = or i64 %fshr.0, %fshr.1
+  ret i64 %res
+}
+
+define i64 @hoist_fshr_from_or_const_shift(i64 %a, i64 %b, i64 %c, i64 %d) nounwind {
+; X64-LABEL: hoist_fshr_from_or_const_shift:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    orq %rcx, %rsi
+; X64-NEXT:    orl %edx, %eax
+; X64-NEXT:    shldq $49, %rsi, %rax
+; X64-NEXT:    retq
+  %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 15)
+  %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 15)
+  %res = or i64 %fshr.0, %fshr.1
+  ret i64 %res
+}
diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
--- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
@@ -25,12 +25,11 @@
 ; X86-NEXT:    adcl $0, %esi
 ; X86-NEXT:    adcl $0, %edx
 ; X86-NEXT:    adcl $0, %ecx
-; X86-NEXT:    movl %ecx, %ebx
-; X86-NEXT:    shldl $4, %edx, %ebx
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    orl %ecx, %ebx
 ; X86-NEXT:    movl %esi, %ebp
-; X86-NEXT:    orl %ecx, %ebp
-; X86-NEXT:    shrdl $28, %edx, %ebp
 ; X86-NEXT:    orl %ebx, %ebp
+; X86-NEXT:    shrdl $28, %ebx, %ebp
 ; X86-NEXT:    jne .LBB0_1
 ; X86-NEXT:  # %bb.2: # %exit
 ; X86-NEXT:    movl %edi, (%eax)
@@ -73,19 +72,15 @@
 define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_srl_eq_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    shrdl $17, %ecx, %eax
-; X86-NEXT:    orl %esi, %ecx
-; X86-NEXT:    shldl $15, %edx, %esi
-; X86-NEXT:    orl %esi, %eax
-; X86-NEXT:    shrdl $17, %edx, %ecx
-; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    shldl $15, %edx, %eax
 ; X86-NEXT:    sete %al
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_srl_eq_zero:
@@ -102,19 +97,15 @@
 define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_srl_ne_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    shrdl $17, %ecx, %eax
-; X86-NEXT:    orl %esi, %ecx
-; X86-NEXT:    shldl $15, %edx, %esi
-; X86-NEXT:    orl %esi, %eax
-; X86-NEXT:    shrdl $17, %edx, %ecx
-; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    orl %eax, %edx
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    shldl $15, %edx, %eax
 ; X86-NEXT:    setne %al
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_srl_ne_zero:
@@ -131,19 +122,13 @@
 define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_shl_eq_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    shldl $17, %edx, %esi
-; X86-NEXT:    orl %eax, %edx
-; X86-NEXT:    shldl $17, %ecx, %edx
-; X86-NEXT:    shldl $17, %eax, %ecx
-; X86-NEXT:    orl %esi, %ecx
-; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    shll $17, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    sete %al
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_shl_eq_zero:
@@ -160,19 +145,13 @@
 define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_shl_ne_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    shldl $17, %edx, %esi
-; X86-NEXT:    orl %eax, %edx
-; X86-NEXT:    shldl $17, %ecx, %edx
-; X86-NEXT:    shldl $17, %eax, %ecx
-; X86-NEXT:    orl %esi, %ecx
-; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    shll $17, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    setne %al
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_shl_ne_zero:
@@ -243,13 +222,11 @@
 ; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    orl %eax, %edx
-; X86-NEXT:    shldl $17, %ecx, %edx
+; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    shldl $17, %eax, %ecx
-; X86-NEXT:    orl %edx, %ecx
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    retl
 ;
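
Reviewer note, not part of the patch: the new combine relies on the identity
logic_op (fshl x, x1, s), (fshl y, y1, s) == fshl (logic_op x, y), (logic_op x1, y1), s,
which holds because, for a fixed shift amount, every result bit of a funnel
shift is a copy of one fixed bit of one of the two data operands, so a bitwise
logic op commutes with the shift. Below is a minimal standalone C++ sketch
that brute-forces the OR case at i8 width; the file name, the fshl8 helper,
and the sampling scheme are illustrative assumptions, not LLVM code. AND and
XOR check the same way, and the FSHR case is analogous.

// fsh-logic-check.cpp -- standalone sanity check, not part of the patch.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Reference fshl on i8: concat(Hi, Lo) shifted left by S modulo the bit
// width, keeping the high half -- matching the llvm.fshl semantics.
static uint8_t fshl8(uint8_t Hi, uint8_t Lo, unsigned S) {
  S %= 8;
  return S == 0 ? Hi : (uint8_t)((Hi << S) | (Lo >> (8 - S)));
}

int main() {
  // Exhaustive over one operand pair and the shift; the second pair is a
  // derived sample to keep the loop small (the identity is universal).
  for (unsigned S = 0; S < 16; ++S)
    for (unsigned X = 0; X < 256; ++X)
      for (unsigned X1 = 0; X1 < 256; ++X1) {
        uint8_t Y = (uint8_t)(X * 37 + 11);
        uint8_t Y1 = (uint8_t)(X1 * 53 + 5);
        uint8_t Unfolded = fshl8(X, X1, S) | fshl8(Y, Y1, S);
        uint8_t Folded = fshl8(X | Y, X1 | Y1, S);
        assert(Unfolded == Folded && "logic op failed to hoist over fshl");
      }
  std::puts("or distributes over fshl: identity holds");
  return 0;
}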