diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5326,6 +5326,21 @@ return DAG.getNode(HandOpcode, DL, VT, Logic); } + // For funnel shifts FSHL/FSHR: + // logic_op (OP x, x1, s), (OP y, y1, s) --> + // --> OP (logic_op x, y), (logic_op x1, y1), s + if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) && + N0.getOperand(2) == N1.getOperand(2)) { + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); + SDValue X1 = N0.getOperand(1); + SDValue Y1 = N1.getOperand(1); + SDValue S = N0.getOperand(2); + SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y); + SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1); + return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S); + } + // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) // Only perform this optimization up until type legalization, before // LegalizeVectorOprs. 
LegalizeVectorOprs promotes vector operations by diff --git a/llvm/test/CodeGen/X86/funnel-shift-logic-fold.ll b/llvm/test/CodeGen/X86/funnel-shift-logic-fold.ll --- a/llvm/test/CodeGen/X86/funnel-shift-logic-fold.ll +++ b/llvm/test/CodeGen/X86/funnel-shift-logic-fold.ll @@ -7,12 +7,11 @@ define i64 @hoist_fshl_from_or(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind { ; X64-LABEL: hoist_fshl_from_or: ; X64: # %bb.0: -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: orq %rcx, %rsi +; X64-NEXT: orq %rdx, %rax ; X64-NEXT: movl %r8d, %ecx -; X64-NEXT: shldq %cl, %rsi, %rdi -; X64-NEXT: shldq %cl, %rax, %rdx -; X64-NEXT: orq %rdi, %rdx -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shldq %cl, %rsi, %rax ; X64-NEXT: retq %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s) %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s) @@ -23,12 +22,11 @@ define i64 @hoist_fshl_from_and(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind { ; X64-LABEL: hoist_fshl_from_and: ; X64: # %bb.0: -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andq %rcx, %rsi +; X64-NEXT: andq %rdx, %rax ; X64-NEXT: movl %r8d, %ecx -; X64-NEXT: shldq %cl, %rsi, %rdi -; X64-NEXT: shldq %cl, %rax, %rdx -; X64-NEXT: andq %rdi, %rdx -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shldq %cl, %rsi, %rax ; X64-NEXT: retq %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s) %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s) @@ -39,12 +37,11 @@ define i64 @hoist_fshl_from_xor(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind { ; X64-LABEL: hoist_fshl_from_xor: ; X64: # %bb.0: -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: xorq %rcx, %rsi +; X64-NEXT: xorq %rdx, %rax ; X64-NEXT: movl %r8d, %ecx -; X64-NEXT: shldq %cl, %rsi, %rdi -; X64-NEXT: shldq %cl, %rax, %rdx -; X64-NEXT: xorq %rdi, %rdx -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: shldq %cl, %rsi, %rax ; X64-NEXT: retq %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s) %fshl.1 = 
call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s) @@ -69,10 +66,10 @@ define i64 @hoist_fshl_from_or_const_shift(i64 %a, i64 %b, i64 %c, i64 %d) nounwind { ; X64-LABEL: hoist_fshl_from_or_const_shift: ; X64: # %bb.0: -; X64-NEXT: movq %rdx, %rax -; X64-NEXT: shldq $15, %rsi, %rdi -; X64-NEXT: shldq $15, %rcx, %rax -; X64-NEXT: orq %rdi, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: orq %rcx, %rsi +; X64-NEXT: orq %rdx, %rax +; X64-NEXT: shldq $15, %rsi, %rax ; X64-NEXT: retq %fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 15) %fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 15) @@ -83,11 +80,11 @@ define i64 @hoist_fshr_from_or(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind { ; X64-LABEL: hoist_fshr_from_or: ; X64: # %bb.0: -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: orq %rdx, %rdi +; X64-NEXT: orq %rcx, %rax ; X64-NEXT: movl %r8d, %ecx -; X64-NEXT: shrdq %cl, %rdi, %rsi -; X64-NEXT: shrdq %cl, %rdx, %rax -; X64-NEXT: orq %rsi, %rax +; X64-NEXT: shrdq %cl, %rdi, %rax ; X64-NEXT: retq %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s) %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s) @@ -98,11 +95,11 @@ define i64 @hoist_fshr_from_and(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind { ; X64-LABEL: hoist_fshr_from_and: ; X64: # %bb.0: -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: andq %rdx, %rdi +; X64-NEXT: andq %rcx, %rax ; X64-NEXT: movl %r8d, %ecx -; X64-NEXT: shrdq %cl, %rdi, %rsi -; X64-NEXT: shrdq %cl, %rdx, %rax -; X64-NEXT: andq %rsi, %rax +; X64-NEXT: shrdq %cl, %rdi, %rax ; X64-NEXT: retq %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s) %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s) @@ -113,11 +110,11 @@ define i64 @hoist_fshr_from_xor(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind { ; X64-LABEL: hoist_fshr_from_xor: ; X64: # %bb.0: -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: xorq %rdx, %rdi +; X64-NEXT: xorq %rcx, %rax ; 
X64-NEXT: movl %r8d, %ecx -; X64-NEXT: shrdq %cl, %rdi, %rsi -; X64-NEXT: shrdq %cl, %rdx, %rax -; X64-NEXT: xorq %rsi, %rax +; X64-NEXT: shrdq %cl, %rdi, %rax ; X64-NEXT: retq %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s) %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s) @@ -142,10 +139,10 @@ define i64 @hoist_fshr_from_or_const_shift(i64 %a, i64 %b, i64 %c, i64 %d) nounwind { ; X64-LABEL: hoist_fshr_from_or_const_shift: ; X64: # %bb.0: -; X64-NEXT: movq %rdx, %rax -; X64-NEXT: shldq $49, %rsi, %rdi -; X64-NEXT: shldq $49, %rcx, %rax -; X64-NEXT: orq %rdi, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: orq %rcx, %rsi +; X64-NEXT: orl %edx, %eax +; X64-NEXT: shldq $49, %rsi, %rax ; X64-NEXT: retq %fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 15) %fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 15) diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll --- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll @@ -25,12 +25,11 @@ ; X86-NEXT: adcl $0, %esi ; X86-NEXT: adcl $0, %edx ; X86-NEXT: adcl $0, %ecx -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: shldl $4, %edx, %ebx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: orl %ecx, %ebx ; X86-NEXT: movl %esi, %ebp -; X86-NEXT: orl %ecx, %ebp -; X86-NEXT: shrdl $28, %edx, %ebp ; X86-NEXT: orl %ebx, %ebp +; X86-NEXT: shrdl $28, %ebx, %ebp ; X86-NEXT: jne .LBB0_1 ; X86-NEXT: # %bb.2: # %exit ; X86-NEXT: movl %edi, (%eax) @@ -73,19 +72,15 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind { ; X86-LABEL: opt_setcc_srl_eq_zero: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shrdl $17, %ecx, %eax -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: shldl $15, %edx, %esi -; X86-NEXT: orl %esi, %eax -; X86-NEXT: shrdl $17, %edx, %ecx -; 
X86-NEXT: orl %eax, %ecx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: shldl $15, %edx, %eax ; X86-NEXT: sete %al -; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_srl_eq_zero: @@ -102,19 +97,15 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind { ; X86-LABEL: opt_setcc_srl_ne_zero: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shrdl $17, %ecx, %eax -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: shldl $15, %edx, %esi -; X86-NEXT: orl %esi, %eax -; X86-NEXT: shrdl $17, %edx, %ecx -; X86-NEXT: orl %eax, %ecx +; X86-NEXT: orl %ecx, %edx +; X86-NEXT: orl %eax, %edx +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: shldl $15, %edx, %eax ; X86-NEXT: setne %al -; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_srl_ne_zero: @@ -131,19 +122,13 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind { ; X86-LABEL: opt_setcc_shl_eq_zero: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shldl $17, %edx, %esi -; X86-NEXT: orl %eax, %edx -; X86-NEXT: shldl $17, %ecx, %edx -; X86-NEXT: shldl $17, %eax, %ecx -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: orl %ecx, %edx +; X86-NEXT: shll $17, %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl %ecx, %eax ; X86-NEXT: sete %al -; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_shl_eq_zero: @@ -160,19 +145,13 @@ define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind { ; X86-LABEL: opt_setcc_shl_ne_zero: ; X86: # %bb.0: -; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl 
{{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: shldl $17, %edx, %esi -; X86-NEXT: orl %eax, %edx -; X86-NEXT: shldl $17, %ecx, %edx -; X86-NEXT: shldl $17, %eax, %ecx -; X86-NEXT: orl %esi, %ecx -; X86-NEXT: orl %ecx, %edx +; X86-NEXT: shll $17, %ecx +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl %ecx, %eax ; X86-NEXT: setne %al -; X86-NEXT: popl %esi ; X86-NEXT: retl ; ; X64-LABEL: opt_setcc_shl_ne_zero: @@ -243,13 +222,11 @@ ; X86-LABEL: opt_setcc_expanded_shl_correct_shifts: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-NEXT: orl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl %eax, %edx -; X86-NEXT: shldl $17, %ecx, %edx +; X86-NEXT: orl %eax, %ecx ; X86-NEXT: shldl $17, %eax, %ecx -; X86-NEXT: orl %edx, %ecx ; X86-NEXT: sete %al ; X86-NEXT: retl ;