diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6710,6 +6710,24 @@ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1); } + auto peekThroughZext = [](SDValue V) { + if (V->getOpcode() == ISD::ZERO_EXTEND) + return V->getOperand(0); + return V; + }; + + // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y (Y may be zero-extended) + if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL && + N0.getOperand(0) == N1.getOperand(0) && + peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) + return N0; // Fold the OR away by reusing the existing fshl node. + + // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y (Y may be zero-extended) + if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL && + N0.getOperand(1) == N1.getOperand(0) && + peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1))) + return N0; // Fold the OR away by reusing the existing fshr node. + return SDValue(); } diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -472,14 +472,12 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) { ; CHECK-LABEL: or_shl_fshl_simplify: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: mvn w9, w2 -; CHECK-NEXT: lsr w10, w0, #1 -; CHECK-NEXT: lsr w9, w10, w9 -; CHECK-NEXT: lsl w8, w1, w8 +; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-NEXT: mvn w8, w2 +; CHECK-NEXT: lsr w9, w0, #1 ; CHECK-NEXT: lsl w10, w1, w2 -; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: orr w0, w8, w10 +; CHECK-NEXT: lsr w8, w9, w8 +; CHECK-NEXT: orr w0, w10, w8 ; CHECK-NEXT: ret %shy = shl i32 %y, %s %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s) @@ -490,14 +488,12 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) { ; CHECK-LABEL: or_lshr_fshr_simplify: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: mvn w9, w2 -; CHECK-NEXT: lsl w10, w0, #1 
-; CHECK-NEXT: lsr w8, w1, w8 -; CHECK-NEXT: lsl w9, w10, w9 +; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-NEXT: mvn w8, w2 +; CHECK-NEXT: lsl w9, w0, #1 ; CHECK-NEXT: lsr w10, w1, w2 -; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: orr w0, w10, w8 +; CHECK-NEXT: lsl w8, w9, w8 +; CHECK-NEXT: orr w0, w8, w10 ; CHECK-NEXT: ret %shy = lshr i32 %y, %s %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s) diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll --- a/llvm/test/CodeGen/RISCV/rv32zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll @@ -3370,7 +3370,6 @@ ; RV32I-NEXT: srli a0, a0, 1 ; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: or_shl_fshl_simplify: @@ -3380,7 +3379,6 @@ ; RV32ZBP-NEXT: srli a0, a0, 1 ; RV32ZBP-NEXT: srl a0, a0, a2 ; RV32ZBP-NEXT: or a0, a1, a0 -; RV32ZBP-NEXT: or a0, a0, a1 ; RV32ZBP-NEXT: ret %shy = shl i32 %y, %s %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s) @@ -3396,7 +3394,6 @@ ; RV32I-NEXT: slli a0, a0, 1 ; RV32I-NEXT: sll a0, a0, a2 ; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: or_lshr_fshr_simplify: @@ -3406,7 +3403,6 @@ ; RV32ZBP-NEXT: slli a0, a0, 1 ; RV32ZBP-NEXT: sll a0, a0, a2 ; RV32ZBP-NEXT: or a0, a0, a1 -; RV32ZBP-NEXT: or a0, a1, a0 ; RV32ZBP-NEXT: ret %shy = lshr i32 %y, %s %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s) diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll --- a/llvm/test/CodeGen/X86/funnel-shift.ll +++ b/llvm/test/CodeGen/X86/funnel-shift.ll @@ -1260,25 +1260,18 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2-LABEL: or_shl_fshl_simplify: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pushl %esi -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), 
%eax -; X86-SSE2-NEXT: movl %eax, %esi -; X86-SSE2-NEXT: shll %cl, %esi ; X86-SSE2-NEXT: shldl %cl, %edx, %eax -; X86-SSE2-NEXT: orl %esi, %eax -; X86-SSE2-NEXT: popl %esi ; X86-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: or_shl_fshl_simplify: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movl %edx, %ecx ; X64-AVX2-NEXT: movl %esi, %eax -; X64-AVX2-NEXT: shll %cl, %eax ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-AVX2-NEXT: shldl %cl, %edi, %esi -; X64-AVX2-NEXT: orl %esi, %eax +; X64-AVX2-NEXT: shldl %cl, %edi, %eax ; X64-AVX2-NEXT: retq %shy = shl i32 %y, %s %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s) @@ -1289,25 +1282,18 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2-LABEL: or_lshr_fshr_simplify: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pushl %esi -; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movl %eax, %esi -; X86-SSE2-NEXT: shrl %cl, %esi ; X86-SSE2-NEXT: shrdl %cl, %edx, %eax -; X86-SSE2-NEXT: orl %esi, %eax -; X86-SSE2-NEXT: popl %esi ; X86-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: or_lshr_fshr_simplify: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movl %edx, %ecx ; X64-AVX2-NEXT: movl %esi, %eax -; X64-AVX2-NEXT: shrl %cl, %eax ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-AVX2-NEXT: shrdl %cl, %edi, %esi -; X64-AVX2-NEXT: orl %esi, %eax +; X64-AVX2-NEXT: shrdl %cl, %edi, %eax ; X64-AVX2-NEXT: retq %shy = lshr i32 %y, %s %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)