diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6630,8 +6630,11 @@
 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
 // to consider shift amounts with defined behavior.
+//
+// The IsRotate flag should be set when the LHS of both shifts is the same.
+// Otherwise, if matching a general funnel shift, it should be clear.
 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
-                           SelectionDAG &DAG) {
+                           SelectionDAG &DAG, bool IsRotate) {
   // If EltSize is a power of 2 then:
   //
   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -6663,8 +6666,11 @@
   // always invokes undefined behavior for 32-bit X.
   //
   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
+  //
+  // NOTE: We can only do this when matching a rotate and not a general
+  // funnel shift.
   unsigned MaskLoBits = 0;
-  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+  if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
       unsigned Bits = Log2_64(EltSize);
@@ -6754,7 +6760,8 @@
   //          (srl x, (*ext y))) ->
   //   (rotr x, y) or (rotl x, (sub 32, y))
   EVT VT = Shifted.getValueType();
-  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
+  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
+                     /*IsRotate*/ true)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                        HasPos ? Pos : Neg);
@@ -6783,7 +6790,7 @@
   // fold (or (shl x0, (*ext (sub 32, y))),
   //          (srl x1, (*ext y))) ->
   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
-  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
+  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
                        HasPos ? Pos : Neg);
diff --git a/llvm/test/CodeGen/X86/shift-double.ll b/llvm/test/CodeGen/X86/shift-double.ll
--- a/llvm/test/CodeGen/X86/shift-double.ll
+++ b/llvm/test/CodeGen/X86/shift-double.ll
@@ -480,23 +480,31 @@
   ret i32 %sh
 }
 
-; PR34641 - Masked Shift Counts
+; These are not valid shld/shrd patterns. When the shift amount modulo
+; the bitwidth is zero, the result should be an OR of both operands, not a
+; shift.
 
-define i32 @shld_safe_i32(i32, i32, i32) {
-; X86-LABEL: shld_safe_i32:
+define i32 @not_shld_i32(i32, i32, i32) {
+; X86-LABEL: not_shld_i32:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    negb %cl
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: shld_safe_i32:
+; X64-LABEL: not_shld_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    shll %cl, %edi
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shldl %cl, %esi, %eax
+; X64-NEXT:    shrl %cl, %eax
+; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %4 = and i32 %2, 31
   %5 = shl i32 %0, %4
@@ -507,21 +515,27 @@
   ret i32 %9
 }
 
-define i32 @shrd_safe_i32(i32, i32, i32) {
-; X86-LABEL: shrd_safe_i32:
+define i32 @not_shrd_i32(i32, i32, i32) {
+; X86-LABEL: not_shrd_i32:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    negb %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: shrd_safe_i32:
+; X64-LABEL: not_shrd_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    shrl %cl, %edi
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrdl %cl, %esi, %eax
+; X64-NEXT:    shll %cl, %eax
+; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %4 = and i32 %2, 31
   %5 = lshr i32 %0, %4
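
For context, and not part of the patch itself: a minimal IR sketch of why the OR-of-masked-shifts pattern is only a funnel shift when both shifted operands are the same value. The function name below is hypothetical. With %y == 0 both masked shift amounts are 0, so the IR computes %x0 | %x1, whereas @llvm.fshl.i32(%x0, %x1, 0) (and x86 shld with a count of 0) returns %x0 unchanged. The two agree only when %x0 == %x1, i.e. for a rotate, which is exactly what the new IsRotate flag distinguishes.

define i32 @or_of_masked_shifts(i32 %x0, i32 %x1, i32 %y) {
  %amt = and i32 %y, 31            ; shift amount modulo 32
  %hi = shl i32 %x0, %amt
  %neg = sub i32 0, %y
  %namt = and i32 %neg, 31         ; (32 - %y) modulo 32
  %lo = lshr i32 %x1, %namt
  %r = or i32 %hi, %lo             ; %y == 0 yields %x0 | %x1, not %x0
  ret i32 %r
}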