Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7127,14 +7127,41 @@ N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1))) return IsFSHL ? N0 : N1; - // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth) if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) { + EVT ShAmtTy = N2.getValueType(); + + // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth) if (Cst->getAPIntValue().uge(BitWidth)) { uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth); return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1, - DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType())); + DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy)); } - } + + unsigned ShAmt = Cst->getZExtValue(); + if (ShAmt == 0) + return IsFSHL ? N0 : N1; + + // fold fshl(undef, N1, C) -> lshr(N1, BW-C) + // fold fshr(undef, N1, C) -> lshr(N1, C) + // fold fshl(N0, undef, C) -> shl(N0, C) + // fold fshr(N0, undef, C) -> shl(N0, BW-C) + if (N0.isUndef()) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, + DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, + SDLoc(N), ShAmtTy)); + if (N1.isUndef()) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, + DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, + SDLoc(N), ShAmtTy)); + } + + // fold fshr(undef, N1, N2) -> lshr(N1, N2) + // fold fshl(N0, undef, N2) -> shl(N0, N2) + // TODO: when is it worth doing SUB(BW, N2) as well? 
+ if (N0.isUndef() && !IsFSHL) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2); + if (N1.isUndef() && IsFSHL) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2); // fold (fshl N0, N0, N2) -> (rotl N0, N2) // fold (fshr N0, N0, N2) -> (rotr N0, N2) Index: test/CodeGen/X86/funnel-shift.ll =================================================================== --- test/CodeGen/X86/funnel-shift.ll +++ test/CodeGen/X86/funnel-shift.ll @@ -382,12 +382,13 @@ ; X32-SSE2-LABEL: fshl_i32_undef0_cst: ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shldl $9, %eax, %eax +; X32-SSE2-NEXT: shrl $23, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshl_i32_undef0_cst: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: shldl $9, %edi, %eax +; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shrl $23, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9) ret i32 %res @@ -398,7 +399,7 @@ ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shldl %cl, %eax, %eax +; X32-SSE2-NEXT: shll %cl, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshl_i32_undef1: @@ -406,7 +407,7 @@ ; X64-AVX2-NEXT: movl %esi, %ecx ; X64-AVX2-NEXT: movl %edi, %eax ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-AVX2-NEXT: shldl %cl, %eax, %eax +; X64-AVX2-NEXT: shll %cl, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %a1) ret i32 %res @@ -416,13 +417,13 @@ ; X32-SSE2-LABEL: fshl_i32_undef1_cst: ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shldl $9, %eax, %eax +; X32-SSE2-NEXT: shll $9, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshl_i32_undef1_cst: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movl %edi, %eax -; X64-AVX2-NEXT: shldl $9, %eax, %eax +; X64-AVX2-NEXT: shll $9, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9) ret i32 %res @@ -433,7 +434,7 @@ ; 
X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shrdl %cl, %eax, %eax +; X32-SSE2-NEXT: shrl %cl, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshr_i32_undef0: @@ -441,7 +442,7 @@ ; X64-AVX2-NEXT: movl %esi, %ecx ; X64-AVX2-NEXT: movl %edi, %eax ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-AVX2-NEXT: shrdl %cl, %eax, %eax +; X64-AVX2-NEXT: shrl %cl, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %a1) ret i32 %res @@ -451,13 +452,13 @@ ; X32-SSE2-LABEL: fshr_i32_undef0_cst: ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shrdl $9, %eax, %eax +; X32-SSE2-NEXT: shrl $9, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshr_i32_undef0_cst: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movl %edi, %eax -; X64-AVX2-NEXT: shrdl $9, %eax, %eax +; X64-AVX2-NEXT: shrl $9, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9) ret i32 %res @@ -485,12 +486,13 @@ ; X32-SSE2-LABEL: fshr_i32_undef1_cst: ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shrdl $9, %eax, %eax +; X32-SSE2-NEXT: shll $23, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshr_i32_undef1_cst: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: shrdl $9, %edi, %eax +; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shll $23, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9) ret i32 %res