Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2083,6 +2083,27 @@
       if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
         return BinaryOperator::CreateLShr(Op1,
                                           ConstantExpr::getSub(WidthC, ShAmtC));
+
+      // Prune a funnel-shift operand whose constant mask proves it adds no
+      // bits to the result (C is the constant shift amount, BW the width):
+      //   fshl(X & M, Y, C) --> lshr Y, (BW-C)  if ctz(M) >= BW-C
+      //   fshl(X, Y & M, C) --> shl X, C        if clz(M) >= C
+      // m_APInt also accepts splat vectors, so the new shift amounts reuse
+      // ShAmtC/WidthC (as the fold above does) to keep the operand type.
+      const APInt *ShAmt;
+      if (match(ShAmtC, m_APInt(ShAmt))) {
+        uint64_t ShAmtVal = ShAmt->getLimitedValue();
+        assert(ShAmtVal != 0 && ShAmtVal != UINT64_MAX &&
+               "Funnel shift by simple constant not reduced");
+        const APInt *Mask;
+        if (match(Op0, m_c_And(m_Value(), m_APInt(Mask))) &&
+            Mask->countTrailingZeros() >= BitWidth - ShAmtVal)
+          return BinaryOperator::CreateLShr(
+              Op1, ConstantExpr::getSub(WidthC, ShAmtC));
+        if (match(Op1, m_c_And(m_Value(), m_APInt(Mask))) &&
+            Mask->countLeadingZeros() >= ShAmtVal)
+          return BinaryOperator::CreateShl(Op0, ShAmtC);
+      }
     }
 
     // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
Index: test/Transforms/InstCombine/fsh.ll
===================================================================
--- test/Transforms/InstCombine/fsh.ll
+++ test/Transforms/InstCombine/fsh.ll
@@ -558,4 +558,42 @@
   %r = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 8)
   ret i16 %r
 }
-
+define i32 @fshl_mask_args_same1(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_same1(
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[A:%.*]], 16
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp1 = and i32 %a, 4294901760 ; 0xffff0000
+  %tmp2 = call i32 @llvm.fshl.i32(i32 %tmp1, i32 %tmp1, i32 16)
+  ret i32 %tmp2
+}
+define i32 @fshl_mask_args_same2(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_same2(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 65280
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp1 = and i32 %a, 255
+  %tmp2 = call i32 @llvm.fshl.i32(i32 %tmp1, i32 %tmp1, i32 8)
+  ret i32 %tmp2
+}
+define i32 @fshl_mask_args_same_3(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_same_3(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[A:%.*]], 24
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp1 = and i32 %a, 255
+  %tmp2 = call i32 @llvm.fshl.i32(i32 %tmp1, i32 %tmp1, i32 24)
+  ret i32 %tmp2
+}
+define i32 @fshl_mask_args_same5(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_same5(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], 130560
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %tmp2 = and i32 %a, 4294901760 ; 0xffff0000
+  %tmp1 = and i32 %a, 4278190080 ; 0xff000000
+  %tmp3 = call i32 @llvm.fshl.i32(i32 %tmp2, i32 %tmp1, i32 17)
+  ret i32 %tmp3
+}