Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2083,6 +2083,29 @@
       if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
         return BinaryOperator::CreateLShr(Op1,
                                           ConstantExpr::getSub(WidthC, ShAmtC));
+
+      // fshl(X, Y, C) computes (X << C) | (Y >> (BW - C)). When a constant
+      // mask on one operand clears every bit that operand contributes, the
+      // call is a plain shift of the other operand. Each fold tests the mask
+      // of the operand it discards. A zero shift is skipped so the lshr fold
+      // never shifts by the full bit width.
+      const APInt *ShAmt, *Mask;
+      if (match(ShAmtC, m_APInt(ShAmt)) && ShAmt->getLimitedValue() != 0) {
+        uint64_t LeftShift = ShAmt->getLimitedValue();
+
+        // fshl(X, Y & M, C) --> shl X, C
+        //   when the top C bits of M are zero
+        if (match(Op1, m_c_And(m_Value(), m_APInt(Mask))) &&
+            Mask->countLeadingZeros() >= LeftShift)
+          return BinaryOperator::CreateShl(Op0, ShAmtC);
+
+        // fshl(X & M, Y, C) --> lshr Y, (BW-C)
+        //   when the low (BW-C) bits of M are zero
+        if (match(Op0, m_c_And(m_Value(), m_APInt(Mask))) &&
+            Mask->countTrailingZeros() >= BitWidth - LeftShift)
+          return BinaryOperator::CreateLShr(
+              Op1, ConstantExpr::getSub(WidthC, ShAmtC));
+      }
     }
 
     // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
Index: test/Transforms/InstCombine/fsh.ll
===================================================================
--- test/Transforms/InstCombine/fsh.ll
+++ test/Transforms/InstCombine/fsh.ll
@@ -558,4 +558,41 @@
   %r = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 8)
   ret i16 %r
 }
-
+define i32 @fshl_mask_args_same1(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_same1(
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[A:%.*]], 16
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp1 = and i32 %a, 4294901760 ; 0xffff0000
+  %tmp2 = call i32 @llvm.fshl.i32(i32 %tmp1, i32 %tmp1, i32 16)
+  ret i32 %tmp2
+}
+define i32 @fshl_mask_args_same2(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_same2(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 65280
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp1 = and i32 %a, 255
+  %tmp2 = call i32 @llvm.fshl.i32(i32 %tmp1, i32 %tmp1, i32 8)
+  ret i32 %tmp2
+}
+define i32 @fshl_mask_args_same3(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_same3(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[A:%.*]], 24
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp1 = and i32 %a, 255
+  %tmp2 = call i32 @llvm.fshl.i32(i32 %tmp1, i32 %tmp1, i32 24)
+  ret i32 %tmp2
+}
+define i32 @fshl_mask_args_same4(i32 %a) {
+; CHECK-LABEL: @fshl_mask_args_same4(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[A:%.*]], 8
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 16711680
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp1 = and i32 %a, 4278190080 ; 0xff000000
+  %tmp2 = call i32 @llvm.fshl.i32(i32 %tmp1, i32 %tmp1, i32 24)
+  ret i32 %tmp2
+}