Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2014,6 +2014,51 @@
   return nullptr;
 }
 
+// Fold redundant masking.
+// Let C3 = C1 shft C2
+// (X & C1) | ((X shft C2) & C3) -> (X & C1) | ((X & C1) shft C2)
+// TODO: If ANDs are canonicalized to appear before any type of shift,
+// this function can be removed.
+static bool foldRedundantMask(BinaryOperator &I,
+                              InstCombiner::BuilderTy &Builder) {
+  const APInt *AndC, *ShAndC, *ShftAmt;
+  Value *X;
+  BinaryOperator *And, *Shift, *DeadAnd;
+  // 1st: Check for the desired pattern.
+  if (!match(
+          &I,
+          m_c_Or(m_CombineAnd(
+                     m_And(m_OneUse(m_CombineAnd(m_Shift(m_Value(X), m_APInt(ShftAmt)),
+                                                 m_BinOp(Shift))),
+                           m_APInt(ShAndC)), m_BinOp(DeadAnd)),
+                 m_CombineAnd(m_And(m_Deferred(X), m_APInt(AndC)), m_BinOp(And)))))
+    return false;
+
+  const Instruction::BinaryOps Opcode = Shift->getOpcode();
+  const auto DoShift = ShftAmt->getZExtValue();
+
+  // 2nd: Check that the masks match.
+  if (Opcode == Instruction::Shl) {
+    if (!(*ShAndC == AndC->shl(DoShift) || *AndC == ShAndC->lshr(DoShift)))
+      return false;
+
+  } else if (Opcode == Instruction::LShr) {
+    if (!(*ShAndC == AndC->lshr(DoShift) || *AndC == ShAndC->shl(DoShift)))
+      return false;
+
+  } else if (*ShAndC != AndC->ashr(DoShift)) {
+    // TODO: If X is known to have a clear sign bit, ashr behaves as lshr.
+    if (ShAndC->countLeadingZeros() < DoShift || *AndC != ShAndC->shl(DoShift))
+      return false;
+  }
+
+  // 3rd: Shift the result of the AND instead of re-masking the shifted value.
+  Value *NewShift =
+      Builder.CreateBinOp(Opcode, And, ConstantInt::get(I.getType(), *ShftAmt));
+  DeadAnd->replaceAllUsesWith(NewShift);
+  return true;
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -2156,6 +2201,9 @@
         return replaceInstUsesWith(I, V);
       if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder))
         return replaceInstUsesWith(I, V);
+
+      if (C1 && C2 && foldRedundantMask(I, Builder))
+        return &I;
     }
   }
 
Index: test/Transforms/InstCombine/FoldRedundantShiftedMasks.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/FoldRedundantShiftedMasks.ll
@@ -0,0 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+
+; Fold redundant masking operations of a shifted value.
+; In a case where
+;   x1 = a & 0xFF00
+;   x2 = (a >> 8) & 0xFF
+; we can see x2 as
+;   x2 = (a >> 8) & (0xFF00 >> 8)
+; which can be rewritten as
+;   x2 = (a & 0xFF00) >> 8
+; that is
+;   x2 = x1 >> 8
+
+
+define i32 @shl(i16 %a) {
+; CHECK-LABEL: @shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], 172
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i16 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP42:%.*]] = or i16 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP42]] to i32
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = shl i32 %tmp0, 8
+  %tmp2 = and i32 %tmp0, 172
+  %tmp3 = and i32 %tmp1, 44032
+  %tmp4 = or i32 %tmp2, %tmp3
+  ret i32 %tmp4
+}
+
+define i32 @lshr(i16 %a) {
+; CHECK-LABEL: @lshr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], -21504
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[TMP3]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = lshr i32 %tmp0, 8
+  %tmp2 = and i32 %tmp1, 172
+  %tmp3 = and i32 %tmp0, 44032
+  %tmp4 = or i32 %tmp2, %tmp3
+  ret i32 %tmp4
+}
+
+define i32 @ashr(i16 %a) {
+; CHECK-LABEL: @ashr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], -21504
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[TMP3]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp3 = and i32 %tmp0, 44032
+  %tmp2 = ashr i32 %tmp0, 8
+  %tmp1 = and i32 %tmp2, 172
+  %tmp4 = or i32 %tmp3, %tmp1
+  ret i32 %tmp4
+}
+
+define i32 @ashr2(i16 %a) {
+; CHECK-LABEL: @ashr2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], -256
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[TMP3]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp3 = and i32 %tmp0, 65280
+  %tmp2 = ashr i32 %tmp0, 8
+  %tmp1 = and i32 %tmp2, 255
+  %tmp4 = or i32 %tmp3, %tmp1
+  ret i32 %tmp4
+}
+
+define i16 @ashr3(i16 %a) {
+; CHECK-LABEL: @ashr3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP1:%.*]] = and i16 [[A:%.*]], -256
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i16 [[A]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = or i16 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    ret i16 [[TMP4]]
+;
+entry:
+  %tmp1 = and i16 %a, 65280
+  %tmp2 = ashr i16 %a, 8
+  %tmp3 = and i16 %tmp2, 255
+  %tmp4 = or i16 %tmp1, %tmp3
+  ret i16 %tmp4
+}
+
+define i32 @ashr_nogood(i16 %a) {
+; CHECK-LABEL: @ashr_nogood(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 65280
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr i32 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 65535
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = and i32 %tmp0, 65280
+  %tmp2 = ashr i32 %tmp0, 8
+  %tmp3 = and i32 %tmp2, 65535
+  %tmp4 = or i32 %tmp1, %tmp3
+  ret i32 %tmp4
+}
+
+define i32 @shl_nogood(i16 %a) {
+; CHECK-LABEL: @shl_nogood(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 172
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 44032
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = and i32 %tmp0, 172
+  %tmp2 = shl i32 %tmp0, %tmp1
+  %tmp3 = and i32 %tmp2, 44032
+  %tmp4 = or i32 %tmp1, %tmp3
+  ret i32 %tmp4
+}
+
+define void @foo(i16* %a) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMPA:%.*]] = load i16, i16* [[A:%.*]], align 2
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[TMPA]], -256
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i16 [[TMPA]], 8
+; CHECK-NEXT:    [[OR:%.*]] = or i16 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    store i16 [[OR]], i16* [[A]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmpA = load i16, i16* %a, align 2
+  %conv = sext i16 %tmpA to i32
+  %and = and i32 %conv, 65280
+  %tmp1 = lshr i32 %conv, 8
+  %and3 = and i32 %tmp1, 255
+  %or = or i32 %and3, %and
+  %conv4 = trunc i32 %or to i16
+  store i16 %conv4, i16* %a, align 2
+  ret void
+}
+
+; Check that all uses of x2 are replaced
+define i32 @mulUses(i32 %a) {
+; CHECK-LABEL: @mulUses(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[A:%.*]], 44032
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr exact i32 [[TMP3]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP0]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], [[TMP0]]
+; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw i32 [[TMP5]], [[TMP0]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+entry:
+  %tmp1 = lshr i32 %a, 8
+  %x2 = and i32 %tmp1, 172
+  %tmp3 = and i32 %a, 44032
+  %tmp4 = or i32 %x2, %tmp3
+  %tmp5 = add i32 %tmp4, %x2
+  %tmp6 = mul i32 %tmp5, %x2
+  ret i32 %tmp6
+}
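Reviewer note, not part of the patch: a minimal C++ sketch of source code that lowers to the pattern the new fold targets. The function and variable names are made up for illustration; with Clang the signed right shift lowers to ashr, so this has essentially the same shape as the @ashr2 test above (sext i16 to i32, and 65280, ashr 8, and 255, or).

// Illustration only; names are hypothetical and not part of the patch.
int mergeBytes(short a) {
  int x = a;                // sext i16 %a to i32
  int hi = x & 0xFF00;      // x1 = a & 0xFF00
  int lo = (x >> 8) & 0xFF; // x2 = (a >> 8) & 0xFF, foldable to x1 >> 8
  return hi | lo;           // the 'or' that foldRedundantMask rewrites
}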