Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2014,6 +2014,48 @@ return nullptr; } +// Fold redundant masking. +// Let C3 = C1 shft C2 +// (X & C1) | ((X shft C2) & C3) -> (X & C1) | ((X & C1) shft C2) +// TODO: If the canonical form of AND before any type of shift +// is taken, then this function can be removed. +static Instruction *foldRedundantMask(BinaryOperator &I, Value *A, Value *B, + InstCombiner::BuilderTy &Builder) { + const APInt *AndC, *ShAndC, *ShftAmt; + Value *X; + // 1st: Check for desired pattern + if (match(&I, m_c_Or(m_And(m_OneUse(m_Shift(m_Value(X), m_APInt(ShftAmt))), + m_APInt(ShAndC)), + m_And(m_Deferred(X), m_APInt(AndC))))) { + // 2nd: Find out which AND is which + BinaryOperator *And = cast(A), *Shift, *ShtAnd; + if (And->getOperand(0) != X) { + ShtAnd = And; + And = cast(B); + } else + ShtAnd = cast(B); + // 3rd: Perform shift of the mask + Shift = cast(ShtAnd->getOperand(0)); + const Instruction::BinaryOps Opcode = Shift->getOpcode(); + APInt OtherMask; + if (Instruction::Shl == Opcode) + OtherMask = AndC->shl(ShftAmt->getZExtValue()); + else if (Instruction::LShr == Opcode) + OtherMask = AndC->lshr(ShftAmt->getZExtValue()); + else + OtherMask = AndC->ashr(ShftAmt->getZExtValue()); + // TODO: Special case for ashr: if X is known to + // be non negative, it behaves as a lshr + + if (OtherMask == *ShAndC) { + Value *NewShift = Builder.CreateBinOp( + Opcode, And, ConstantInt::get(I.getType(), *ShftAmt)); + return BinaryOperator::CreateOr(And, NewShift); + } + } + return nullptr; +} + // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches // here. We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -2156,6 +2198,10 @@ return replaceInstUsesWith(I, V); if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder)) return replaceInstUsesWith(I, V); + + if (C1 && C2) + if (Instruction *N = foldRedundantMask(I, Op0, Op1, Builder)) + return N; } } Index: test/Transforms/InstCombine/FoldRedundantShiftedMasks.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/FoldRedundantShiftedMasks.ll @@ -0,0 +1,110 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine %s -o - | FileCheck %s + +; Fold redundant masking operations of shifted value +; In a case where +; x1 = a & 0xFF00 +; x2 = a >> 8 & 0xFF +; we can see x2 as: +; x2 = a >> 8 & 0xFF00 >> 8 +; that can be translated to +; x2 = (a & 0xFF00) >> 8 +; that is +; x2 = x1 >> 8 + + +define i32 @shl(i16 %a) { +; CHECK-LABEL: @shl( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i16 [[A:%.*]], 172 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i16 [[TMP0]], 8 +; CHECK-NEXT: [[TMP42:%.*]] = or i16 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP42]] to i32 +; CHECK-NEXT: ret i32 [[TMP4]] +; +entry: + %tmp0 = sext i16 %a to i32 + %tmp1 = shl i32 %tmp0, 8 + %tmp2 = and i32 %tmp0, 172 + %tmp3 = and i32 %tmp1, 44032 + %tmp4 = or i32 %tmp2, %tmp3 + ret i32 %tmp4 +} + +define i32 @lshr(i16 %a) { +; CHECK-LABEL: @lshr( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i16 [[A:%.*]], -21504 +; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[TMP3]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]] +; CHECK-NEXT: ret i32 [[TMP4]] +; +entry: + %tmp0 = sext i16 %a to i32 + %tmp1 = lshr i32 %tmp0, 8 + %tmp2 = and i32 %tmp1, 172 + %tmp3 = and i32 %tmp0, 44032 + %tmp4 = or i32 %tmp2, %tmp3 + ret i32 %tmp4 +} + +define i32 @ashr(i16 %a) { +; CHECK-LABEL: @ashr( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i16 [[A:%.*]], -21504 +; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: ret i32 [[TMP3]] +; +entry: + %tmp0 = sext i16 %a to i32 + %tmp1 = and i32 %tmp0, 44032 + %tmp2 = ashr i32 %tmp0, 8 + %tmp3 = and i32 %tmp2, 172 + %tmp4 = or i32 %tmp1, %tmp3 + ret i32 %tmp4 +} + +define i32 @shl_nogood(i16 %a) { +; CHECK-LABEL: @shl_nogood( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = sext i16 [[A:%.*]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 172 +; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 44032 +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]] +; CHECK-NEXT: ret i32 [[TMP4]] +; +entry: + %tmp0 = sext i16 %a to i32 + %tmp1 = and i32 %tmp0, 172 + %tmp2 = shl i32 %tmp0, %tmp1 + %tmp3 = and i32 %tmp2, 44032 + %tmp4 = or i32 %tmp1, %tmp3 + ret i32 %tmp4 +} + +define void @foo(i16* %a) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP0]], -256 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 [[TMP0]], 8 +; CHECK-NEXT: [[OR:%.*]] = or i16 [[TMP1]], [[TMP2]] +; CHECK-NEXT: store i16 [[OR]], i16* [[A]], align 2 +; CHECK-NEXT: ret void +; +entry: + %tmp0 = load i16, i16* %a, align 2 + %conv = sext i16 %tmp0 to i32 + %and = and i32 %conv, 65280 + %tmp1 = lshr i32 %conv, 8 + %and3 = and i32 %tmp1, 255 + %or = or i32 %and3, %and + %conv4 = trunc i32 %or to i16 + store i16 %conv4, i16* %a, align 2 + ret void +} +