Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2021,6 +2021,56 @@
   if (Value *V = SimplifyOrInst(I.getOperand(0), I.getOperand(1),
                                 SQ.getWithInstruction(&I)))
     return replaceInstUsesWith(I, V);
+  { // Fold redundant masking.
+    // If we have or (and x, c1), (and (sh[lr] x, c2), c3)
+    // and c3 == sh[lr] c1, c2, then the shifted and can reuse (and x, c1).
+    const APInt *AndC, *ShAndC, *ShftAmt;
+    Value *X1, *X2;
+    if (match(&I, m_Or(m_c_And(m_Value(X1), m_APInt(AndC)),
+                       m_c_And(m_OneUse(m_Shift(m_Value(X2), m_APInt(ShftAmt))),
+                               m_APInt(ShAndC)))) &&
+        (X1 == X2)) {
+      BinaryOperator *And = cast<BinaryOperator>(I.getOperand(0));
+      BinaryOperator *ShtAnd = cast<BinaryOperator>(I.getOperand(1));
+      BinaryOperator *Shift = cast<BinaryOperator>(
+          isa<BinaryOperator>(ShtAnd->getOperand(0)) ? ShtAnd->getOperand(0)
+                                                     : ShtAnd->getOperand(1));
+      const Instruction::BinaryOps Opcode = Shift->getOpcode();
+      APInt OtherMask;
+      if (Instruction::Shl == Opcode)
+        OtherMask = AndC->shl(ShftAmt->getZExtValue());
+      else if (Instruction::LShr == Opcode)
+        OtherMask = AndC->lshr(ShftAmt->getZExtValue());
+
+      if (OtherMask == *ShAndC) {
+        Value *NewShift = Builder.CreateBinOp(
+            Opcode, And, ConstantInt::get(I.getType(), *ShftAmt));
+        return BinaryOperator::CreateOr(And, NewShift);
+      }
+    }
+    if (match(&I, m_Or(m_c_And(m_OneUse(m_Shift(m_Value(X2), m_APInt(ShftAmt))),
+                               m_APInt(ShAndC)),
+                       m_c_And(m_Value(X1), m_APInt(AndC)))) &&
+        (X1 == X2)) {
+      BinaryOperator *And = cast<BinaryOperator>(I.getOperand(1));
+      BinaryOperator *ShtAnd = cast<BinaryOperator>(I.getOperand(0));
+      BinaryOperator *Shift = cast<BinaryOperator>(
+          isa<BinaryOperator>(ShtAnd->getOperand(0)) ? ShtAnd->getOperand(0)
+                                                     : ShtAnd->getOperand(1));
+      const Instruction::BinaryOps Opcode = Shift->getOpcode();
+      APInt OtherMask;
+      if (Instruction::Shl == Opcode)
+        OtherMask = AndC->shl(ShftAmt->getZExtValue());
+      else if (Instruction::LShr == Opcode)
+        OtherMask = AndC->lshr(ShftAmt->getZExtValue());
+
+      if (OtherMask == *ShAndC) {
+        Value *NewShift = Builder.CreateBinOp(
+            Opcode, And, ConstantInt::get(I.getType(), *ShftAmt));
+        return BinaryOperator::CreateOr(And, NewShift);
+      }
+    }
+  }
 
   if (SimplifyAssociativeOrCommutative(I))
     return &I;
Index: test/Transforms/InstCombine/FoldRedundantShiftedMasks.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/FoldRedundantShiftedMasks.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+
+; https://reviews.llvm.org/D48278
+; Fold redundant masking operations of a shifted value.
+; In a case where
+; x1 = a & 0xFF
+; x2 = a << 8 & 0xFF00
+; we can see x2 as
+; x2 = (a << 8) & (0xFF << 8)
+; which can be rewritten as
+; x2 = (a & 0xFF) << 8
+; that is
+; x2 = x1 << 8
+
+
+define i32 @shl(i16 %a) {
+; CHECK-LABEL: @shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], 172
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i16 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP42:%.*]] = or i16 [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP42]] to i32
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = shl i32 %tmp0, 8
+  %tmp2 = and i32 %tmp0, 172
+  %tmp3 = and i32 %tmp1, 44032
+  %tmp4 = or i32 %tmp2, %tmp3
+  ret i32 %tmp4
+}
+
+define i32 @lshr(i16 %a) {
+; CHECK-LABEL: @lshr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], -21504
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[TMP3]], 8
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = lshr i32 %tmp0, 8
+  %tmp2 = and i32 %tmp1, 172
+  %tmp3 = and i32 %tmp0, 44032
+  %tmp4 = or i32 %tmp2, %tmp3
+  ret i32 %tmp4
+}
+
+define i32 @ashr(i16 %a) {
+; CHECK-LABEL: @ashr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], -21504
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = and i32 %tmp0, 44032
+  %tmp2 = ashr i32 %tmp0, 8
+  %tmp3 = and i32 %tmp2, 172
+  %tmp4 = or i32 %tmp1, %tmp3
+  ret i32 %tmp4
+}
+
+define i32 @shl_nogood(i16 %a) {
+; CHECK-LABEL: @shl_nogood(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 172
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 44032
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %tmp0 = sext i16 %a to i32
+  %tmp1 = and i32 %tmp0, 172
+  %tmp2 = shl i32 %tmp0, %tmp1
+  %tmp3 = and i32 %tmp2, 44032
+  %tmp4 = or i32 %tmp1, %tmp3
+  ret i32 %tmp4
+}
+
+define void @foo(i16* %a) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP0]], -256
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i16 [[TMP0]], 8
+; CHECK-NEXT:    [[OR:%.*]] = or i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    store i16 [[OR]], i16* [[A]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tmp0 = load i16, i16* %a, align 2
+  %conv = sext i16 %tmp0 to i32
+  %and = and i32 %conv, 65280
+  %tmp1 = lshr i32 %conv, 8
+  %and3 = and i32 %tmp1, 255
+  %or = or i32 %and3, %and
+  %conv4 = trunc i32 %or to i16
+  store i16 %conv4, i16* %a, align 2
+  ret void
+}
+
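
For reference, the fold relies on the identity (x & c1) | ((x << c2) & (c1 << c2)) == (x & c1) | ((x & c1) << c2), and the analogous one for lshr. The standalone C++ sketch below is illustrative only and not part of the patch; before_shl and after_shl are hypothetical helpers, and the constants mirror the @shl test above (c1 = 172, c2 = 8). It exhaustively checks the identity over the 16-bit input domain used by the tests.

// Illustrative sketch, not part of the patch: masking a shifted value with the
// shifted mask is the same as shifting the already-masked value.
#include <cassert>
#include <cstdint>

// Pattern before the fold: or (and x, c1), (and (shl x, c2), c3), c3 == c1 << c2.
static uint32_t before_shl(uint32_t x) {
  return (x & 172u) | ((x << 8) & (172u << 8)); // 172 << 8 == 44032
}

// Pattern after the fold: the second operand reuses the existing (and x, c1).
static uint32_t after_shl(uint32_t x) {
  uint32_t masked = x & 172u;
  return masked | (masked << 8);
}

int main() {
  for (uint32_t x = 0; x <= 0xFFFFu; ++x) // exhaustive over 16-bit inputs
    assert(before_shl(x) == after_shl(x));
  return 0;
}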