Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1411,6 +1411,8 @@
   if (Instruction *X = foldShuffledBinop(I))
     return X;
 
+  if (Instruction *Shift = foldRedundantShiftedMasks(&I))
+    return Shift;
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
@@ -2793,3 +2795,98 @@
 
   return nullptr;
 }
+
+// Fold redundant masking of a shifted value, e.g. given
+//   x1 = and x, 0x00FF
+//   x2 = and (shl x, 8), 0xFF00
+// rewrite x2 as
+//   x2 = shl x1, 8   ; reuse the computation of x1
+Instruction *InstCombiner::foldRedundantShiftedMasks(BinaryOperator *AND) {
+  // 1st: check that we have the expected pattern/structure.
+  if (!AND || AND->getOpcode() != Instruction::And)
+    return nullptr;
+
+  Instruction *SHIFT = dyn_cast<Instruction>(AND->getOperand(0));
+  if (!SHIFT || SHIFT->getNumOperands() != 2 || !SHIFT->hasOneUse() ||
+      !SHIFT->isShift())
+    return nullptr;
+
+  unsigned N0Opcode = SHIFT->getOpcode();
+
+  ConstantInt *ShiftAmount = dyn_cast<ConstantInt>(SHIFT->getOperand(1));
+  if (!ShiftAmount)
+    return nullptr;
+
+  const APInt &ShiftValue = ShiftAmount->getValue();
+  const ConstantInt *Mask = dyn_cast<ConstantInt>(AND->getOperand(1));
+  if (!Mask)
+    return nullptr;
+
+  const APInt &MaskValue = Mask->getValue();
+  Value *MaskedValue = SHIFT->getOperand(0);
+  if (!MaskedValue || MaskedValue->getNumUses() != 2)
+    return nullptr;
+
+  BinaryOperator *OtherUser =
+      dyn_cast<BinaryOperator>(*MaskedValue->users().begin());
+
+  if (!OtherUser)
+    return nullptr;
+
+  if (OtherUser == SHIFT)
+    OtherUser =
+        dyn_cast<BinaryOperator>(*std::next(MaskedValue->users().begin()));
+
+  if (!OtherUser || OtherUser == SHIFT ||
+      OtherUser->getOpcode() != Instruction::And ||
+      OtherUser->getParent() != SHIFT->getParent())
+    return nullptr;
+
+  ConstantInt *OtherMask = dyn_cast<ConstantInt>(OtherUser->getOperand(1));
+
+  if (!OtherMask)
+    return nullptr;
+
+  const APInt &OtherMaskValue = OtherMask->getValue();
+
+  if (OtherMaskValue.getBitWidth() != MaskValue.getBitWidth())
+    return nullptr;
+
+  // 2nd: check that the masks and the shifted masks match.
+  switch (N0Opcode) {
+  case Instruction::Shl:
+    if (!(OtherMaskValue.shl(ShiftValue) == MaskValue ||
+          MaskValue.lshr(ShiftValue) == OtherMaskValue))
+      return nullptr;
+    break;
+  case Instruction::AShr:
+    if (OtherMaskValue.ashr(ShiftValue) != MaskValue)
+      return nullptr;
+    break;
+  case Instruction::LShr:
+    if (!(OtherMaskValue.lshr(ShiftValue) == MaskValue ||
+          MaskValue.shl(ShiftValue) == OtherMaskValue))
+      return nullptr;
+    break;
+  default:
+    return nullptr;
+  }
+
+  LLVM_DEBUG(
+      dbgs() << "\tValue being masked and shift-masked: "; MaskedValue->dump();
+      dbgs() << "\t\tApplied mask: 0x" << OtherMaskValue.toString(16, false)
+             << " : ";
+      OtherUser->dump();
+      dbgs() << "\n\n\tShifted by: " << ShiftValue.getZExtValue() << " : ";
+      SHIFT->dump(); dbgs() << "\t\tAnd masked by: 0x"
+                            << MaskValue.toString(16, false) << " : ";
+      AND->dump(); dbgs() << "\n\tCan just shift the masked value from ";
+      OtherUser->dump(););
+  // 3rd: if OtherUser (the new producer) comes after this SHIFT, move it up
+  // so that its result is available here.
+  if (!DT.dominates(OtherUser, SHIFT))
+    OtherUser->moveBefore(SHIFT);
+
+  return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(N0Opcode),
+                                OtherUser, ShiftAmount);
+}
Index: lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- lib/Transforms/InstCombine/InstCombineInternal.h
+++ lib/Transforms/InstCombine/InstCombineInternal.h
@@ -368,6 +368,7 @@
   Instruction *visitICmpInst(ICmpInst &I);
   Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1,
                                    BinaryOperator &I);
+  Instruction *foldRedundantShiftedMasks(BinaryOperator *AND);
   Instruction *commonCastTransforms(CastInst &CI);
   Instruction *commonPointerCastTransforms(CastInst &CI);
   Instruction *visitTrunc(TruncInst &CI);
Index: test/Transforms/AggressiveInstCombine/foldRedundantShiftedMasks.ll
===================================================================
--- /dev/null
+++ test/Transforms/AggressiveInstCombine/foldRedundantShiftedMasks.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine %s -o - | FileCheck %s
+
+; https://reviews.llvm.org/D48278
+; Fold redundant masking operations of a shifted value.
+; In a case where
+;   x1 = a & 0xFF
+;   x2 = a << 8 & 0xFF00
+; we can see x2 as:
+;   x2 = a << 8 & 0xFF << 8
+; that can be translated to
+;   x2 = (a & 0xFF) << 8
+; that is
+;   x2 = x1 << 8
+
+
+define i32 @shl(i16 %a) {
+; CHECK-LABEL: @shl(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], 172
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw i16 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+entry:
+  %0 = sext i16 %a to i32
+  %1 = shl i32 %0, 8
+  %2 = and i32 %0, 172
+  %3 = and i32 %1, 44032
+  %4 = or i32 %2, %3
+  ret i32 %4
+}
+
+define i32 @lshr(i16 %a) {
+; CHECK-LABEL: @lshr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], -21504
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+entry:
+  %0 = sext i16 %a to i32
+  %1 = lshr i32 %0, 8
+  %2 = and i32 %1, 172
+  %3 = and i32 %0, 44032
+  %4 = or i32 %2, %3
+  ret i32 %4
+}
+
+define i32 @ashr(i16 %a) {
+; CHECK-LABEL: @ashr(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i16 [[A:%.*]], -21504
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 44032
+  %2 = ashr i32 %0, 8
+  %3 = and i32 %2, 172
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+
+define i32 @shl_nogood(i16 %a) {
+; CHECK-LABEL: @shl_nogood(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sext i16 [[A:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 172
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 44032
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 172
+  %2 = shl i32 %0, %1
+  %3 = and i32 %2, 44032
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
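Not part of the patch: a minimal standalone sketch of the APInt relationship that the "2nd" check relies on for the Shl case, namely that masking first and shifting afterwards is equivalent to shifting first and applying the widened mask. The constants are only illustrative (the 0x00FF/0xFF00 pair from the comment above), and it assumes LLVM's ADT headers are available.

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  // OtherMask is the mask applied directly to x (x1 = x & 0x00FF);
  // Mask is the mask applied to the shifted value (x2 = (x << 8) & 0xFF00).
  llvm::APInt OtherMask(32, 0x00FF);
  llvm::APInt Mask(32, 0xFF00);
  llvm::APInt ShiftAmt(32, 8);

  // The equivalences checked before rewriting x2 as (x1 << 8):
  assert(OtherMask.shl(ShiftAmt) == Mask);  // shifting the narrow mask yields the wide mask
  assert(Mask.lshr(ShiftAmt) == OtherMask); // and unshifting the wide mask yields the narrow one
  return 0;
}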