Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -449,0 +450 @@
+    SDValue foldRedundantShiftedMasks(SDNode *N);
@@ -4380,0 +4382,3 @@
+  if (SDValue R = foldRedundantShiftedMasks(N))
+    return R;
+
@@ -5946,0 +5951,102 @@
+// Fold expressions where the same value is both masked and shift-masked, e.g.:
+//   x1 = (and x, 0x00FF)
+//   x2 = (and (shl x, 8), 0xFF00)
+// into
+//   x2 = (shl x1, 8)   ; reuse the computation of x1
+SDValue DAGCombiner::foldRedundantShiftedMasks(SDNode *AND) {
+  if (!AND)
+    return SDValue();
+
+  const SDValue &SHIFT = AND->getOperand(0);
+  if ((SHIFT.getNumOperands() != 2) || (!SHIFT.hasOneUse()))
+    return SDValue();
+
+  const ConstantSDNode *ShiftAmount =
+      dyn_cast<ConstantSDNode>(SHIFT.getOperand(1));
+  if (!ShiftAmount)
+    return SDValue();
+
+  const ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(AND->getOperand(1));
+  if (!Mask)
+    return SDValue();
+
+  SDValue MASKED = SHIFT.getOperand(0);
+  SDNode *MaskedValue = MASKED.getNode();
+  unsigned N0Opcode = SHIFT.getOpcode();
+  for (SDNode *OtherUser : MaskedValue->uses()) {
+    if ((OtherUser == SHIFT.getNode()) || (OtherUser->getOpcode() != ISD::AND))
+      continue;
+
+    ConstantSDNode *OtherMask =
+        dyn_cast<ConstantSDNode>(OtherUser->getOperand(1));
+
+    if (!OtherMask)
+      continue;
+
+    bool CanReduce = false;
+
+    const APInt &MaskValue = Mask->getAPIntValue();
+    const APInt &ShiftValue = ShiftAmount->getAPIntValue();
+    const APInt &OtherMaskValue = OtherMask->getAPIntValue();
+
+    KnownBits MaskedValueBits;
+    DAG.computeKnownBits(MASKED, MaskedValueBits);
+    KnownBits ShiftedValueBits;
+    DAG.computeKnownBits(SHIFT, ShiftedValueBits);
+
+    const APInt EffectiveOtherMask = OtherMaskValue & ~MaskedValueBits.Zero;
+    const APInt EffectiveMask = MaskValue & ~ShiftedValueBits.Zero;
+
+    LLVM_DEBUG(
+        dbgs() << "\tValue being masked and shift-masked: "; MASKED.dump();
+        dbgs() << "\t\tValue zero bits: 0x"
+               << MaskedValueBits.Zero.toString(16, false)
+               << "\n\n\t\tApplied mask: 0x"
+               << OtherMaskValue.toString(16, false) << " : ";
+        OtherUser->dump();
+        dbgs() << "\t\tEffective mask: 0x"
+               << EffectiveOtherMask.toString(16, false)
+               << "\n\n\tShifted by: " << ShiftValue.getZExtValue() << " : ";
+        SHIFT.dump(); dbgs() << "\t\tAnd masked by: 0x"
+                             << MaskValue.toString(16, false) << " : ";
+        AND->dump(); dbgs() << "\t\tEffective mask to shifted value: 0x"
+                            << EffectiveMask.toString(16, false) << '\n';);
+
+    switch (N0Opcode) {
+    case ISD::SHL:
+      CanReduce = (EffectiveOtherMask.shl(ShiftValue) == EffectiveMask) ||
+                  (EffectiveMask.lshr(ShiftValue) == EffectiveOtherMask);
+      break;
+    case ISD::SRA:
+      if (!MaskedValueBits.Zero.isSignBitSet()) {
+        CanReduce = (EffectiveOtherMask.ashr(ShiftValue) == EffectiveMask);
+        break;
+      }
+      // The sign bit is known to be zero, so SRA behaves the same as SRL.
+      N0Opcode = ISD::SRL;
+      LLVM_FALLTHROUGH;
+    case ISD::SRL:
+      CanReduce = (EffectiveOtherMask.lshr(ShiftValue) == EffectiveMask) ||
+                  (EffectiveMask.shl(ShiftValue) == EffectiveOtherMask);
+      break;
+    case ISD::ROTR:
+      CanReduce = (EffectiveOtherMask.rotr(ShiftValue) == EffectiveMask);
+      break;
+    default:
+      return SDValue();
+    }
+    if (CanReduce) {
+      LLVM_DEBUG(dbgs() << "\tCan just shift the masked value\n");
+
+      SDValue ShiftTheAND(OtherUser, 0);
+      const SDLoc DL(SHIFT);
+      EVT VT = AND->getValueType(0);
+      SDValue NewShift =
+          DAG.getNode(N0Opcode, DL, VT, ShiftTheAND, SHIFT.getOperand(1));
+      AddToWorklist(OtherUser);
+      return NewShift;
+    }
+  }
+  return SDValue();
+}
+
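A minimal standalone sketch of the bit-level identities the combine above relies on (not part of the patch; the constants are taken from the tests below, and the loop bound is purely illustrative): masking before a shift equals shifting and then masking with the correspondingly shifted mask, and an arithmetic right shift degenerates to a logical one once the sign bit is known to be zero.

// Illustrative only, not part of the patch.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 0x20000u; ++x) {
    // SHL case: (x << 8) & 0xAC00 can reuse x1 = x & 0xAC,
    // because shifting the mask commutes with shifting the value.
    uint32_t x1 = x & 0xACu;
    assert(((x << 8) & 0xAC00u) == (x1 << 8));

    // SRL case: (x >> 8) & 0xAC can reuse x & 0xAC00.
    assert(((x >> 8) & 0xACu) == ((x & 0xAC00u) >> 8));

    // SRA case: when the sign bit is known zero (true here, x < 2^31),
    // an arithmetic shift produces the same bits as a logical shift.
    assert(((int32_t)x >> 8) == (int32_t)(x >> 8));
  }
  return 0;
}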
Index: test/CodeGen/AArch64/FoldRedundantShiftedMasking.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/FoldRedundantShiftedMasking.ll
@@ -0,0 +1,95 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s -check-prefix=A64
+
+define i32 @ror(i32 %a) {
+entry:
+  %m2 = and i32 %a, 3855
+  %shl = shl i32 %a, 24
+  %shr = lshr i32 %a, 8
+  %or = or i32 %shl, %shr
+  %m1 = and i32 %or, 251658255
+  %or2 = or i32 %m1, %m2
+  ret i32 %or2
+}
+; A64-LABEL: ror
+; A64: mov [[R1:w[0-9]]], #3855
+; A64-NEXT: and [[R2:w[0-9]]], w0, [[R1]]
+; A64-NEXT: orr [[R3:w[0-9]]], [[R1]], [[R1]], ror #8
+
+define i32 @shl(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 172
+  %2 = shl i32 %0, 8
+  %3 = and i32 %2, 44032
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; A64-LABEL: shl:
+; A64: mov w8, #172
+; A64-NEXT: and w8, w0, w8
+; A64-NEXT: orr w0, w8, w8, lsl #8
+
+define i32 @lshr(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 44032
+  %2 = lshr i32 %0, 8
+  %3 = and i32 %2, 172
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; A64-LABEL: lshr:
+; A64: mov w8, #44032
+; A64-NEXT: and w8, w0, w8
+; A64-NEXT: orr w0, w8, w8, lsr #8
+
+define i32 @ashr(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 44032
+  %2 = ashr i32 %0, 8
+  %3 = and i32 %2, 172
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; A64-LABEL: ashr:
+; A64: mov w8, #44032
+; A64-NEXT: and w8, w0, w8
+; A64-NEXT: orr w0, w8, w8, lsr #8
+
+define i32 @shl_nogood(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 172
+  %2 = shl i32 %0, %1
+  %3 = and i32 %2, 44032
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+
+define i32 @shl_nogood2(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 172
+  %2 = shl i32 %0, 8
+  %3 = and i32 %2, %0
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; A64-LABEL: shl_nogood: // @shl_nogood
+; A64: sxth w8, w0
+; A64-NEXT: mov w9, #172
+; A64-NEXT: and w9, w8, w9
+; A64-NEXT: lsl w8, w8, w9
+; A64-NEXT: mov w10, #44032
+; A64-NEXT: and w8, w8, w10
+; A64-NEXT: orr w0, w9, w8
+; A64-NEXT: ret
+; A64-LABEL: shl_nogood2: // @shl_nogood2
+; A64: sxth w8, w0
+; A64-NEXT: mov w9, #172
+; A64-NEXT: and w9, w8, w9
+; A64-NEXT: and w8, w8, w8, lsl #8
+; A64-NEXT: orr w0, w9, w8
+; A64-NEXT: ret
Index: test/CodeGen/ARM/FoldRedundantShiftedMasking.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/FoldRedundantShiftedMasking.ll
@@ -0,0 +1,98 @@
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv4t-arm-none-eabi"
+
+; RUN: llc -march=arm < %s | FileCheck %s -check-prefix=ARM
+
+define i32 @ror(i32 %a) {
+entry:
+  %m2 = and i32 %a, 3855
+  %shl = shl i32 %a, 24
+  %shr = lshr i32 %a, 8
+  %or = or i32 %shl, %shr
+  %m1 = and i32 %or, 251658255
+  %or2 = or i32 %m1, %m2
+  ret i32 %or2
+}
+; ARM-LABEL: ror
+; ARM: mov [[R1:r[0-9]]], #15
+; ARM-NEXT: orr [[R2:r[0-9]]], [[R1]], #3840
+; ARM-NEXT: and [[R3:r[0-9]]], r0, [[R1]]
+; ARM-NEXT: orr [[R4:r[0-9]]], [[R3]], [[R3]], ror #8
+; ARM-NEXT: mov pc, lr
+
+define i32 @shl(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 172
+  %2 = shl i32 %0, 8
+  %3 = and i32 %2, 44032
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; ARM-LABEL: shl:
+; ARM: and r0, r0, #172
+; ARM-NEXT: orr r0, r0, r0, lsl #8
+
+define i32 @lshr(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 44032
+  %2 = lshr i32 %0, 8
+  %3 = and i32 %2, 172
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; ARM-LABEL: lshr:
+; ARM: and r0, r0, #44032
+; ARM-NEXT: orr r0, r0, r0, lsr #8
+
+define i32 @ashr(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 44032
+  %2 = ashr i32 %0, 8
+  %3 = and i32 %2, 172
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; ARM-LABEL: ashr:
+; ARM: and r0, r0, #44032
+; ARM-NEXT: orr r0, r0, r0, lsr #8
+
+define i32 @shl_nogood(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 172
+  %2 = shl i32 %0, %1
+  %3 = and i32 %2, 44032
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+
+define i32 @shl_nogood2(i16 %a) {
+entry:
+  %0 = sext i16 %a to i32
+  %1 = and i32 %0, 172
+  %2 = shl i32 %0, 8
+  %3 = and i32 %2, %0
+  %4 = or i32 %1, %3
+  ret i32 %4
+}
+; ARM-LABEL: shl_nogood:
+; ARM: lsl r0, r0, #16
+; ARM-NEXT: mov r1, #172
+; ARM-NEXT: and r1, r1, r0, asr #16
+; ARM-NEXT: asr r0, r0, #16
+; ARM-NEXT: mov r2, #44032
+; ARM-NEXT: and r0, r2, r0, lsl r1
+; ARM-NEXT: orr r0, r1, r0
+; ARM-NEXT: mov pc, lr
+; ARM-LABEL: shl_nogood2:
+; ARM: lsl r0, r0, #16
+; ARM-NEXT: mov r1, #172
+; ARM-NEXT: asr r2, r0, #16
+; ARM-NEXT: and r1, r1, r0, asr #16
+; ARM-NEXT: lsl r2, r2, #8
+; ARM-NEXT: and r0, r2, r0, asr #16
+; ARM-NEXT: orr r0, r1, r0
+; ARM-NEXT: mov pc, lr
Index: test/CodeGen/X86/pr32329.ll
===================================================================
--- test/CodeGen/X86/pr32329.ll
+++ test/CodeGen/X86/pr32329.ll
@@ -32 +31,0 @@
-; X86-NEXT: movl obj, %edx
@@ -38,6 +37,7 @@
-; X86-NEXT: andl $4194303, %edx # imm = 0x3FFFFF
-; X86-NEXT: leal (%edx,%edx), %ebx
-; X86-NEXT: subl %eax, %ebx
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: subl %esi, %edi
-; X86-NEXT: imull %edi, %ecx
+; X86-NEXT: movl $4194303, %edi # imm = 0x3FFFFF
+; X86-NEXT: andl obj, %edi
+; X86-NEXT: leal (%edi,%edi), %edx
+; X86-NEXT: subl %eax, %edx
+; X86-NEXT: movl %edx, %ebx
+; X86-NEXT: subl %esi, %ebx
+; X86-NEXT: imull %ebx, %ecx
@@ -53 +53 @@
-; X86-NEXT: cmpl %edx, %edi
+; X86-NEXT: cmpl %edi, %ebx
@@ -57,2 +57,2 @@
-; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: movb %bl, var_218
+; X86-NEXT: imull %eax, %edx
+; X86-NEXT: movb %dl, var_218
@@ -71 +70,0 @@
-; X64-NEXT: movl {{.*}}(%rip), %eax
@@ -77,2 +76,3 @@
-; X64-NEXT: andl $4194303, %eax # imm = 0x3FFFFF
-; X64-NEXT: leal (%rax,%rax), %edi
+; X64-NEXT: movl $4194303, %esi
+; X64-NEXT: andl obj(%rip), %esi
+; X64-NEXT: leal (%rsi,%rsi), %edi
@@ -80,3 +80,3 @@
-; X64-NEXT: movl %edi, %esi
-; X64-NEXT: subl %r8d, %esi
-; X64-NEXT: imull %esi, %ecx
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: subl %r8d, %edx
+; X64-NEXT: imull %edx, %ecx
@@ -84 +84 @@
-; X64-NEXT: movl $9, %edx
+; X64-NEXT: movl $9, %eax
@@ -86,3 +86,3 @@
-; X64-NEXT: shlq %cl, %rdx
-; X64-NEXT: movq %rdx, {{.*}}(%rip)
-; X64-NEXT: cmpl %eax, %esi
+; X64-NEXT: shlq %cl, %rax
+; X64-NEXT: movq %rax, {{.*}}(%rip)
+; X64-NEXT: cmpl %esi, %edx