diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5728,6 +5728,31 @@
   if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
     return V;
 
+  // Recognize the following pattern:
+  //
+  // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
+  //
+  // where bitmask has exactly the low NarrowVT-width bits set, i.e. the AND
+  // clears precisely the upper bits that the sign extension filled in.
+  auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
+    if (LHS->getOpcode() != ISD::SIGN_EXTEND)
+      return false;
+
+    auto *C = dyn_cast<ConstantSDNode>(RHS);
+    if (!C)
+      return false;
+
+    if (!C->getAPIntValue().isMask(
+            LHS.getOperand(0).getValueType().getFixedSizeInBits()))
+      return false;
+
+    return true;
+  };
+
+  // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
+  if (IsAndZeroExtMask(N0, N1))
+    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
--- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
+++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
@@ -343,8 +343,8 @@
 ;
 ; CHECK-BE-LABEL: and_user:
 ; CHECK-BE: @ %bb.0: @ %entry
-; CHECK-BE-NEXT: .save {r4, r5, r6, r7, lr}
-; CHECK-BE-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-BE-NEXT: .save {r4, r5, r6, lr}
+; CHECK-BE-NEXT: push {r4, r5, r6, lr}
 ; CHECK-BE-NEXT: cmp r0, #1
 ; CHECK-BE-NEXT: blt .LBB3_4
 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
@@ -355,24 +355,23 @@
 ; CHECK-BE-NEXT: .p2align 2
 ; CHECK-BE-NEXT: .LBB3_2: @ %for.body
 ; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: ldrsh lr, [r3, #2]!
-; CHECK-BE-NEXT: ldrsh r5, [r2, #2]!
-; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
-; CHECK-BE-NEXT: ldrsh.w r7, [r2, #2]
-; CHECK-BE-NEXT: uxth.w r6, lr
-; CHECK-BE-NEXT: smlabb r5, r5, lr, r12
-; CHECK-BE-NEXT: smlabb r12, r7, r4, r5
+; CHECK-BE-NEXT: ldrh lr, [r3, #2]!
+; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
+; CHECK-BE-NEXT: ldrsh.w r5, [r3, #2]
+; CHECK-BE-NEXT: ldrsh.w r6, [r2, #2]
+; CHECK-BE-NEXT: smlabb r4, r4, lr, r12
+; CHECK-BE-NEXT: smlabb r12, r6, r5, r4
 ; CHECK-BE-NEXT: subs r0, #1
-; CHECK-BE-NEXT: mul r1, r6, r1
+; CHECK-BE-NEXT: mul r1, lr, r1
 ; CHECK-BE-NEXT: bne .LBB3_2
 ; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup
 ; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
 ; CHECK-BE-NEXT: .LBB3_4:
 ; CHECK-BE-NEXT: mov.w r12, #0
 ; CHECK-BE-NEXT: movs r1, #0
 ; CHECK-BE-NEXT: add.w r0, r12, r1
-; CHECK-BE-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-BE-NEXT: pop {r4, r5, r6, pc}
 entry:
   %cmp24 = icmp sgt i32 %arg, 0
   br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup
diff --git a/llvm/test/CodeGen/ARM/and-sext-combine.ll b/llvm/test/CodeGen/ARM/and-sext-combine.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/and-sext-combine.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=arm-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - -O3 \
+; RUN: -asm-verbose=0 | FileCheck %s
+
+; This test exercises the folding of `VT = (and (sign_extend NarrowVT to
+; VT) #bitmask)` into `VT = (zero_extend NarrowVT to VT)` when the
+; #bitmask value is the all-ones mask that selects the value
+; of type NarrowVT inside the value of type VT. The folding is
+; implemented in `DAGCombiner::visitAND`.
+
+; With this folding, the `and` of the sign-extended load of
+; `%b` in `f_i16_i32` is rendered as a zero-extended load.
+
+; CHECK-LABEL: f_i16_i32:
+; CHECK-NEXT: .fnstart
+; CHECK-NEXT: ldrh r1, [r1]
+; CHECK-NEXT: ldrsh r0, [r0]
+; CHECK-NEXT: smulbb r0, r0, r1
+; CHECK-NEXT: mul r0, r0, r1
+; CHECK-NEXT: bx lr
+define i32 @f_i16_i32(i16* %a, i16* %b) {
+  %1 = load i16, i16* %a, align 2
+  %sext.1 = sext i16 %1 to i32            ; sign-extended value loaded from %a
+  %2 = load i16, i16* %b, align 2
+  %sext.2 = sext i16 %2 to i32            ; sign-extended value loaded from %b
+  %masked = and i32 %sext.2, 65535        ; 65535 = 0xFFFF: keeps only the low 16 bits of the sext
+  %mul = mul nsw i32 %sext.2, %sext.1     ; uses the sign-extended %b
+  %count.next = mul i32 %mul, %masked     ; uses the masked %b
+  ret i32 %count.next
+}