Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -500,6 +500,13 @@
     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                           EVT LoadResultTy, EVT &ExtVT);
 
+    /// Used by BackwardsPropagateMask to find suitable loads.
+    bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
+                           ConstantSDNode *Mask, SDNode *&NodeToMask);
+    /// Attempt to propagate a given AND node back to load leaves so that they
+    /// can be combined into narrow loads.
+    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
+
     /// Helper function for MergeConsecutiveStores which merges the
     /// component store chains.
     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
@@ -3726,6 +3733,129 @@
   return true;
 }
 
+bool DAGCombiner::SearchForAndLoads(SDNode *N,
+                                    SmallPtrSetImpl<LoadSDNode*> &Loads,
+                                    ConstantSDNode *Mask,
+                                    SDNode *&NodeToMask) {
+  // Recursively search the operands, looking for loads which can be
+  // narrowed.
+  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
+    SDValue Op = N->getOperand(i);
+
+    // Constants should already be fixed up...
+    if (isa<ConstantSDNode>(Op))
+      continue;
+
+    if (!Op.hasOneUse() || Op.getValueType().isVector())
+      return false;
+
+    switch (Op.getOpcode()) {
+    default:
+      // Allow one node which will be masked along with any loads found.
+      if (NodeToMask)
+        return false;
+      NodeToMask = Op.getNode();
+      break;
+    case ISD::LOAD: {
+      auto *Load = cast<LoadSDNode>(Op);
+      EVT ExtVT;
+      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT)) {
+        Loads.insert(Load);
+        break;
+      }
+      return false;
+    }
+    case ISD::ZERO_EXTEND:
+    case ISD::ANY_EXTEND:
+    case ISD::AssertZext: {
+      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
+      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+      EVT VT = Op.getOpcode() == ISD::AssertZext ?
+        cast<VTSDNode>(Op.getOperand(1))->getVT() :
+        Op.getOperand(0).getValueType();
+
+      // We can accept extending nodes if the mask is wider than, or equal in
+      // width to, the original type.
+      if (ExtVT.bitsGE(VT))
+        break;
+
+      // If the type is too large, we'll still have to perform a masking
+      // operation later.
+      if (NodeToMask)
+        return false;
+
+      NodeToMask = Op.getNode();
+      break;
+    }
+    case ISD::OR:
+    case ISD::XOR:
+    case ISD::AND:
+      if (!SearchForAndLoads(Op.getNode(), Loads, Mask, NodeToMask))
+        return false;
+      break;
+    }
+  }
+  return true;
+}
+
+static SDValue InsertAndMask(SDNode *N, SDValue Mask, SelectionDAG &DAG) {
+  // Insert an and node to mask off the higher bits.
+  SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0),
+                            SDValue(N, 0), Mask);
+  SDValue N0 = SDValue(N, 0);
+  for (auto *User : N->uses()) {
+    // Skip the And that we just created.
+    if (User == And.getNode())
+      continue;
+
+    // Update the node to use the newly created mask.
+    SmallVector<SDValue, 4> Ops;
+    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+      SDValue Op = User->getOperand(i);
+      if (Op == N0)
+        Ops.push_back(And);
+      else
+        Ops.push_back(Op);
+    }
+    DAG.UpdateNodeOperands(User, Ops);
+  }
+  return And;
+}
+
+bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
+  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!Mask)
+    return false;
+
+  if (!Mask->getAPIntValue().isMask())
+    return false;
+
+  // No need to do anything if the and directly uses a load.
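+  // In that case the existing (and (load x), mask) fold in visitAND already
+  // narrows the load, so there is no operand tree to walk.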
+  if (isa<LoadSDNode>(N->getOperand(0)))
+    return false;
+
+  SmallPtrSet<LoadSDNode*, 8> Loads;
+  SDNode *UncombinedNode = nullptr;
+  if (SearchForAndLoads(N, Loads, Mask, UncombinedNode)) {
+    if (Loads.empty())
+      return false;
+
+    if (UncombinedNode)
+      InsertAndMask(UncombinedNode, N->getOperand(1), DAG);
+
+    for (auto *Load : Loads) {
+      SDValue And = InsertAndMask(Load, N->getOperand(1), DAG);
+      SDValue NewLoad = ReduceLoadWidth(And.getNode());
+      assert(NewLoad &&
+             "Shouldn't be masking the load if it can't be narrowed");
+      CombineTo(Load, NewLoad, NewLoad.getValue(1));
+    }
+    DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
+    return true;
+  }
+  return false;
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -3927,6 +4057,16 @@
     }
   }
 
+  if (Level >= AfterLegalizeTypes) {
+    // Attempt to propagate the AND back up to the leaves which, if they are
+    // loads, can be combined into narrow loads, allowing the AND node to be
+    // removed. Perform this after legalization so that extend nodes will
+    // already be combined into the loads.
+    if (BackwardsPropagateMask(N, DAG)) {
+      return SDValue(N, 0);
+    }
+  }
+
   if (SDValue Combined = visitANDLike(N0, N1, N))
     return Combined;
 
Index: test/CodeGen/ARM/and-load-combine.ll
===================================================================
--- test/CodeGen/ARM/and-load-combine.ll
+++ test/CodeGen/ARM/and-load-combine.ll
@@ -5,34 +5,33 @@
 ; RUN: llc -mtriple=thumbv8m.main %s -o - | FileCheck %s --check-prefix=THUMB2
 
 define arm_aapcscc zeroext i1 @cmp_xor8_short_short(i16* nocapture readonly %a,
+                                                    i16* nocapture readonly %b) {
 ; ARM-LABEL: cmp_xor8_short_short:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldrh r0, [r0]
-; ARM-NEXT: ldrh r1, [r1]
-; ARM-NEXT: eor r1, r1, r0
+; ARM-NEXT: ldrb r2, [r0]
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, #255
+; ARM-NEXT: ldrb r1, [r1]
+; ARM-NEXT: teq r1, r2
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_xor8_short_short:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldrh r0, [r0]
-; ARMEB-NEXT: ldrh r1, [r1]
-; ARMEB-NEXT: eor r1, r1, r0
+; ARMEB-NEXT: ldrb r2, [r0, #1]
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, #255
+; ARMEB-NEXT: ldrb r1, [r1, #1]
+; ARMEB-NEXT: teq r1, r2
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_xor8_short_short:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldrh r0, [r0]
-; THUMB1-NEXT: ldrh r2, [r1]
+; THUMB1-NEXT: ldrb r0, [r0]
+; THUMB1-NEXT: ldrb r2, [r1]
 ; THUMB1-NEXT: eors r2, r0
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #24
+; THUMB1-NEXT: cmp r2, #0
 ; THUMB1-NEXT: beq .LBB0_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -41,15 +40,13 @@
 ;
 ; THUMB2-LABEL: cmp_xor8_short_short:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldrh r0, [r0]
-; THUMB2-NEXT: ldrh r1, [r1]
-; THUMB2-NEXT: eors r0, r1
-; THUMB2-NEXT: lsls r0, r0, #24
-; THUMB2-NEXT: mov.w r0, #0
+; THUMB2-NEXT: ldrb r2, [r0]
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: ldrb r1, [r1]
+; THUMB2-NEXT: teq.w r1, r2
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                                    i16* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %1 = load i16, i16* %b, align 2
@@ -60,34 +57,33 @@
 }
 
 define arm_aapcscc zeroext i1 @cmp_xor8_short_int(i16* nocapture readonly %a,
+                                                  i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_xor8_short_int:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldrh r0, [r0]
-; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: eor r1, r1, r0
+; ARM-NEXT: ldrb r2, [r0]
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, #255
+; ARM-NEXT: ldrb r1, [r1]
+; ARM-NEXT: teq r1, r2
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_xor8_short_int:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldrh r0, [r0]
-; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: eor r1, r1, r0
+; ARMEB-NEXT: ldrb r2, [r0, #1]
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, #255
+; ARMEB-NEXT: ldrb r1, [r1, #3]
+; ARMEB-NEXT: teq r1, r2
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_xor8_short_int:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldrh r0, [r0]
-; THUMB1-NEXT: ldr r2, [r1]
+; THUMB1-NEXT: ldrb r0, [r0]
+; THUMB1-NEXT: ldrb r2, [r1]
 ; THUMB1-NEXT: eors r2, r0
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #24
+; THUMB1-NEXT: cmp r2, #0
 ; THUMB1-NEXT: beq .LBB1_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -96,15 +92,13 @@
 ;
 ; THUMB2-LABEL: cmp_xor8_short_int:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldrh r0, [r0]
-; THUMB2-NEXT: ldr r1, [r1]
-; THUMB2-NEXT: eors r0, r1
-; THUMB2-NEXT: lsls r0, r0, #24
-; THUMB2-NEXT: mov.w r0, #0
+; THUMB2-NEXT: ldrb r2, [r0]
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: ldrb r1, [r1]
+; THUMB2-NEXT: teq.w r1, r2
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                                  i32* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %conv = zext i16 %0 to i32
@@ -116,34 +110,33 @@
 }
 
 define arm_aapcscc zeroext i1 @cmp_xor8_int_int(i32* nocapture readonly %a,
+                                                i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_xor8_int_int:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldr r0, [r0]
-; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: eor r1, r1, r0
+; ARM-NEXT: ldrb r2, [r0]
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, #255
+; ARM-NEXT: ldrb r1, [r1]
+; ARM-NEXT: teq r1, r2
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_xor8_int_int:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldr r0, [r0]
-; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: eor r1, r1, r0
+; ARMEB-NEXT: ldrb r2, [r0, #3]
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, #255
+; ARMEB-NEXT: ldrb r1, [r1, #3]
+; ARMEB-NEXT: teq r1, r2
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_xor8_int_int:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldr r0, [r0]
-; THUMB1-NEXT: ldr r2, [r1]
+; THUMB1-NEXT: ldrb r0, [r0]
+; THUMB1-NEXT: ldrb r2, [r1]
 ; THUMB1-NEXT: eors r2, r0
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #24
+; THUMB1-NEXT: cmp r2, #0
 ; THUMB1-NEXT: beq .LBB2_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -152,15 +145,13 @@
 ;
 ; THUMB2-LABEL: cmp_xor8_int_int:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldr r0, [r0]
-; THUMB2-NEXT: ldr r1, [r1]
-; THUMB2-NEXT: eors r0, r1
-; THUMB2-NEXT: lsls r0, r0, #24
-; THUMB2-NEXT: mov.w r0, #0
+; THUMB2-NEXT: ldrb r2, [r0]
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: ldrb r1, [r1]
+; THUMB2-NEXT: teq.w r1, r2
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                                i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -171,36 +162,33 @@
 }
 
 define arm_aapcscc zeroext i1 @cmp_xor16(i32* nocapture readonly %a,
+                                         i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_xor16:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldr r0, [r0]
-; ARM-NEXT: movw r2, #65535
-; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: eor r1, r1, r0
+; ARM-NEXT: ldrh r2, [r0]
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, r2
+; ARM-NEXT: ldrh r1, [r1]
+; ARM-NEXT: teq r1, r2
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_xor16:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldr r0, [r0]
-; ARMEB-NEXT: movw r2, #65535
-; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: eor r1, r1, r0
+; ARMEB-NEXT: ldrh r2, [r0, #2]
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, r2
+; ARMEB-NEXT: ldrh r1, [r1, #2]
+; ARMEB-NEXT: teq r1, r2
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_xor16:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldr r0, [r0]
-; THUMB1-NEXT: ldr r2, [r1]
+; THUMB1-NEXT: ldrh r0, [r0]
+; THUMB1-NEXT: ldrh r2, [r1]
 ; THUMB1-NEXT: eors r2, r0
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #16
+; THUMB1-NEXT: cmp r2, #0
 ; THUMB1-NEXT: beq .LBB3_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -209,15 +197,13 @@
 ;
 ; THUMB2-LABEL: cmp_xor16:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldr r0, [r0]
-; THUMB2-NEXT: ldr r1, [r1]
-; THUMB2-NEXT: eors r0, r1
-; THUMB2-NEXT: lsls r0, r0, #16
-; THUMB2-NEXT: mov.w r0, #0
+; THUMB2-NEXT: ldrh r2, [r0]
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: ldrh r1, [r1]
+; THUMB2-NEXT: teq.w r1, r2
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                         i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -228,34 +214,33 @@
 }
 
 define arm_aapcscc zeroext i1 @cmp_or8_short_short(i16* nocapture readonly %a,
+                                                   i16* nocapture readonly %b) {
 ; ARM-LABEL: cmp_or8_short_short:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldrh r0, [r0]
-; ARM-NEXT: ldrh r1, [r1]
-; ARM-NEXT: orr r1, r1, r0
+; ARM-NEXT: ldrb r0, [r0]
+; ARM-NEXT: ldrb r1, [r1]
+; ARM-NEXT: orrs r0, r1, r0
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, #255
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_or8_short_short:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldrh r0, [r0]
-; ARMEB-NEXT: ldrh r1, [r1]
-; ARMEB-NEXT: orr r1, r1, r0
+; ARMEB-NEXT: ldrb r0, [r0, #1]
+; ARMEB-NEXT: ldrb r1, [r1, #1]
+; ARMEB-NEXT: orrs r0, r1, r0
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, #255
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_or8_short_short:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldrh r0, [r0]
-; THUMB1-NEXT: ldrh r2, [r1]
+; THUMB1-NEXT: ldrb r0, [r0]
+; THUMB1-NEXT: ldrb r2, [r1]
 ; THUMB1-NEXT: orrs r2, r0
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #24
+; THUMB1-NEXT: cmp r2, #0
 ; THUMB1-NEXT: beq .LBB4_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -264,15 +249,13 @@
 ;
 ; THUMB2-LABEL: cmp_or8_short_short:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldrh r0, [r0]
-; THUMB2-NEXT: ldrh r1, [r1]
+; THUMB2-NEXT: ldrb r0, [r0]
+; THUMB2-NEXT: ldrb r1, [r1]
 ; THUMB2-NEXT: orrs r0, r1
-; THUMB2-NEXT: lsls r0, r0, #24
 ; THUMB2-NEXT: mov.w r0, #0
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                                   i16* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %1 = load i16, i16* %b, align 2
@@ -283,34 +266,33 @@
 }
 
 define arm_aapcscc zeroext i1 @cmp_or8_short_int(i16* nocapture readonly %a,
+                                                 i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_or8_short_int:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldrh r0, [r0]
-; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: orr r1, r1, r0
+; ARM-NEXT: ldrb r0, [r0]
+; ARM-NEXT: ldrb r1, [r1]
+; ARM-NEXT: orrs r0, r1, r0
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, #255
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_or8_short_int:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldrh r0, [r0]
-; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: orr r1, r1, r0
+; ARMEB-NEXT: ldrb r0, [r0, #1]
+; ARMEB-NEXT: ldrb r1, [r1, #3]
+; ARMEB-NEXT: orrs r0, r1, r0
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, #255
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_or8_short_int:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldrh r0, [r0]
-; THUMB1-NEXT: ldr r2, [r1]
+; THUMB1-NEXT: ldrb r0, [r0]
+; THUMB1-NEXT: ldrb r2, [r1]
 ; THUMB1-NEXT: orrs r2, r0
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #24
+; THUMB1-NEXT: cmp r2, #0
 ; THUMB1-NEXT: beq .LBB5_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -319,15 +301,13 @@
 ;
 ; THUMB2-LABEL: cmp_or8_short_int:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldrh r0, [r0]
-; THUMB2-NEXT: ldr r1, [r1]
+; THUMB2-NEXT: ldrb r0, [r0]
+; THUMB2-NEXT: ldrb r1, [r1]
 ; THUMB2-NEXT: orrs r0, r1
-; THUMB2-NEXT: lsls r0, r0, #24
 ; THUMB2-NEXT: mov.w r0, #0
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                                 i32* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %conv = zext i16 %0 to i32
@@ -339,34 +319,33 @@
 }
 
 define arm_aapcscc zeroext i1 @cmp_or8_int_int(i32* nocapture readonly %a,
+                                               i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_or8_int_int:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldr r0, [r0]
-; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: orr r1, r1, r0
+; ARM-NEXT: ldrb r0, [r0]
+; ARM-NEXT: ldrb r1, [r1]
+; ARM-NEXT: orrs r0, r1, r0
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, #255
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_or8_int_int:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldr r0, [r0]
-; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: orr r1, r1, r0
+; ARMEB-NEXT: ldrb r0, [r0, #3]
+; ARMEB-NEXT: ldrb r1, [r1, #3]
+; ARMEB-NEXT: orrs r0, r1, r0
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, #255
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_or8_int_int:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldr r0, [r0]
-; THUMB1-NEXT: ldr r2, [r1]
+; THUMB1-NEXT: ldrb r0, [r0]
+; THUMB1-NEXT: ldrb r2, [r1]
 ; THUMB1-NEXT: orrs r2, r0
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #24
+; THUMB1-NEXT: cmp r2, #0
 ; THUMB1-NEXT: beq .LBB6_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -375,15 +354,13 @@
 ;
 ; THUMB2-LABEL: cmp_or8_int_int:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldr r0, [r0]
-; THUMB2-NEXT: ldr r1, [r1]
+; THUMB2-NEXT: ldrb r0, [r0]
+; THUMB2-NEXT: ldrb r1, [r1]
 ; THUMB2-NEXT: orrs r0, r1
-; THUMB2-NEXT: lsls r0, r0, #24
 ; THUMB2-NEXT: mov.w r0, #0
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                               i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -394,36 +371,33 @@
 }
 
 define arm_aapcscc zeroext i1 @cmp_or16(i32* nocapture readonly %a,
+                                        i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_or16:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldr r0, [r0]
-; ARM-NEXT: movw r2, #65535
-; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: orr r1, r1, r0
+; ARM-NEXT: ldrh r0, [r0]
+; ARM-NEXT: ldrh r1, [r1]
+; ARM-NEXT: orrs r0, r1, r0
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, r2
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_or16:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldr r0, [r0]
-; ARMEB-NEXT: movw r2, #65535
-; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: orr r1, r1, r0
+; ARMEB-NEXT: ldrh r0, [r0, #2]
+; ARMEB-NEXT: ldrh r1, [r1, #2]
+; ARMEB-NEXT: orrs r0, r1, r0
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, r2
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_or16:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldr r0, [r0]
-; THUMB1-NEXT: ldr r2, [r1]
+; THUMB1-NEXT: ldrh r0, [r0]
+; THUMB1-NEXT: ldrh r2, [r1]
 ; THUMB1-NEXT: orrs r2, r0
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #16
+; THUMB1-NEXT: cmp r2, #0
 ; THUMB1-NEXT: beq .LBB7_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -432,15 +406,13 @@
 ;
 ; THUMB2-LABEL: cmp_or16:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldr r0, [r0]
-; THUMB2-NEXT: ldr r1, [r1]
+; THUMB2-NEXT: ldrh r0, [r0]
+; THUMB2-NEXT: ldrh r1, [r1]
 ; THUMB2-NEXT: orrs r0, r1
-; THUMB2-NEXT: lsls r0, r0, #16
 ; THUMB2-NEXT: mov.w r0, #0
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                        i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -451,34 +423,32 @@
 }
 
 define arm_aapcscc zeroext i1 @cmp_and8_short_short(i16* nocapture readonly %a,
+                                                    i16* nocapture readonly %b) {
 ; ARM-LABEL: cmp_and8_short_short:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldrh r1, [r1]
-; ARM-NEXT: ldrh r0, [r0]
-; ARM-NEXT: and r1, r0, r1
+; ARM-NEXT: ldrb r2, [r0]
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, #255
+; ARM-NEXT: ldrb r1, [r1]
+; ARM-NEXT: tst r2, r1
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_and8_short_short:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldrh r1, [r1]
-; ARMEB-NEXT: ldrh r0, [r0]
-; ARMEB-NEXT: and r1, r0, r1
+; ARMEB-NEXT: ldrb r2, [r0, #1]
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, #255
+; ARMEB-NEXT: ldrb r1, [r1, #1]
+; ARMEB-NEXT: tst r2, r1
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_and8_short_short:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldrh r1, [r1]
-; THUMB1-NEXT: ldrh r2, [r0]
-; THUMB1-NEXT: ands r2, r1
+; THUMB1-NEXT: ldrb r2, [r1]
+; THUMB1-NEXT: ldrb r3, [r0]
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #24
+; THUMB1-NEXT: tst r3, r2
 ; THUMB1-NEXT: beq .LBB8_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -487,15 +457,13 @@
 ;
 ; THUMB2-LABEL: cmp_and8_short_short:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldrh r1, [r1]
-; THUMB2-NEXT: ldrh r0, [r0]
-; THUMB2-NEXT: ands r0, r1
-; THUMB2-NEXT: lsls r0, r0, #24
-; THUMB2-NEXT: mov.w r0, #0
+; THUMB2-NEXT: ldrb r2, [r0]
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: ldrb r1, [r1]
+; THUMB2-NEXT: tst r2, r1
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                                    i16* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %1 = load i16, i16* %b, align 2
@@ -506,34 +474,32 @@
 }
 
 define arm_aapcscc zeroext i1 @cmp_and8_short_int(i16* nocapture readonly %a,
+                                                  i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_and8_short_int:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldrh r0, [r0]
-; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: and r1, r1, r0
+; ARM-NEXT: ldrb r2, [r0]
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, #255
+; ARM-NEXT: ldrb r1, [r1]
+; ARM-NEXT: tst r1, r2
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_and8_short_int:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldrh r0, [r0]
-; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: and r1, r1, r0
+; ARMEB-NEXT: ldrb r2, [r0, #1]
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, #255
+; ARMEB-NEXT: ldrb r1, [r1, #3]
+; ARMEB-NEXT: tst r1, r2
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_and8_short_int:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldrh r0, [r0]
-; THUMB1-NEXT: ldr r2, [r1]
-; THUMB1-NEXT: ands r2, r0
+; THUMB1-NEXT: ldrb r2, [r0]
+; THUMB1-NEXT: ldrb r3, [r1]
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #24
+; THUMB1-NEXT: tst r3, r2
 ; THUMB1-NEXT: beq .LBB9_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -542,15 +508,13 @@
 ;
 ; THUMB2-LABEL: cmp_and8_short_int:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldrh r0, [r0]
-; THUMB2-NEXT: ldr r1, [r1]
-; THUMB2-NEXT: ands r0, r1
-; THUMB2-NEXT: lsls r0, r0, #24
-; THUMB2-NEXT: mov.w r0, #0
+; THUMB2-NEXT: ldrb r2, [r0]
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: ldrb r1, [r1]
+; THUMB2-NEXT: tst r1, r2
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                                  i32* nocapture readonly %b) {
 entry:
   %0 = load i16, i16* %a, align 2
   %1 = load i32, i32* %b, align 4
@@ -562,34 +526,32 @@
 }
 
 define arm_aapcscc zeroext i1 @cmp_and8_int_int(i32* nocapture readonly %a,
+                                                i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_and8_int_int:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: ldr r0, [r0]
-; ARM-NEXT: and r1, r0, r1
+; ARM-NEXT: ldrb r2, [r0]
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, #255
+; ARM-NEXT: ldrb r1, [r1]
+; ARM-NEXT: tst r2, r1
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_and8_int_int:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: ldr r0, [r0]
-; ARMEB-NEXT: and r1, r0, r1
+; ARMEB-NEXT: ldrb r2, [r0, #3]
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, #255
+; ARMEB-NEXT: ldrb r1, [r1, #3]
+; ARMEB-NEXT: tst r2, r1
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_and8_int_int:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldr r1, [r1]
-; THUMB1-NEXT: ldr r2, [r0]
-; THUMB1-NEXT: ands r2, r1
+; THUMB1-NEXT: ldrb r2, [r1]
+; THUMB1-NEXT: ldrb r3, [r0]
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #24
+; THUMB1-NEXT: tst r3, r2
 ; THUMB1-NEXT: beq .LBB10_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -598,15 +560,13 @@
 ;
 ; THUMB2-LABEL: cmp_and8_int_int:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldr r1, [r1]
-; THUMB2-NEXT: ldr r0, [r0]
-; THUMB2-NEXT: ands r0, r1
-; THUMB2-NEXT: lsls r0, r0, #24
-; THUMB2-NEXT: mov.w r0, #0
+; THUMB2-NEXT: ldrb r2, [r0]
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: ldrb r1, [r1]
+; THUMB2-NEXT: tst r2, r1
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                                i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -617,36 +577,32 @@
 }
 
 define arm_aapcscc zeroext i1 @cmp_and16(i32* nocapture readonly %a,
+                                         i32* nocapture readonly %b) {
 ; ARM-LABEL: cmp_and16:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: movw r2, #65535
-; ARM-NEXT: ldr r0, [r0]
-; ARM-NEXT: and r1, r0, r1
+; ARM-NEXT: ldrh r2, [r0]
 ; ARM-NEXT: mov r0, #0
-; ARM-NEXT: tst r1, r2
+; ARM-NEXT: ldrh r1, [r1]
+; ARM-NEXT: tst r2, r1
 ; ARM-NEXT: movweq r0, #1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: cmp_and16:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: movw r2, #65535
-; ARMEB-NEXT: ldr r0, [r0]
-; ARMEB-NEXT: and r1, r0, r1
+; ARMEB-NEXT: ldrh r2, [r0, #2]
 ; ARMEB-NEXT: mov r0, #0
-; ARMEB-NEXT: tst r1, r2
+; ARMEB-NEXT: ldrh r1, [r1, #2]
+; ARMEB-NEXT: tst r2, r1
 ; ARMEB-NEXT: movweq r0, #1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: cmp_and16:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldr r1, [r1]
-; THUMB1-NEXT: ldr r2, [r0]
-; THUMB1-NEXT: ands r2, r1
+; THUMB1-NEXT: ldrh r2, [r1]
+; THUMB1-NEXT: ldrh r3, [r0]
 ; THUMB1-NEXT: movs r0, #1
 ; THUMB1-NEXT: movs r1, #0
-; THUMB1-NEXT: lsls r2, r2, #16
+; THUMB1-NEXT: tst r3, r2
 ; THUMB1-NEXT: beq .LBB11_2
 ; THUMB1-NEXT: @ BB#1: @ %entry
 ; THUMB1-NEXT: mov r0, r1
@@ -655,15 +611,13 @@
 ;
 ; THUMB2-LABEL: cmp_and16:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldr r1, [r1]
-; THUMB2-NEXT: ldr r0, [r0]
-; THUMB2-NEXT: ands r0, r1
-; THUMB2-NEXT: lsls r0, r0, #16
-; THUMB2-NEXT: mov.w r0, #0
+; THUMB2-NEXT: ldrh r2, [r0]
+; THUMB2-NEXT: movs r0, #0
+; THUMB2-NEXT: ldrh r1, [r1]
+; THUMB2-NEXT: tst r2, r1
 ; THUMB2-NEXT: it eq
 ; THUMB2-NEXT: moveq r0, #1
 ; THUMB2-NEXT: bx lr
-                                         i32* nocapture readonly %b) {
 entry:
   %0 = load i32, i32* %a, align 4
   %1 = load i32, i32* %b, align 4
@@ -676,34 +630,34 @@
 define arm_aapcscc i32 @add_and16(i32* nocapture readonly %a, i32 %y, i32 %z) {
 ; ARM-LABEL: add_and16:
 ; ARM: @ BB#0: @ %entry
-; ARM-NEXT: ldr r0, [r0]
 ; ARM-NEXT: add r1, r1, r2
+; ARM-NEXT: ldrh r0, [r0]
+; ARM-NEXT: uxth r1, r1
 ; ARM-NEXT: orr r0, r0, r1
-; ARM-NEXT: uxth r0, r0
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: add_and16:
 ; ARMEB: @ BB#0: @ %entry
-; ARMEB-NEXT: ldr r0, [r0]
 ; ARMEB-NEXT: add r1, r1, r2
+; ARMEB-NEXT: ldrh r0, [r0, #2]
+; ARMEB-NEXT: uxth r1, r1
 ; ARMEB-NEXT: orr r0, r0, r1
-; ARMEB-NEXT: uxth r0, r0
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: add_and16:
 ; THUMB1: @ BB#0: @ %entry
 ; THUMB1-NEXT: adds r1, r1, r2
-; THUMB1-NEXT: ldr r0, [r0]
+; THUMB1-NEXT: uxth r1, r1
+; THUMB1-NEXT: ldrh r0, [r0]
 ; THUMB1-NEXT: orrs r0, r1
-; THUMB1-NEXT: uxth r0, r0
 ; THUMB1-NEXT: bx lr
 ;
 ; THUMB2-LABEL: add_and16:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: ldr r0, [r0]
 ; THUMB2-NEXT: add r1, r2
+; THUMB2-NEXT: ldrh r0, [r0]
+; THUMB2-NEXT: uxth r1, r1
 ; THUMB2-NEXT: orrs r0, r1
-; THUMB2-NEXT: uxth r0, r0
 ; THUMB2-NEXT: bx lr
 entry:
   %x = load i32, i32* %a, align 4
@@ -717,41 +671,41 @@
 ; ARM-LABEL: test1:
 ; ARM: @ BB#0: @ %entry
 ; ARM-NEXT: mul r2, r2, r3
-; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: ldr r0, [r0]
+; ARM-NEXT: ldrh r1, [r1]
+; ARM-NEXT: ldrh r0, [r0]
 ; ARM-NEXT: eor r0, r0, r1
-; ARM-NEXT: orr r0, r0, r2
-; ARM-NEXT: uxth r0, r0
+; ARM-NEXT: uxth r1, r2
+; ARM-NEXT: orr r0, r0, r1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: test1:
 ; ARMEB: @ BB#0: @ %entry
 ; ARMEB-NEXT: mul r2, r2, r3
-; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: ldr r0, [r0]
+; ARMEB-NEXT: ldrh r1, [r1, #2]
+; ARMEB-NEXT: ldrh r0, [r0, #2]
 ; ARMEB-NEXT: eor r0, r0, r1
-; ARMEB-NEXT: orr r0, r0, r2
-; ARMEB-NEXT: uxth r0, r0
+; ARMEB-NEXT: uxth r1, r2
+; ARMEB-NEXT: orr r0, r0, r1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: test1:
 ; THUMB1: @ BB#0: @ %entry
+; THUMB1: ldrh r1, [r1]
+; THUMB1-NEXT: ldrh r4, [r0]
+; THUMB1-NEXT: eors r4, r1
 ; THUMB1-NEXT: muls r2, r3, r2
-; THUMB1-NEXT: ldr r1, [r1]
-; THUMB1-NEXT: ldr r0, [r0]
-; THUMB1-NEXT: eors r0, r1
-; THUMB1-NEXT: orrs r0, r2
-; THUMB1-NEXT: uxth r0, r0
-; THUMB1-NEXT: bx lr
+; THUMB1-NEXT: uxth r0, r2
+; THUMB1-NEXT: orrs r0, r4
+; THUMB1-NEXT: pop
 ;
 ; THUMB2-LABEL: test1:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: muls r2, r3, r2
-; THUMB2-NEXT: ldr r1, [r1]
-; THUMB2-NEXT: ldr r0, [r0]
+; THUMB2-NEXT: ldrh r1, [r1]
+; THUMB2-NEXT: ldrh r0, [r0]
 ; THUMB2-NEXT: eors r0, r1
-; THUMB2-NEXT: orrs r0, r2
-; THUMB2-NEXT: uxth r0, r0
+; THUMB2-NEXT: mul r1, r2, r3
+; THUMB2-NEXT: uxth r1, r1
+; THUMB2-NEXT: orrs r0, r1
 ; THUMB2-NEXT: bx lr
 entry:
   %0 = load i32, i32* %a, align 4
@@ -868,41 +822,41 @@
 ; ARM-LABEL: test4:
 ; ARM: @ BB#0: @ %entry
 ; ARM-NEXT: mul r2, r2, r3
-; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: ldr r0, [r0]
+; ARM-NEXT: ldrh r1, [r1]
+; ARM-NEXT: ldrh r0, [r0]
 ; ARM-NEXT: eor r0, r0, r1
-; ARM-NEXT: orr r0, r0, r2
-; ARM-NEXT: uxth r0, r0
+; ARM-NEXT: uxth r1, r2
+; ARM-NEXT: orr r0, r0, r1
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: test4:
 ; ARMEB: @ BB#0: @ %entry
 ; ARMEB-NEXT: mul r2, r2, r3
-; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: ldr r0, [r0]
+; ARMEB-NEXT: ldrh r1, [r1, #2]
+; ARMEB-NEXT: ldrh r0, [r0, #2]
 ; ARMEB-NEXT: eor r0, r0, r1
-; ARMEB-NEXT: orr r0, r0, r2
-; ARMEB-NEXT: uxth r0, r0
+; ARMEB-NEXT: uxth r1, r2
+; ARMEB-NEXT: orr r0, r0, r1
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: test4:
 ; THUMB1: @ BB#0: @ %entry
+; THUMB1: ldrh r1, [r1]
+; THUMB1-NEXT: ldrh r4, [r0]
+; THUMB1-NEXT: eors r4, r1
 ; THUMB1-NEXT: muls r2, r3, r2
-; THUMB1-NEXT: ldr r1, [r1]
-; THUMB1-NEXT: ldr r0, [r0]
-; THUMB1-NEXT: eors r0, r1
-; THUMB1-NEXT: orrs r0, r2
-; THUMB1-NEXT: uxth r0, r0
-; THUMB1-NEXT: bx lr
+; THUMB1-NEXT: uxth r0, r2
+; THUMB1-NEXT: orrs r0, r4
+; THUMB1-NEXT: pop
 ;
 ; THUMB2-LABEL: test4:
 ; THUMB2: @ BB#0: @ %entry
-; THUMB2-NEXT: muls r2, r3, r2
-; THUMB2-NEXT: ldr r1, [r1]
-; THUMB2-NEXT: ldr r0, [r0]
+; THUMB2-NEXT: ldrh r1, [r1]
+; THUMB2-NEXT: ldrh r0, [r0]
 ; THUMB2-NEXT: eors r0, r1
-; THUMB2-NEXT: orrs r0, r2
-; THUMB2-NEXT: uxth r0, r0
+; THUMB2-NEXT: mul r1, r2, r3
+; THUMB2-NEXT: uxth r1, r1
+; THUMB2-NEXT: orrs r0, r1
 ; THUMB2-NEXT: bx lr
 entry:
   %0 = load i32, i32* %a, align 4
@@ -918,41 +872,41 @@
 ; ARM-LABEL: test5:
 ; ARM: @ BB#0: @ %entry
 ; ARM-NEXT: ldr r1, [r1]
-; ARM-NEXT: ldr r0, [r0]
+; ARM-NEXT: ldrh r0, [r0]
 ; ARM-NEXT: mul r1, r2, r1
 ; ARM-NEXT: eor r0, r0, r3
+; ARM-NEXT: uxth r1, r1
 ; ARM-NEXT: orr r0, r0, r1
-; ARM-NEXT: uxth r0, r0
 ; ARM-NEXT: bx lr
 ;
 ; ARMEB-LABEL: test5:
 ; ARMEB: @ BB#0: @ %entry
 ; ARMEB-NEXT: ldr r1, [r1]
-; ARMEB-NEXT: ldr r0, [r0]
+; ARMEB-NEXT: ldrh r0, [r0, #2]
 ; ARMEB-NEXT: mul r1, r2, r1
 ; ARMEB-NEXT: eor r0, r0, r3
+; ARMEB-NEXT: uxth r1, r1
 ; ARMEB-NEXT: orr r0, r0, r1
-; ARMEB-NEXT: uxth r0, r0
 ; ARMEB-NEXT: bx lr
 ;
 ; THUMB1-LABEL: test5:
 ; THUMB1: @ BB#0: @ %entry
-; THUMB1-NEXT: ldr r1, [r1]
-; THUMB1-NEXT: muls r1, r2, r1
-; THUMB1-NEXT: ldr r0, [r0]
-; THUMB1-NEXT: eors r0, r3
-; THUMB1-NEXT: orrs r0, r1
+; THUMB1: ldrh r4, [r0]
+; THUMB1-NEXT: eors r4, r3
+; THUMB1-NEXT: ldr r0, [r1]
+; THUMB1-NEXT: muls r0, r2, r0
 ; THUMB1-NEXT: uxth r0, r0
-; THUMB1-NEXT: bx lr
+; THUMB1-NEXT: orrs r0, r4
+; THUMB1-NEXT: pop
 ;
 ; THUMB2-LABEL: test5:
 ; THUMB2: @ BB#0: @ %entry
 ; THUMB2-NEXT: ldr r1, [r1]
-; THUMB2-NEXT: ldr r0, [r0]
+; THUMB2-NEXT: ldrh r0, [r0]
 ; THUMB2-NEXT: muls r1, r2, r1
 ; THUMB2-NEXT: eors r0, r3
+; THUMB2-NEXT: uxth r1, r1
 ; THUMB2-NEXT: orrs r0, r1
-; THUMB2-NEXT: uxth r0, r0
 ; THUMB2-NEXT: bx lr
 entry:
   %0 = load i32, i32* %a, align 4
@@ -964,3 +918,4 @@
   %and = and i32 %or, 65535
   ret i32 %and
 }
+
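For illustration, the DAG-level effect of the combine (a sketch only; chains,
types, and offsets omitted):

  before: (and (xor (zextload i16 %a), (zextload i16 %b)), 255)
  after:  (xor (zextload i8 %a), (zextload i8 %b))

SearchForAndLoads walks the and/or/xor tree under the mask, InsertAndMask
re-applies the mask in front of the one leaf (if any) that cannot be narrowed,
ReduceLoadWidth shrinks each qualifying load, and the top-level AND becomes
redundant and is replaced by its first operand.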