Index: lib/Target/ARM/ARMISelLowering.h =================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -692,6 +692,8 @@ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const; + std::pair<SDValue, SDValue> LowerLoad(SDValue Op, SelectionDAG &DAG) const; unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -805,6 +805,14 @@ setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); } + // Custom loads/stores to possibly use __aeabi_uread/write* + if (Subtarget->isTargetAEABI() && !Subtarget->allowsUnalignedMem()) { + setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::LOAD, MVT::i64, Custom); + } + setOperationAction(ISD::SADDO, MVT::i32, Custom); setOperationAction(ISD::UADDO, MVT::i32, Custom); setOperationAction(ISD::SSUBO, MVT::i32, Custom); @@ -8017,6 +8025,125 @@ return !CI.second.getNode() ? DAG.getRoot() : CI.first; } +std::pair<SDValue, SDValue> +ARMTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const { + // If we have an unaligned load from a i32 or i64 that would normally be + // split into separate ldrb's, we can use the __aeabi_uread4/__aeabi_uread8 + // functions instead. 
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + EVT MemVT = LD->getMemoryVT(); + if (MemVT != MVT::i32 && MemVT != MVT::i64) + return std::make_pair(SDValue(), SDValue()); + + const auto &MF = DAG.getMachineFunction(); + unsigned AS = LD->getAddressSpace(); + unsigned Align = LD->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); + + const char *LibcallName = nullptr; + if (MF.getFunction().optForMinSize()) { + if (MemVT == MVT::i32 && Align <= 2) + LibcallName = "__aeabi_uread4"; + else if (MemVT == MVT::i64 && Align <= 2) + LibcallName = "__aeabi_uread8"; + } + + if (LibcallName) { + LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned load to " + << LibcallName << "\n"); + CallingConv::ID CC = CallingConv::ARM_AAPCS; + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + SDLoc dl(Op); + + Entry.Node = LD->getBasePtr(); + Entry.Ty = LD->getBasePtr().getValueType().getTypeForEVT(*DAG.getContext()); + Args.push_back(Entry); + + Type *RetTy = MemVT.getTypeForEVT(*DAG.getContext()); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(LD->getChain()) + .setCallee(CC, RetTy, Callee, std::move(Args)); + auto Pair = LowerCallTo(CLI); + + // If necessary, extend the node to 64bit + if (LD->getExtensionType() != ISD::NON_EXTLOAD) { + unsigned ExtType = LD->getExtensionType() == ISD::SEXTLOAD + ? 
ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + SDValue EN = DAG.getNode(ExtType, dl, LD->getValueType(0), Pair.first); + Pair.first = EN; + } + return Pair; + } + + // Default expand to individual loads + if (!allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + return expandUnalignedLoad(LD, DAG); + return std::make_pair(SDValue(), SDValue()); +} + +SDValue ARMTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const { + // If we have an unaligned store to a i32 or i64 that would normally be + // split into separate strb's, we can use the __aeabi_uwrite4/__aeabi_uwrite8 + // functions instead. + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + EVT MemVT = ST->getMemoryVT(); + if (MemVT != MVT::i32 && MemVT != MVT::i64) + return SDValue(); + + const auto &MF = DAG.getMachineFunction(); + unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); + + const char *LibcallName = nullptr; + if (MF.getFunction().optForMinSize()) { + if (MemVT == MVT::i32 && Align <= 2) + LibcallName = "__aeabi_uwrite4"; + else if (MemVT == MVT::i64 && Align <= 2) + LibcallName = "__aeabi_uwrite8"; + } + + if (LibcallName) { + LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store to " + << LibcallName << "\n"); + CallingConv::ID CC = CallingConv::ARM_AAPCS; + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + SDLoc dl(Op); + + // If necessary, trunc the value to 32bit + SDValue StoreVal = ST->getOperand(1); + if (ST->isTruncatingStore()) + StoreVal = DAG.getNode(ISD::TRUNCATE, dl, MemVT, ST->getOperand(1)); + + Entry.Node = StoreVal; + Entry.Ty = StoreVal.getValueType().getTypeForEVT(*DAG.getContext()); + Args.push_back(Entry); + + Entry.Node = ST->getBasePtr(); + Entry.Ty = ST->getBasePtr().getValueType().getTypeForEVT(*DAG.getContext()); + Args.push_back(Entry); + + Type *RetTy = Type::getVoidTy(*DAG.getContext()); + 
TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(ST->getChain()) + .setCallee(CC, RetTy, Callee, std::move(Args)); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + return CallResult.second; + } + + // Default expand to individual stores + if (!allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + return expandUnalignedStore(ST, DAG); + return SDValue(); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump()); switch (Op.getOpcode()) { @@ -8097,6 +8224,14 @@ case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG); case ARMISD::WIN__DBZCHK: return SDValue(); + case ISD::STORE: + return LowerStore(Op, DAG); + case ISD::LOAD: { + auto Pair = LowerLoad(Op, DAG); + if (Pair.first) + return DAG.getMergeValues({Pair.first, Pair.second}, SDLoc(Pair.first)); + return SDValue(); + } } } @@ -8174,6 +8309,17 @@ return; case ISD::INTRINSIC_WO_CHAIN: return ReplaceLongIntrinsic(N, Results, DAG); + case ISD::LOAD: { + auto Pair = LowerLoad(SDValue(N, 0), DAG); + if (Pair.first) { + Results.push_back(Pair.first); + Results.push_back(Pair.second); + } + return; + } + case ISD::STORE: + Res = LowerStore(SDValue(N, 0), DAG); + break; } if (Res.getNode()) Results.push_back(Res); Index: test/CodeGen/ARM/unaligned_load_store_aeabi.ll =================================================================== --- test/CodeGen/ARM/unaligned_load_store_aeabi.ll +++ test/CodeGen/ARM/unaligned_load_store_aeabi.ll @@ -8,27 +8,21 @@ ; CHECK-V6M: @ %bb.0: @ %entry ; CHECK-V6M-NEXT: .save {r4, lr} ; CHECK-V6M-NEXT: push {r4, lr} -; CHECK-V6M-NEXT: ldrb r2, [r0] -; CHECK-V6M-NEXT: ldrb r3, [r0, #1] -; CHECK-V6M-NEXT: ldrb r4, [r0, #2] -; CHECK-V6M-NEXT: ldrb r0, [r0, #3] -; CHECK-V6M-NEXT: strb r0, [r1, #3] -; CHECK-V6M-NEXT: strb r4, [r1, #2] -; CHECK-V6M-NEXT: strb r3, [r1, #1] -; CHECK-V6M-NEXT: strb r2, [r1] +; CHECK-V6M-NEXT: mov r4, r1 
+; CHECK-V6M-NEXT: bl __aeabi_uread4 +; CHECK-V6M-NEXT: mov r1, r4 +; CHECK-V6M-NEXT: bl __aeabi_uwrite4 ; CHECK-V6M-NEXT: pop {r4, pc} ; ; CHECK-V7M-LABEL: loadstore4_align1: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: ldrb.w r12, [r0] -; CHECK-V7M-NEXT: ldrb r3, [r0, #1] -; CHECK-V7M-NEXT: ldrb r2, [r0, #2] -; CHECK-V7M-NEXT: ldrb r0, [r0, #3] -; CHECK-V7M-NEXT: strb r0, [r1, #3] -; CHECK-V7M-NEXT: strb r2, [r1, #2] -; CHECK-V7M-NEXT: strb r3, [r1, #1] -; CHECK-V7M-NEXT: strb.w r12, [r1] -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r4, lr} +; CHECK-V7M-NEXT: push {r4, lr} +; CHECK-V7M-NEXT: mov r4, r1 +; CHECK-V7M-NEXT: bl __aeabi_uread4 +; CHECK-V7M-NEXT: mov r1, r4 +; CHECK-V7M-NEXT: bl __aeabi_uwrite4 +; CHECK-V7M-NEXT: pop {r4, pc} ; ; CHECK-ALIGNED-LABEL: loadstore4_align1: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -44,28 +38,17 @@ define i32 @load4_align1(i32* %a) nounwind optsize minsize { ; CHECK-V6M-LABEL: load4_align1: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: ldrb r1, [r0] -; CHECK-V6M-NEXT: ldrb r2, [r0, #1] -; CHECK-V6M-NEXT: lsls r2, r2, #8 -; CHECK-V6M-NEXT: adds r1, r2, r1 -; CHECK-V6M-NEXT: ldrb r2, [r0, #2] -; CHECK-V6M-NEXT: ldrb r0, [r0, #3] -; CHECK-V6M-NEXT: lsls r0, r0, #8 -; CHECK-V6M-NEXT: adds r0, r0, r2 -; CHECK-V6M-NEXT: lsls r0, r0, #16 -; CHECK-V6M-NEXT: adds r0, r0, r1 -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: bl __aeabi_uread4 +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: load4_align1: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: ldrb r1, [r0] -; CHECK-V7M-NEXT: ldrb r2, [r0, #1] -; CHECK-V7M-NEXT: ldrb r3, [r0, #2] -; CHECK-V7M-NEXT: ldrb r0, [r0, #3] -; CHECK-V7M-NEXT: orr.w r1, r1, r2, lsl #8 -; CHECK-V7M-NEXT: orr.w r0, r3, r0, lsl #8 -; CHECK-V7M-NEXT: orr.w r0, r1, r0, lsl #16 -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: bl __aeabi_uread4 +; CHECK-V7M-NEXT: pop {r7, 
pc} ; ; CHECK-ALIGNED-LABEL: load4_align1: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -79,30 +62,19 @@ define i64 @load4_align1_zext(i32* %a) nounwind optsize minsize { ; CHECK-V6M-LABEL: load4_align1_zext: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: ldrb r1, [r0] -; CHECK-V6M-NEXT: ldrb r2, [r0, #1] -; CHECK-V6M-NEXT: lsls r2, r2, #8 -; CHECK-V6M-NEXT: adds r1, r2, r1 -; CHECK-V6M-NEXT: ldrb r2, [r0, #2] -; CHECK-V6M-NEXT: ldrb r0, [r0, #3] -; CHECK-V6M-NEXT: lsls r0, r0, #8 -; CHECK-V6M-NEXT: adds r0, r0, r2 -; CHECK-V6M-NEXT: lsls r0, r0, #16 -; CHECK-V6M-NEXT: adds r0, r0, r1 +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: bl __aeabi_uread4 ; CHECK-V6M-NEXT: movs r1, #0 -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: load4_align1_zext: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: ldrb r1, [r0] -; CHECK-V7M-NEXT: ldrb r2, [r0, #1] -; CHECK-V7M-NEXT: ldrb r3, [r0, #2] -; CHECK-V7M-NEXT: ldrb r0, [r0, #3] -; CHECK-V7M-NEXT: orr.w r1, r1, r2, lsl #8 -; CHECK-V7M-NEXT: orr.w r0, r3, r0, lsl #8 -; CHECK-V7M-NEXT: orr.w r0, r1, r0, lsl #16 +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: bl __aeabi_uread4 ; CHECK-V7M-NEXT: movs r1, #0 -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: load4_align1_zext: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -118,30 +90,19 @@ define i64 @load4_align1_sext(i32* %a) nounwind optsize minsize { ; CHECK-V6M-LABEL: load4_align1_sext: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: ldrb r1, [r0] -; CHECK-V6M-NEXT: ldrb r2, [r0, #1] -; CHECK-V6M-NEXT: lsls r2, r2, #8 -; CHECK-V6M-NEXT: adds r1, r2, r1 -; CHECK-V6M-NEXT: ldrb r2, [r0, #2] -; CHECK-V6M-NEXT: ldrb r0, [r0, #3] -; CHECK-V6M-NEXT: lsls r0, r0, #8 -; CHECK-V6M-NEXT: adds r0, r0, r2 -; CHECK-V6M-NEXT: lsls r0, r0, #16 -; CHECK-V6M-NEXT: adds r0, r0, r1 +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: bl 
__aeabi_uread4 ; CHECK-V6M-NEXT: asrs r1, r0, #31 -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: load4_align1_sext: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: ldrb r1, [r0] -; CHECK-V7M-NEXT: ldrb r2, [r0, #1] -; CHECK-V7M-NEXT: ldrb r3, [r0, #2] -; CHECK-V7M-NEXT: ldrb r0, [r0, #3] -; CHECK-V7M-NEXT: orr.w r1, r1, r2, lsl #8 -; CHECK-V7M-NEXT: orr.w r0, r3, r0, lsl #8 -; CHECK-V7M-NEXT: orr.w r0, r1, r0, lsl #16 +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: bl __aeabi_uread4 ; CHECK-V7M-NEXT: asrs r1, r0, #31 -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: load4_align1_sext: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -157,25 +118,23 @@ define void @store4_align1(i32* %a, i32 %b) nounwind optsize minsize { ; CHECK-V6M-LABEL: store4_align1: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: strb r1, [r0] -; CHECK-V6M-NEXT: lsrs r2, r1, #24 -; CHECK-V6M-NEXT: strb r2, [r0, #3] -; CHECK-V6M-NEXT: lsrs r2, r1, #16 -; CHECK-V6M-NEXT: strb r2, [r0, #2] -; CHECK-V6M-NEXT: lsrs r1, r1, #8 -; CHECK-V6M-NEXT: strb r1, [r0, #1] -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: mov r2, r0 +; CHECK-V6M-NEXT: mov r0, r1 +; CHECK-V6M-NEXT: mov r1, r2 +; CHECK-V6M-NEXT: bl __aeabi_uwrite4 +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: store4_align1: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: lsrs r2, r1, #24 -; CHECK-V7M-NEXT: strb r1, [r0] -; CHECK-V7M-NEXT: strb r2, [r0, #3] -; CHECK-V7M-NEXT: lsrs r2, r1, #16 -; CHECK-V7M-NEXT: lsrs r1, r1, #8 -; CHECK-V7M-NEXT: strb r2, [r0, #2] -; CHECK-V7M-NEXT: strb r1, [r0, #1] -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: mov r2, r0 +; CHECK-V7M-NEXT: mov r0, r1 +; CHECK-V7M-NEXT: mov r1, r2 +; CHECK-V7M-NEXT: bl __aeabi_uwrite4 +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: 
store4_align1: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -189,18 +148,17 @@ define i32 @load4_align2(i32* %a) nounwind optsize minsize { ; CHECK-V6M-LABEL: load4_align2: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: ldrh r1, [r0] -; CHECK-V6M-NEXT: ldrh r0, [r0, #2] -; CHECK-V6M-NEXT: lsls r0, r0, #16 -; CHECK-V6M-NEXT: adds r0, r0, r1 -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: bl __aeabi_uread4 +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: load4_align2: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: ldrh r1, [r0, #2] -; CHECK-V7M-NEXT: ldrh r0, [r0] -; CHECK-V7M-NEXT: orr.w r0, r0, r1, lsl #16 -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: bl __aeabi_uread4 +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: load4_align2: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -214,17 +172,23 @@ define void @store4_align2(i32* %a, i32 %b) nounwind optsize minsize { ; CHECK-V6M-LABEL: store4_align2: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: strh r1, [r0] -; CHECK-V6M-NEXT: lsrs r1, r1, #16 -; CHECK-V6M-NEXT: strh r1, [r0, #2] -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: mov r2, r0 +; CHECK-V6M-NEXT: mov r0, r1 +; CHECK-V6M-NEXT: mov r1, r2 +; CHECK-V6M-NEXT: bl __aeabi_uwrite4 +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: store4_align2: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: strh r1, [r0] -; CHECK-V7M-NEXT: lsrs r1, r1, #16 -; CHECK-V7M-NEXT: strh r1, [r0, #2] -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: mov r2, r0 +; CHECK-V7M-NEXT: mov r0, r1 +; CHECK-V7M-NEXT: mov r1, r2 +; CHECK-V7M-NEXT: bl __aeabi_uwrite4 +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: store4_align2: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -278,36 +242,26 @@ define i64 @load6_align1_zext(i48* %a) 
nounwind optsize minsize { ; CHECK-V6M-LABEL: load6_align1_zext: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: ldrb r1, [r0] -; CHECK-V6M-NEXT: ldrb r2, [r0, #1] -; CHECK-V6M-NEXT: lsls r2, r2, #8 -; CHECK-V6M-NEXT: adds r1, r2, r1 -; CHECK-V6M-NEXT: ldrb r2, [r0, #2] -; CHECK-V6M-NEXT: ldrb r3, [r0, #3] -; CHECK-V6M-NEXT: lsls r3, r3, #8 -; CHECK-V6M-NEXT: adds r2, r3, r2 -; CHECK-V6M-NEXT: lsls r2, r2, #16 -; CHECK-V6M-NEXT: adds r2, r2, r1 +; CHECK-V6M-NEXT: .save {r4, lr} +; CHECK-V6M-NEXT: push {r4, lr} ; CHECK-V6M-NEXT: ldrb r1, [r0, #4] -; CHECK-V6M-NEXT: ldrb r0, [r0, #5] -; CHECK-V6M-NEXT: lsls r0, r0, #8 -; CHECK-V6M-NEXT: adds r1, r0, r1 -; CHECK-V6M-NEXT: mov r0, r2 -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: ldrb r2, [r0, #5] +; CHECK-V6M-NEXT: lsls r2, r2, #8 +; CHECK-V6M-NEXT: adds r4, r2, r1 +; CHECK-V6M-NEXT: bl __aeabi_uread4 +; CHECK-V6M-NEXT: mov r1, r4 +; CHECK-V6M-NEXT: pop {r4, pc} ; ; CHECK-V7M-LABEL: load6_align1_zext: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: ldrb r1, [r0, #5] -; CHECK-V7M-NEXT: ldrb r2, [r0, #4] -; CHECK-V7M-NEXT: ldrb.w r12, [r0] -; CHECK-V7M-NEXT: orr.w r1, r2, r1, lsl #8 -; CHECK-V7M-NEXT: ldrb r3, [r0, #1] -; CHECK-V7M-NEXT: ldrb r2, [r0, #2] -; CHECK-V7M-NEXT: ldrb r0, [r0, #3] -; CHECK-V7M-NEXT: orr.w r0, r2, r0, lsl #8 -; CHECK-V7M-NEXT: orr.w r2, r12, r3, lsl #8 -; CHECK-V7M-NEXT: orr.w r0, r2, r0, lsl #16 -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r4, lr} +; CHECK-V7M-NEXT: push {r4, lr} +; CHECK-V7M-NEXT: mov r4, r0 +; CHECK-V7M-NEXT: bl __aeabi_uread4 +; CHECK-V7M-NEXT: ldrb r2, [r4, #5] +; CHECK-V7M-NEXT: ldrb r1, [r4, #4] +; CHECK-V7M-NEXT: orr.w r1, r1, r2, lsl #8 +; CHECK-V7M-NEXT: pop {r4, pc} ; ; CHECK-ALIGNED-LABEL: load6_align1_zext: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -324,37 +278,27 @@ define i64 @load6_align1_sext(i48* %a) nounwind optsize minsize { ; CHECK-V6M-LABEL: load6_align1_sext: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: ldrb r1, [r0] -; CHECK-V6M-NEXT: ldrb r2, 
[r0, #1] -; CHECK-V6M-NEXT: lsls r2, r2, #8 -; CHECK-V6M-NEXT: adds r1, r2, r1 -; CHECK-V6M-NEXT: ldrb r2, [r0, #2] -; CHECK-V6M-NEXT: ldrb r3, [r0, #3] -; CHECK-V6M-NEXT: lsls r3, r3, #8 -; CHECK-V6M-NEXT: adds r2, r3, r2 -; CHECK-V6M-NEXT: lsls r2, r2, #16 -; CHECK-V6M-NEXT: adds r2, r2, r1 +; CHECK-V6M-NEXT: .save {r4, lr} +; CHECK-V6M-NEXT: push {r4, lr} ; CHECK-V6M-NEXT: movs r1, #5 ; CHECK-V6M-NEXT: ldrsb r1, [r0, r1] ; CHECK-V6M-NEXT: lsls r1, r1, #8 -; CHECK-V6M-NEXT: ldrb r0, [r0, #4] -; CHECK-V6M-NEXT: adds r1, r1, r0 -; CHECK-V6M-NEXT: mov r0, r2 -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: ldrb r2, [r0, #4] +; CHECK-V6M-NEXT: adds r4, r1, r2 +; CHECK-V6M-NEXT: bl __aeabi_uread4 +; CHECK-V6M-NEXT: mov r1, r4 +; CHECK-V6M-NEXT: pop {r4, pc} ; ; CHECK-V7M-LABEL: load6_align1_sext: ; CHECK-V7M: @ %bb.0: @ %entry +; CHECK-V7M-NEXT: .save {r4, lr} +; CHECK-V7M-NEXT: push {r4, lr} ; CHECK-V7M-NEXT: ldrsb.w r1, [r0, #5] ; CHECK-V7M-NEXT: ldrb r2, [r0, #4] -; CHECK-V7M-NEXT: ldrb.w r12, [r0] -; CHECK-V7M-NEXT: orr.w r1, r2, r1, lsl #8 -; CHECK-V7M-NEXT: ldrb r3, [r0, #1] -; CHECK-V7M-NEXT: ldrb r2, [r0, #2] -; CHECK-V7M-NEXT: ldrb r0, [r0, #3] -; CHECK-V7M-NEXT: orr.w r0, r2, r0, lsl #8 -; CHECK-V7M-NEXT: orr.w r2, r12, r3, lsl #8 -; CHECK-V7M-NEXT: orr.w r0, r2, r0, lsl #16 -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: orr.w r4, r2, r1, lsl #8 +; CHECK-V7M-NEXT: bl __aeabi_uread4 +; CHECK-V7M-NEXT: mov r1, r4 +; CHECK-V7M-NEXT: pop {r4, pc} ; ; CHECK-ALIGNED-LABEL: load6_align1_sext: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -371,31 +315,27 @@ define void @store6_align1(i48* %a, i48 %b) nounwind optsize minsize { ; CHECK-V6M-LABEL: store6_align1: ; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: mov r1, r0 ; CHECK-V6M-NEXT: strb r3, [r0, #4] -; CHECK-V6M-NEXT: strb r2, [r0] -; CHECK-V6M-NEXT: lsrs r1, r3, #8 -; CHECK-V6M-NEXT: strb r1, [r0, #5] -; CHECK-V6M-NEXT: lsrs r1, r2, #24 -; CHECK-V6M-NEXT: 
strb r1, [r0, #3] -; CHECK-V6M-NEXT: lsrs r1, r2, #16 -; CHECK-V6M-NEXT: strb r1, [r0, #2] -; CHECK-V6M-NEXT: lsrs r1, r2, #8 -; CHECK-V6M-NEXT: strb r1, [r0, #1] -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: lsrs r0, r3, #8 +; CHECK-V6M-NEXT: strb r0, [r1, #5] +; CHECK-V6M-NEXT: mov r0, r2 +; CHECK-V6M-NEXT: bl __aeabi_uwrite4 +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: store6_align1: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: lsrs r1, r3, #8 +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: mov r1, r0 ; CHECK-V7M-NEXT: strb r3, [r0, #4] -; CHECK-V7M-NEXT: strb r1, [r0, #5] -; CHECK-V7M-NEXT: lsrs r1, r2, #24 -; CHECK-V7M-NEXT: strb r1, [r0, #3] -; CHECK-V7M-NEXT: lsrs r1, r2, #16 -; CHECK-V7M-NEXT: strb r1, [r0, #2] -; CHECK-V7M-NEXT: lsrs r1, r2, #8 -; CHECK-V7M-NEXT: strb r2, [r0] -; CHECK-V7M-NEXT: strb r1, [r0, #1] -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: lsrs r0, r3, #8 +; CHECK-V7M-NEXT: strb r0, [r1, #5] +; CHECK-V7M-NEXT: mov r0, r2 +; CHECK-V7M-NEXT: bl __aeabi_uwrite4 +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: store6_align1: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -410,51 +350,23 @@ define void @loadstore8_align4(double* %a, double* %b) nounwind optsize minsize { ; CHECK-V6M-LABEL: loadstore8_align4: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-V6M-NEXT: .pad #4 -; CHECK-V6M-NEXT: push {r3, r4, r5, r6, r7, lr} -; CHECK-V6M-NEXT: ldrb r2, [r0] -; CHECK-V6M-NEXT: str r2, [sp] @ 4-byte Spill -; CHECK-V6M-NEXT: ldrb r3, [r0, #1] -; CHECK-V6M-NEXT: ldrb r4, [r0, #2] -; CHECK-V6M-NEXT: ldrb r5, [r0, #3] -; CHECK-V6M-NEXT: ldrb r6, [r0, #4] -; CHECK-V6M-NEXT: ldrb r7, [r0, #5] -; CHECK-V6M-NEXT: ldrb r2, [r0, #6] -; CHECK-V6M-NEXT: ldrb r0, [r0, #7] -; CHECK-V6M-NEXT: strb r0, [r1, #7] -; CHECK-V6M-NEXT: strb r2, [r1, #6] -; CHECK-V6M-NEXT: strb r7, [r1, #5] -; CHECK-V6M-NEXT: strb r6, [r1, #4] -; CHECK-V6M-NEXT: strb r5, [r1, #3] -; CHECK-V6M-NEXT: strb 
r4, [r1, #2] -; CHECK-V6M-NEXT: strb r3, [r1, #1] -; CHECK-V6M-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-V6M-NEXT: strb r0, [r1] -; CHECK-V6M-NEXT: pop {r3, r4, r5, r6, r7, pc} +; CHECK-V6M-NEXT: .save {r4, lr} +; CHECK-V6M-NEXT: push {r4, lr} +; CHECK-V6M-NEXT: mov r4, r1 +; CHECK-V6M-NEXT: bl __aeabi_uread8 +; CHECK-V6M-NEXT: mov r2, r4 +; CHECK-V6M-NEXT: bl __aeabi_uwrite8 +; CHECK-V6M-NEXT: pop {r4, pc} ; ; CHECK-V7M-LABEL: loadstore8_align4: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: .save {r4, r5, r6, lr} -; CHECK-V7M-NEXT: push {r4, r5, r6, lr} -; CHECK-V7M-NEXT: mov r2, r0 -; CHECK-V7M-NEXT: ldrb r3, [r0] -; CHECK-V7M-NEXT: ldrb lr, [r2, #4]! -; CHECK-V7M-NEXT: ldrb r4, [r0, #1] -; CHECK-V7M-NEXT: ldrb r5, [r0, #2] -; CHECK-V7M-NEXT: ldrb r6, [r0, #3] -; CHECK-V7M-NEXT: ldrb r0, [r0, #5] -; CHECK-V7M-NEXT: ldrb.w r12, [r2, #2] -; CHECK-V7M-NEXT: ldrb r2, [r2, #3] -; CHECK-V7M-NEXT: strb r0, [r1, #5] -; CHECK-V7M-NEXT: strb r6, [r1, #3] -; CHECK-V7M-NEXT: strb r5, [r1, #2] -; CHECK-V7M-NEXT: strb r4, [r1, #1] -; CHECK-V7M-NEXT: strb r3, [r1] -; CHECK-V7M-NEXT: strb lr, [r1, #4]! 
-; CHECK-V7M-NEXT: strb r2, [r1, #3] -; CHECK-V7M-NEXT: strb.w r12, [r1, #2] -; CHECK-V7M-NEXT: pop {r4, r5, r6, pc} +; CHECK-V7M-NEXT: .save {r4, lr} +; CHECK-V7M-NEXT: push {r4, lr} +; CHECK-V7M-NEXT: mov r4, r1 +; CHECK-V7M-NEXT: bl __aeabi_uread8 +; CHECK-V7M-NEXT: mov r2, r4 +; CHECK-V7M-NEXT: bl __aeabi_uwrite8 +; CHECK-V7M-NEXT: pop {r4, pc} ; ; CHECK-ALIGNED-LABEL: loadstore8_align4: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -472,47 +384,17 @@ define double @load8_align1(double* %a) nounwind optsize minsize { ; CHECK-V6M-LABEL: load8_align1: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: ldrb r1, [r0] -; CHECK-V6M-NEXT: ldrb r2, [r0, #1] -; CHECK-V6M-NEXT: lsls r2, r2, #8 -; CHECK-V6M-NEXT: adds r1, r2, r1 -; CHECK-V6M-NEXT: ldrb r2, [r0, #2] -; CHECK-V6M-NEXT: ldrb r3, [r0, #3] -; CHECK-V6M-NEXT: lsls r3, r3, #8 -; CHECK-V6M-NEXT: adds r2, r3, r2 -; CHECK-V6M-NEXT: lsls r2, r2, #16 -; CHECK-V6M-NEXT: adds r2, r2, r1 -; CHECK-V6M-NEXT: ldrb r1, [r0, #4] -; CHECK-V6M-NEXT: ldrb r3, [r0, #5] -; CHECK-V6M-NEXT: lsls r3, r3, #8 -; CHECK-V6M-NEXT: adds r1, r3, r1 -; CHECK-V6M-NEXT: ldrb r3, [r0, #6] -; CHECK-V6M-NEXT: ldrb r0, [r0, #7] -; CHECK-V6M-NEXT: lsls r0, r0, #8 -; CHECK-V6M-NEXT: adds r0, r0, r3 -; CHECK-V6M-NEXT: lsls r0, r0, #16 -; CHECK-V6M-NEXT: adds r1, r0, r1 -; CHECK-V6M-NEXT: mov r0, r2 -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: bl __aeabi_uread8 +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: load8_align1: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: ldrb r1, [r0, #3] -; CHECK-V7M-NEXT: ldrb r3, [r0, #2] -; CHECK-V7M-NEXT: ldrb r2, [r0, #1] -; CHECK-V7M-NEXT: ldrb.w r12, [r0] -; CHECK-V7M-NEXT: orr.w r1, r3, r1, lsl #8 -; CHECK-V7M-NEXT: orr.w r2, r12, r2, lsl #8 -; CHECK-V7M-NEXT: orr.w r2, r2, r1, lsl #16 -; CHECK-V7M-NEXT: ldrb r1, [r0, #5] -; CHECK-V7M-NEXT: ldrb r3, [r0, #4]! 
-; CHECK-V7M-NEXT: orr.w r1, r3, r1, lsl #8 -; CHECK-V7M-NEXT: ldrb r3, [r0, #2] -; CHECK-V7M-NEXT: ldrb r0, [r0, #3] -; CHECK-V7M-NEXT: orr.w r0, r3, r0, lsl #8 -; CHECK-V7M-NEXT: orr.w r1, r1, r0, lsl #16 -; CHECK-V7M-NEXT: mov r0, r2 -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: bl __aeabi_uread8 +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: load8_align1: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -528,28 +410,17 @@ define i32 @load8_align1_trunc(i64* %a) nounwind optsize minsize { ; CHECK-V6M-LABEL: load8_align1_trunc: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: ldrb r1, [r0] -; CHECK-V6M-NEXT: ldrb r2, [r0, #1] -; CHECK-V6M-NEXT: lsls r2, r2, #8 -; CHECK-V6M-NEXT: adds r1, r2, r1 -; CHECK-V6M-NEXT: ldrb r2, [r0, #2] -; CHECK-V6M-NEXT: ldrb r0, [r0, #3] -; CHECK-V6M-NEXT: lsls r0, r0, #8 -; CHECK-V6M-NEXT: adds r0, r0, r2 -; CHECK-V6M-NEXT: lsls r0, r0, #16 -; CHECK-V6M-NEXT: adds r0, r0, r1 -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: bl __aeabi_uread4 +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: load8_align1_trunc: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: ldrb r1, [r0] -; CHECK-V7M-NEXT: ldrb r2, [r0, #1] -; CHECK-V7M-NEXT: ldrb r3, [r0, #2] -; CHECK-V7M-NEXT: ldrb r0, [r0, #3] -; CHECK-V7M-NEXT: orr.w r1, r1, r2, lsl #8 -; CHECK-V7M-NEXT: orr.w r0, r3, r0, lsl #8 -; CHECK-V7M-NEXT: orr.w r0, r1, r0, lsl #16 -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: bl __aeabi_uread4 +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: load8_align1_trunc: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -564,40 +435,25 @@ define void @store8_align1(double* %a, double %b) nounwind optsize minsize { ; CHECK-V6M-LABEL: store8_align1: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: strb r3, [r0, #4] -; CHECK-V6M-NEXT: strb r2, [r0] -; CHECK-V6M-NEXT: lsrs r1, 
r3, #24 -; CHECK-V6M-NEXT: strb r1, [r0, #7] -; CHECK-V6M-NEXT: lsrs r1, r3, #16 -; CHECK-V6M-NEXT: strb r1, [r0, #6] -; CHECK-V6M-NEXT: lsrs r1, r3, #8 -; CHECK-V6M-NEXT: strb r1, [r0, #5] -; CHECK-V6M-NEXT: lsrs r1, r2, #24 -; CHECK-V6M-NEXT: strb r1, [r0, #3] -; CHECK-V6M-NEXT: lsrs r1, r2, #16 -; CHECK-V6M-NEXT: strb r1, [r0, #2] -; CHECK-V6M-NEXT: lsrs r1, r2, #8 -; CHECK-V6M-NEXT: strb r1, [r0, #1] -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: mov r1, r3 +; CHECK-V6M-NEXT: mov r3, r0 +; CHECK-V6M-NEXT: mov r0, r2 +; CHECK-V6M-NEXT: mov r2, r3 +; CHECK-V6M-NEXT: bl __aeabi_uwrite8 +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: store8_align1: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: mov r12, r0 -; CHECK-V7M-NEXT: lsrs r1, r3, #24 -; CHECK-V7M-NEXT: strb r3, [r12, #4]! -; CHECK-V7M-NEXT: strb r2, [r0] -; CHECK-V7M-NEXT: strb.w r1, [r12, #3] -; CHECK-V7M-NEXT: lsrs r1, r3, #16 -; CHECK-V7M-NEXT: strb.w r1, [r12, #2] -; CHECK-V7M-NEXT: lsrs r1, r3, #8 -; CHECK-V7M-NEXT: strb r1, [r0, #5] -; CHECK-V7M-NEXT: lsrs r1, r2, #24 -; CHECK-V7M-NEXT: strb r1, [r0, #3] -; CHECK-V7M-NEXT: lsrs r1, r2, #16 -; CHECK-V7M-NEXT: strb r1, [r0, #2] -; CHECK-V7M-NEXT: lsrs r1, r2, #8 -; CHECK-V7M-NEXT: strb r1, [r0, #1] -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: mov r1, r3 +; CHECK-V7M-NEXT: mov r3, r0 +; CHECK-V7M-NEXT: mov r0, r2 +; CHECK-V7M-NEXT: mov r2, r3 +; CHECK-V7M-NEXT: bl __aeabi_uwrite8 +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: store8_align1: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -612,27 +468,17 @@ define double @load8_align2(double* %a) nounwind optsize minsize { ; CHECK-V6M-LABEL: load8_align2: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: ldrh r1, [r0] -; CHECK-V6M-NEXT: ldrh r2, [r0, #2] -; CHECK-V6M-NEXT: lsls r2, r2, #16 -; CHECK-V6M-NEXT: adds r2, r2, r1 -; CHECK-V6M-NEXT: ldrh r1, [r0, #4] -; 
CHECK-V6M-NEXT: ldrh r0, [r0, #6] -; CHECK-V6M-NEXT: lsls r0, r0, #16 -; CHECK-V6M-NEXT: adds r1, r0, r1 -; CHECK-V6M-NEXT: mov r0, r2 -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: bl __aeabi_uread8 +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: load8_align2: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: ldrh r1, [r0, #2] -; CHECK-V7M-NEXT: ldrh r2, [r0] -; CHECK-V7M-NEXT: orr.w r2, r2, r1, lsl #16 -; CHECK-V7M-NEXT: ldrh r1, [r0, #6] -; CHECK-V7M-NEXT: ldrh r0, [r0, #4] -; CHECK-V7M-NEXT: orr.w r1, r0, r1, lsl #16 -; CHECK-V7M-NEXT: mov r0, r2 -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: bl __aeabi_uread8 +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: load8_align2: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -648,23 +494,25 @@ define void @store8_align2(double* %a, double %b) nounwind optsize minsize { ; CHECK-V6M-LABEL: store8_align2: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: strh r3, [r0, #4] -; CHECK-V6M-NEXT: strh r2, [r0] -; CHECK-V6M-NEXT: lsrs r1, r3, #16 -; CHECK-V6M-NEXT: strh r1, [r0, #6] -; CHECK-V6M-NEXT: lsrs r1, r2, #16 -; CHECK-V6M-NEXT: strh r1, [r0, #2] -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: mov r1, r3 +; CHECK-V6M-NEXT: mov r3, r0 +; CHECK-V6M-NEXT: mov r0, r2 +; CHECK-V6M-NEXT: mov r2, r3 +; CHECK-V6M-NEXT: bl __aeabi_uwrite8 +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: store8_align2: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: lsrs r1, r3, #16 -; CHECK-V7M-NEXT: strh r3, [r0, #4] -; CHECK-V7M-NEXT: strh r1, [r0, #6] -; CHECK-V7M-NEXT: lsrs r1, r2, #16 -; CHECK-V7M-NEXT: strh r2, [r0] -; CHECK-V7M-NEXT: strh r1, [r0, #2] -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: mov r1, r3 +; CHECK-V7M-NEXT: mov r3, r0 +; CHECK-V7M-NEXT: mov r0, r2 +; 
CHECK-V7M-NEXT: mov r2, r3 +; CHECK-V7M-NEXT: bl __aeabi_uwrite8 +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: store8_align2: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -724,47 +572,17 @@ define i64 @load12_align1_trunc(i96* %a) nounwind optsize minsize { ; CHECK-V6M-LABEL: load12_align1_trunc: ; CHECK-V6M: @ %bb.0: @ %entry -; CHECK-V6M-NEXT: ldrb r1, [r0] -; CHECK-V6M-NEXT: ldrb r2, [r0, #1] -; CHECK-V6M-NEXT: lsls r2, r2, #8 -; CHECK-V6M-NEXT: adds r1, r2, r1 -; CHECK-V6M-NEXT: ldrb r2, [r0, #2] -; CHECK-V6M-NEXT: ldrb r3, [r0, #3] -; CHECK-V6M-NEXT: lsls r3, r3, #8 -; CHECK-V6M-NEXT: adds r2, r3, r2 -; CHECK-V6M-NEXT: lsls r2, r2, #16 -; CHECK-V6M-NEXT: adds r2, r2, r1 -; CHECK-V6M-NEXT: ldrb r1, [r0, #4] -; CHECK-V6M-NEXT: ldrb r3, [r0, #5] -; CHECK-V6M-NEXT: lsls r3, r3, #8 -; CHECK-V6M-NEXT: adds r1, r3, r1 -; CHECK-V6M-NEXT: ldrb r3, [r0, #6] -; CHECK-V6M-NEXT: ldrb r0, [r0, #7] -; CHECK-V6M-NEXT: lsls r0, r0, #8 -; CHECK-V6M-NEXT: adds r0, r0, r3 -; CHECK-V6M-NEXT: lsls r0, r0, #16 -; CHECK-V6M-NEXT: adds r1, r0, r1 -; CHECK-V6M-NEXT: mov r0, r2 -; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .save {r7, lr} +; CHECK-V6M-NEXT: push {r7, lr} +; CHECK-V6M-NEXT: bl __aeabi_uread8 +; CHECK-V6M-NEXT: pop {r7, pc} ; ; CHECK-V7M-LABEL: load12_align1_trunc: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: ldrb r1, [r0, #3] -; CHECK-V7M-NEXT: ldrb r3, [r0, #2] -; CHECK-V7M-NEXT: ldrb r2, [r0, #1] -; CHECK-V7M-NEXT: ldrb.w r12, [r0] -; CHECK-V7M-NEXT: orr.w r1, r3, r1, lsl #8 -; CHECK-V7M-NEXT: orr.w r2, r12, r2, lsl #8 -; CHECK-V7M-NEXT: orr.w r2, r2, r1, lsl #16 -; CHECK-V7M-NEXT: ldrb r1, [r0, #5] -; CHECK-V7M-NEXT: ldrb r3, [r0, #4]! 
-; CHECK-V7M-NEXT: orr.w r1, r3, r1, lsl #8 -; CHECK-V7M-NEXT: ldrb r3, [r0, #2] -; CHECK-V7M-NEXT: ldrb r0, [r0, #3] -; CHECK-V7M-NEXT: orr.w r0, r3, r0, lsl #8 -; CHECK-V7M-NEXT: orr.w r1, r1, r0, lsl #16 -; CHECK-V7M-NEXT: mov r0, r2 -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r7, lr} +; CHECK-V7M-NEXT: push {r7, lr} +; CHECK-V7M-NEXT: bl __aeabi_uread8 +; CHECK-V7M-NEXT: pop {r7, pc} ; ; CHECK-ALIGNED-LABEL: load12_align1_trunc: ; CHECK-ALIGNED: @ %bb.0: @ %entry @@ -783,57 +601,30 @@ ; CHECK-V6M: @ %bb.0: @ %entry ; CHECK-V6M-NEXT: .save {r4, lr} ; CHECK-V6M-NEXT: push {r4, lr} -; CHECK-V6M-NEXT: strb r3, [r0, #4] -; CHECK-V6M-NEXT: strb r2, [r0] -; CHECK-V6M-NEXT: ldr r1, [sp, #8] -; CHECK-V6M-NEXT: strb r1, [r0, #8] -; CHECK-V6M-NEXT: lsrs r4, r3, #24 -; CHECK-V6M-NEXT: strb r4, [r0, #7] -; CHECK-V6M-NEXT: lsrs r4, r3, #16 -; CHECK-V6M-NEXT: strb r4, [r0, #6] -; CHECK-V6M-NEXT: lsrs r3, r3, #8 -; CHECK-V6M-NEXT: strb r3, [r0, #5] -; CHECK-V6M-NEXT: lsrs r3, r2, #24 -; CHECK-V6M-NEXT: strb r3, [r0, #3] -; CHECK-V6M-NEXT: lsrs r3, r2, #16 -; CHECK-V6M-NEXT: strb r3, [r0, #2] -; CHECK-V6M-NEXT: lsrs r2, r2, #8 -; CHECK-V6M-NEXT: strb r2, [r0, #1] -; CHECK-V6M-NEXT: lsrs r2, r1, #24 -; CHECK-V6M-NEXT: strb r2, [r0, #11] -; CHECK-V6M-NEXT: lsrs r2, r1, #16 -; CHECK-V6M-NEXT: strb r2, [r0, #10] -; CHECK-V6M-NEXT: lsrs r1, r1, #8 -; CHECK-V6M-NEXT: strb r1, [r0, #9] +; CHECK-V6M-NEXT: mov r1, r3 +; CHECK-V6M-NEXT: mov r4, r0 +; CHECK-V6M-NEXT: mov r0, r2 +; CHECK-V6M-NEXT: mov r2, r4 +; CHECK-V6M-NEXT: bl __aeabi_uwrite8 +; CHECK-V6M-NEXT: adds r4, #8 +; CHECK-V6M-NEXT: ldr r0, [sp, #8] +; CHECK-V6M-NEXT: mov r1, r4 +; CHECK-V6M-NEXT: bl __aeabi_uwrite4 ; CHECK-V6M-NEXT: pop {r4, pc} ; ; CHECK-V7M-LABEL: store12_align4_trunc: ; CHECK-V7M: @ %bb.0: @ %entry -; CHECK-V7M-NEXT: mov r12, r0 -; CHECK-V7M-NEXT: lsrs r1, r3, #24 -; CHECK-V7M-NEXT: strb r3, [r12, #4]! 
-; CHECK-V7M-NEXT: strb r2, [r0] -; CHECK-V7M-NEXT: strb.w r1, [r12, #3] -; CHECK-V7M-NEXT: lsrs r1, r3, #16 -; CHECK-V7M-NEXT: strb.w r1, [r12, #2] -; CHECK-V7M-NEXT: lsrs r1, r3, #8 -; CHECK-V7M-NEXT: strb r1, [r0, #5] -; CHECK-V7M-NEXT: lsrs r1, r2, #24 -; CHECK-V7M-NEXT: strb r1, [r0, #3] -; CHECK-V7M-NEXT: lsrs r1, r2, #16 -; CHECK-V7M-NEXT: strb r1, [r0, #2] -; CHECK-V7M-NEXT: lsrs r1, r2, #8 -; CHECK-V7M-NEXT: strb r1, [r0, #1] -; CHECK-V7M-NEXT: mov r2, r0 -; CHECK-V7M-NEXT: ldr r1, [sp] -; CHECK-V7M-NEXT: strb r1, [r2, #8]! -; CHECK-V7M-NEXT: lsrs r3, r1, #24 -; CHECK-V7M-NEXT: strb r3, [r2, #3] -; CHECK-V7M-NEXT: lsrs r3, r1, #16 -; CHECK-V7M-NEXT: lsrs r1, r1, #8 -; CHECK-V7M-NEXT: strb r3, [r2, #2] -; CHECK-V7M-NEXT: strb r1, [r0, #9] -; CHECK-V7M-NEXT: bx lr +; CHECK-V7M-NEXT: .save {r4, lr} +; CHECK-V7M-NEXT: push {r4, lr} +; CHECK-V7M-NEXT: mov r4, r0 +; CHECK-V7M-NEXT: mov r0, r2 +; CHECK-V7M-NEXT: mov r1, r3 +; CHECK-V7M-NEXT: mov r2, r4 +; CHECK-V7M-NEXT: bl __aeabi_uwrite8 +; CHECK-V7M-NEXT: ldr r0, [sp, #8] +; CHECK-V7M-NEXT: add.w r1, r4, #8 +; CHECK-V7M-NEXT: bl __aeabi_uwrite4 +; CHECK-V7M-NEXT: pop {r4, pc} ; ; CHECK-ALIGNED-LABEL: store12_align4_trunc: ; CHECK-ALIGNED: @ %bb.0: @ %entry