Index: llvm/include/llvm/Target/TargetSelectionDAG.td =================================================================== --- llvm/include/llvm/Target/TargetSelectionDAG.td +++ llvm/include/llvm/Target/TargetSelectionDAG.td @@ -1113,6 +1113,16 @@ let IsStore = 1; let MemoryVT = f32; } +def pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i8; +} +def pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i16; +} def post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (istore node:$val, node:$ptr, node:$offset), [{ @@ -1150,6 +1160,16 @@ let IsStore = 1; let MemoryVT = f32; } +def post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i8; +} +def post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i16; +} def nonvolatile_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ Index: llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -146,9 +146,12 @@ SDValue &OffImm); bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, SDValue &OffImm); - template - bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, - SDValue &OffImm); + template + bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm); + bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, + unsigned Shift); + template + bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); 
bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); @@ -179,6 +182,7 @@ bool tryARMIndexedLoad(SDNode *N); bool tryT1IndexedLoad(SDNode *N); bool tryT2IndexedLoad(SDNode *N); + bool tryMVEIndexedLoad(SDNode *N); /// SelectVLD - Select NEON load intrinsics. NumVecs should be /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for @@ -1307,6 +1311,31 @@ return true; } +template +bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, + SDValue &OffImm) { + return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, + SDValue &OffImm, + unsigned Shift) { + unsigned Opcode = Op->getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast(Op)->getAddressingMode() + : cast(Op)->getAddressingMode(); + int RHSC; + if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { // 7 bits. + OffImm = + ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) + ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) + : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), + MVT::i32); + return true; + } + return false; +} + bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm) { @@ -1565,6 +1594,68 @@ return false; } +bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) { + LoadSDNode *LD = cast(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return false; + EVT LoadedVT = LD->getMemoryVT(); + if (!LoadedVT.isVector()) + return false; + bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + SDValue Offset; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + unsigned Align = LD->getAlignment(); + bool IsLE = Subtarget->isLittle(); + + if (Align >= 2 && LoadedVT == MVT::v4i16 && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) { + if (isSExtLd) + Opcode = isPre ? 
ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post; + else + Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post; + } else if (LoadedVT == MVT::v8i8 && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + if (isSExtLd) + Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post; + else + Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post; + } else if (LoadedVT == MVT::v4i8 && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) { + if (isSExtLd) + Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post; + else + Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post; + } else if (Align >= 4 && + (IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2)) + Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post; + else if (Align >= 2 && + (IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) + Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post; + else if ((IsLE || LoadedVT == MVT::v16i8) && + SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) + Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post; + else + return false; + + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[] = {Base, Offset, + CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32), + CurDAG->getRegister(0, MVT::i32), Chain}; + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0), + MVT::i32, MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceUses(SDValue(N, 0), SDValue(New, 1)); + ReplaceUses(SDValue(N, 1), SDValue(New, 0)); + ReplaceUses(SDValue(N, 2), SDValue(New, 2)); + CurDAG->RemoveDeadNode(N); + return true; +} + /// Form a GPRPair pseudo register from a pair of GPR regs. 
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { SDLoc dl(V0.getNode()); @@ -2987,6 +3078,8 @@ return; } case ISD::LOAD: { + if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) + return; if (Subtarget->isThumb() && Subtarget->hasThumb2()) { if (tryT2IndexedLoad(N)) return; Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -272,6 +272,13 @@ setOperationAction(ISD::FP_TO_SINT, VT, Expand); setOperationAction(ISD::FP_TO_UINT, VT, Expand); } + + // Pre and Post inc are supported on loads and stores + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, VT, Legal); + setIndexedStoreAction(im, VT, Legal); + } } const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 }; @@ -290,6 +297,13 @@ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal); setOperationAction(ISD::SETCC, VT, Custom); + // Pre and Post inc are supported on loads and stores + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, VT, Legal); + setIndexedStoreAction(im, VT, Legal); + } + if (HasMVEFP) { setOperationAction(ISD::FMINNUM, VT, Legal); setOperationAction(ISD::FMAXNUM, VT, Legal); @@ -337,6 +351,17 @@ setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); + // Pre and Post inc on these are legal, given the correct extends + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::v8i8, Legal); + setIndexedStoreAction(im, MVT::v8i8, Legal); + setIndexedLoadAction(im, MVT::v4i8, Legal); + setIndexedStoreAction(im, MVT::v4i8, Legal); + setIndexedLoadAction(im, MVT::v4i16, Legal); + setIndexedStoreAction(im, MVT::v4i16, Legal); + } + // Predicate types const MVT pTypes[] = 
{MVT::v16i1, MVT::v8i1, MVT::v4i1}; for (auto VT : pTypes) { @@ -14659,6 +14684,52 @@ return false; } +static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align, + bool isSEXTLoad, bool isLE, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) + return false; + if (!isa(Ptr->getOperand(1))) + return false; + + ConstantSDNode *RHS = cast(Ptr->getOperand(1)); + int RHSC = (int)RHS->getZExtValue(); + + auto IsInRange = [&](int RHSC, int Limit, int Scale) { + if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) { + assert(Ptr->getOpcode() == ISD::ADD); + isInc = false; + Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); + return true; + } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) { + isInc = Ptr->getOpcode() == ISD::ADD; + Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0)); + return true; + } + return false; + }; + + // Try to find a matching instruction based on s/zext, Alignment, Offset and + // (in BE) type. + Base = Ptr->getOperand(0); + if (VT == MVT::v4i16) { + if (Align >= 2 && IsInRange(RHSC, 0x80, 2)) + return true; + } else if (VT == MVT::v4i8 || VT == MVT::v8i8) { + if (IsInRange(RHSC, 0x80, 1)) + return true; + } else if (Align >= 4 && (isLE || VT == MVT::v4i32 || VT == MVT::v4f32) && + IsInRange(RHSC, 0x80, 4)) + return true; + else if (Align >= 2 && (isLE || VT == MVT::v8i16 || VT == MVT::v8f16) && + IsInRange(RHSC, 0x80, 2)) + return true; + else if ((isLE || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1)) + return true; + return false; +} + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. 
@@ -14672,25 +14743,35 @@ EVT VT; SDValue Ptr; + unsigned Align; bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); - VT = LD->getMemoryVT(); + VT = LD->getMemoryVT(); + Align = LD->getAlignment(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); - VT = ST->getMemoryVT(); + VT = ST->getMemoryVT(); + Align = ST->getAlignment(); } else return false; bool isInc; bool isLegal = false; - if (Subtarget->isThumb2()) - isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, - Offset, isInc, DAG); - else - isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, - Offset, isInc, DAG); + if (VT.isVector()) + isLegal = Subtarget->hasMVEIntegerOps() && + getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad, + Subtarget->isLittle(), Base, Offset, + isInc, DAG); + else { + if (Subtarget->isThumb2()) + isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + else + isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + } if (!isLegal) return false; @@ -14708,15 +14789,18 @@ SelectionDAG &DAG) const { EVT VT; SDValue Ptr; + unsigned Align; bool isSEXTLoad = false, isNonExt; if (LoadSDNode *LD = dyn_cast(N)) { - VT = LD->getMemoryVT(); + VT = LD->getMemoryVT(); Ptr = LD->getBasePtr(); + Align = LD->getAlignment(); isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD; } else if (StoreSDNode *ST = dyn_cast(N)) { - VT = ST->getMemoryVT(); + VT = ST->getMemoryVT(); Ptr = ST->getBasePtr(); + Align = ST->getAlignment(); isNonExt = !ST->isTruncatingStore(); } else return false; @@ -14739,12 +14823,19 @@ bool isInc; bool isLegal = false; - if (Subtarget->isThumb2()) - isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, - isInc, DAG); - else - isLegal = getARMIndexedAddressParts(Op, VT, 
isSEXTLoad, Base, Offset, + if (VT.isVector()) + isLegal = Subtarget->hasMVEIntegerOps() && + getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad, + Subtarget->isLittle(), Base, Offset, isInc, DAG); + else { + if (Subtarget->isThumb2()) + isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + else + isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + } if (!isLegal) return false; Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -221,7 +221,9 @@ def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>; def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>; -class t2am_imm7_offset : MemOperand { +class t2am_imm7_offset : MemOperand, + ComplexPattern", + [], [SDNPWantRoot]> { // They are printed the same way as the imm8 version let PrintMethod = "printT2AddrModeImm8OffsetOperand"; let ParserMatchClass = @@ -4751,6 +4753,38 @@ def : MVE_unpred_vector_load_typed; } +class MVE_unpred_vector_offset_store_typed + : Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset:$addr), + (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset:$addr)>; + +multiclass MVE_unpred_vector_offset_store { + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; +} +def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (pre_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 4; +}]>; +def aligned32_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (post_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 
4; +}]>; +def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (pre_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 2; +}]>; +def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), + (post_store node:$val, node:$ptr, node:$offset), [{ + return cast(N)->getAlignment() >= 2; +}]>; + let Predicates = [HasMVEInt, IsLE] in { defm : MVE_unpred_vector_store; defm : MVE_unpred_vector_store; @@ -4759,6 +4793,13 @@ defm : MVE_unpred_vector_load; defm : MVE_unpred_vector_load; defm : MVE_unpred_vector_load; + + defm : MVE_unpred_vector_offset_store; + defm : MVE_unpred_vector_offset_store; + defm : MVE_unpred_vector_offset_store; + defm : MVE_unpred_vector_offset_store; + defm : MVE_unpred_vector_offset_store; + defm : MVE_unpred_vector_offset_store; } let Predicates = [HasMVEInt, IsBE] in { @@ -4799,6 +4840,17 @@ (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; def : Pat<(store (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr), (MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>; + + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; + def : MVE_unpred_vector_offset_store_typed; } let Predicates = [HasMVEInt] in { @@ -4816,6 +4868,10 @@ let MinAlignment = 2 in { def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr), (truncstorevi16 node:$val, node:$ptr)>; + def post_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncstvi16 node:$val, node:$base, node:$offset)>; + def pre_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncstvi16 node:$val, 
node:$base, node:$offset)>; } let Predicates = [HasMVEInt] in { @@ -4825,6 +4881,20 @@ (MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr)>; def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr), (MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr)>; + + def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(post_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; + + def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; } Index: llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll +++ llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll @@ -4,8 +4,7 @@ define i8* @ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -20,8 +19,7 @@ define i8* @ldrwu32_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -36,8 +34,7 @@ define i8* 
@ldrwu32_m4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_m4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: subs r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0], #-4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -52,8 +49,7 @@ define i8* @ldrwu32_508(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vldrw.u32 q0, [r0], #508 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -84,8 +80,7 @@ define i8* @ldrwu32_m508(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vldrw.u32 q0, [r0], #-508 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -117,8 +112,7 @@ define i8* @ldrhu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -151,8 +145,7 @@ define i8* @ldrhu32_2(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrh.u32 q0, [r0], #2 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -168,8 +161,7 @@ define i8* @ldrhu32_254(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrh.u32 q0, [r0], #254 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -203,8 +195,7 @@ define i8* @ldrhs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhs32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.s32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.s32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -237,8 +228,7 @@ define i8* @ldrhs32_2(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhs32_2: ; CHECK: @ 
%bb.0: @ %entry -; CHECK-NEXT: vldrh.s32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrh.s32 q0, [r0], #2 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -254,8 +244,7 @@ define i8* @ldrhs32_254(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhs32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.s32 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrh.s32 q0, [r0], #254 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -289,8 +278,7 @@ define i8* @ldrhu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u16 q0, [r0], #4 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -305,8 +293,7 @@ define i8* @ldrhu16_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0], #3 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -321,8 +308,7 @@ define i8* @ldrhu16_2(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrh.u16 q0, [r0], #2 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -337,8 +323,7 @@ define i8* @ldrhu16_254(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrh.u16 q0, [r0], #254 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -370,8 +355,7 @@ define i8* @ldrbu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -387,8 +371,7 @@ define i8* @ldrbu32_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; 
CHECK-NEXT: vldrb.u32 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -404,8 +387,7 @@ define i8* @ldrbu32_127(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrb.u32 q0, [r0], #127 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -439,8 +421,7 @@ define i8* @ldrbs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.s32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -456,8 +437,7 @@ define i8* @ldrbs32_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.s32 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -473,8 +453,7 @@ define i8* @ldrbs32_127(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s32 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrb.s32 q0, [r0], #127 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -508,8 +487,7 @@ define i8* @ldrbu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u16 q0, [r0], #4 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -525,8 +503,7 @@ define i8* @ldrbu16_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u16 q0, [r0], #3 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -542,8 +519,7 @@ define i8* @ldrbu16_127(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrb.u16 q0, [r0], #127 ; CHECK-NEXT: vstrh.16 q0, [r1] ; 
CHECK-NEXT: bx lr entry: @@ -577,8 +553,7 @@ define i8* @ldrbs16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.s16 q0, [r0], #4 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -594,8 +569,7 @@ define i8* @ldrbs16_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s16 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.s16 q0, [r0], #3 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -611,8 +585,7 @@ define i8* @ldrbs16_127(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s16 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrb.s16 q0, [r0], #127 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -646,8 +619,7 @@ define i8* @ldrbu8_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u8 q0, [r0], #4 ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -662,8 +634,7 @@ define i8* @ldrbu8_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0], #3 ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -678,8 +649,7 @@ define i8* @ldrbu8_127(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrb.u8 q0, [r0], #127 ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -710,8 +680,7 @@ define i8* @ldrwf32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwf32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -726,8 +695,7 @@ define i8* @ldrwf16_4(i8* %x, i8* %y) { ; CHECK-LABEL: 
ldrwf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u16 q0, [r0], #4 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -742,8 +710,7 @@ define i8* @ldrwi32_align1(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwi32_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -758,8 +725,7 @@ define i8* @ldrhi16_align1(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhi16_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0], #3 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -798,8 +764,7 @@ define i8* @ldrf32_align1(i8* %x, i8* %y) { ; CHECK-LABEL: ldrf32_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -814,8 +779,7 @@ define i8* @ldrf16_align1(i8* %x, i8* %y) { ; CHECK-LABEL: ldrf16_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0], #3 ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -835,8 +799,7 @@ ; CHECK-LABEL: strw32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -851,8 +814,7 @@ ; CHECK-LABEL: strw32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -867,8 +829,7 @@ ; CHECK-LABEL: strw32_m4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; 
CHECK-NEXT: subs r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0], #-4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 @@ -883,8 +844,7 @@ ; CHECK-LABEL: strw32_508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vstrw.32 q0, [r0], #508 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 508 @@ -915,8 +875,7 @@ ; CHECK-LABEL: strw32_m508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vstrw.32 q0, [r0], #-508 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -508 @@ -948,8 +907,7 @@ ; CHECK-LABEL: strh32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.32 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -980,8 +938,7 @@ ; CHECK-LABEL: strh32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.32 q0, [r0], #2 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -996,8 +953,7 @@ ; CHECK-LABEL: strh32_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrh.32 q0, [r0], #254 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -1029,8 +985,7 @@ ; CHECK-LABEL: strh16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrh.16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1045,8 +1000,7 @@ ; CHECK-LABEL: strh16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrh.16 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; 
CHECK-NEXT: vstrb.8 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1061,8 +1015,7 @@ ; CHECK-LABEL: strh16_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrh.16 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrb.8 q0, [r0], #2 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -1077,8 +1030,7 @@ ; CHECK-LABEL: strh16_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrh.16 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrh.16 q0, [r0], #254 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -1110,8 +1062,7 @@ ; CHECK-LABEL: strb32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.32 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1126,8 +1077,7 @@ ; CHECK-LABEL: strb32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.32 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1142,8 +1092,7 @@ ; CHECK-LABEL: strb32_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrb.32 q0, [r0], #127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1175,8 +1124,7 @@ ; CHECK-LABEL: strb16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.16 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1191,8 +1139,7 @@ ; CHECK-LABEL: strb16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.16 q0, [r0], #3 ; 
CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1207,8 +1154,7 @@ ; CHECK-LABEL: strb16_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrb.16 q0, [r0], #127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1240,8 +1186,7 @@ ; CHECK-LABEL: strb8_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1256,8 +1201,7 @@ ; CHECK-LABEL: strb8_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1272,8 +1216,7 @@ ; CHECK-LABEL: strb8_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrb.8 q0, [r0], #127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1304,8 +1247,7 @@ ; CHECK-LABEL: strf32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1320,8 +1262,7 @@ ; CHECK-LABEL: strf16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrh.16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1336,8 +1277,7 @@ ; CHECK-LABEL: strwi32_align1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds 
i8, i8* %y, i32 3 @@ -1352,8 +1292,7 @@ ; CHECK-LABEL: strhi16_align1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1392,8 +1331,7 @@ ; CHECK-LABEL: strf32_align1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1408,8 +1346,7 @@ ; CHECK-LABEL: strf16_align1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 Index: llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll +++ llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll @@ -4,8 +4,7 @@ define i8* @ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -20,8 +19,7 @@ define i8* @ldrwu32_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -36,8 +34,7 @@ define i8* @ldrwu32_m4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_m4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #-4] -; CHECK-NEXT: subs r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0, #-4]! 
; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -52,8 +49,7 @@ define i8* @ldrwu32_508(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: add.w r0, r0, #508 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q0, [r0, #508]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -84,8 +80,7 @@ define i8* @ldrwu32_m508(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwu32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: sub.w r0, r0, #508 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q0, [r0, #-508]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -117,8 +112,7 @@ define i8* @ldrhu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -151,8 +145,7 @@ define i8* @ldrhu32_2(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrh.u32 q0, [r0, #2]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -168,8 +161,7 @@ define i8* @ldrhu32_254(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #254 -; CHECK-NEXT: vldrh.u32 q0, [r0] +; CHECK-NEXT: vldrh.u32 q0, [r0, #254]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -203,8 +195,7 @@ define i8* @ldrhs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhs32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.s32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.s32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -237,8 +228,7 @@ define i8* @ldrhs32_2(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhs32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.s32 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrh.s32 q0, [r0, #2]! 
; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -254,8 +244,7 @@ define i8* @ldrhs32_254(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhs32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #254 -; CHECK-NEXT: vldrh.s32 q0, [r0] +; CHECK-NEXT: vldrh.s32 q0, [r0, #254]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -289,8 +278,7 @@ define i8* @ldrhu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u16 q0, [r0, #4]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -305,8 +293,7 @@ define i8* @ldrhu16_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vldrh.u16 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -321,8 +308,7 @@ define i8* @ldrhu16_2(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrh.u16 q0, [r0, #2]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -337,8 +323,7 @@ define i8* @ldrhu16_254(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhu16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #254 -; CHECK-NEXT: vldrh.u16 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r0, #254]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -370,8 +355,7 @@ define i8* @ldrbu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -387,8 +371,7 @@ define i8* @ldrbu32_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u32 q0, [r0, #3]! 
; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -404,8 +387,7 @@ define i8* @ldrbu32_127(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrb.u32 q0, [r0] +; CHECK-NEXT: vldrb.u32 q0, [r0, #127]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -439,8 +421,7 @@ define i8* @ldrbs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.s32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -456,8 +437,7 @@ define i8* @ldrbs32_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s32 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.s32 q0, [r0, #3]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -473,8 +453,7 @@ define i8* @ldrbs32_127(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrb.s32 q0, [r0] +; CHECK-NEXT: vldrb.s32 q0, [r0, #127]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -508,8 +487,7 @@ define i8* @ldrbu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u16 q0, [r0, #4]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -525,8 +503,7 @@ define i8* @ldrbu16_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u16 q0, [r0, #3]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -542,8 +519,7 @@ define i8* @ldrbu16_127(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrb.u16 q0, [r0] +; CHECK-NEXT: vldrb.u16 q0, [r0, #127]! 
; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -577,8 +553,7 @@ define i8* @ldrbs16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.s16 q0, [r0, #4]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -594,8 +569,7 @@ define i8* @ldrbs16_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s16 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.s16 q0, [r0, #3]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -611,8 +585,7 @@ define i8* @ldrbs16_127(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbs16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrb.s16 q0, [r0] +; CHECK-NEXT: vldrb.s16 q0, [r0, #127]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -646,8 +619,7 @@ define i8* @ldrbu8_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u8 q0, [r0, #4]! ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -662,8 +634,7 @@ define i8* @ldrbu8_3(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -678,8 +649,7 @@ define i8* @ldrbu8_127(i8* %x, i8* %y) { ; CHECK-LABEL: ldrbu8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrb.u8 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r0, #127]! ; CHECK-NEXT: vstrb.8 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -710,8 +680,7 @@ define i8* @ldrwf32_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwf32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0, #4]! 
; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -726,8 +695,7 @@ define i8* @ldrwf16_4(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u16 q0, [r0, #4]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -742,8 +710,7 @@ define i8* @ldrwi32_align1(i8* %x, i8* %y) { ; CHECK-LABEL: ldrwi32_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -758,8 +725,7 @@ define i8* @ldrhi16_align1(i8* %x, i8* %y) { ; CHECK-LABEL: ldrhi16_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -798,8 +764,7 @@ define i8* @ldrf32_align1(i8* %x, i8* %y) { ; CHECK-LABEL: ldrf32_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -814,8 +779,7 @@ define i8* @ldrf16_align1(i8* %x, i8* %y) { ; CHECK-LABEL: ldrf16_align1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! ; CHECK-NEXT: vstrh.16 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -835,8 +799,7 @@ ; CHECK-LABEL: strw32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -850,9 +813,8 @@ define i8* @strw32_3(i8* %y, i8* %x) { ; CHECK-LABEL: strw32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r0, #3]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -867,8 +829,7 @@ ; CHECK-LABEL: strw32_m4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #-4] -; CHECK-NEXT: subs r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0, #-4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 @@ -882,9 +843,8 @@ define i8* @strw32_508(i8* %y, i8* %x) { ; CHECK-LABEL: strw32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: add.w r0, r0, #508 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r0, #508]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 508 @@ -914,9 +874,8 @@ define i8* @strw32_m508(i8* %y, i8* %x) { ; CHECK-LABEL: strw32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: sub.w r0, r0, #508 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r0, #-508]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -508 @@ -948,8 +907,7 @@ ; CHECK-LABEL: strh32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -980,8 +938,7 @@ ; CHECK-LABEL: strh32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.32 q0, [r0, #2]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -995,9 +952,8 @@ define i8* @strh32_254(i8* %y, i8* %x) { ; CHECK-LABEL: strh32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #254 ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0] +; CHECK-NEXT: vstrh.32 q0, [r0, #254]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -1029,8 +985,7 @@ ; CHECK-LABEL: strh16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrh.16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1044,9 +999,8 @@ define i8* @strh16_3(i8* %y, i8* %x) { ; CHECK-LABEL: strh16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrh.16 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r0, #3]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1061,8 +1015,7 @@ ; CHECK-LABEL: strh16_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrh.16 q0, [r0, #2] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrb.8 q0, [r0, #2]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -1076,9 +1029,8 @@ define i8* @strh16_254(i8* %y, i8* %x) { ; CHECK-LABEL: strh16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #254 ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrh.16 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r0, #254]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -1110,8 +1062,7 @@ ; CHECK-LABEL: strb32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1126,8 +1077,7 @@ ; CHECK-LABEL: strb32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.32 q0, [r0, #3]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1141,9 +1091,8 @@ define i8* @strb32_127(i8* %y, i8* %x) { ; CHECK-LABEL: strb32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0] +; CHECK-NEXT: vstrb.32 q0, [r0, #127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1175,8 +1124,7 @@ ; CHECK-LABEL: strb16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.16 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1191,8 +1139,7 @@ ; CHECK-LABEL: strb16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.16 q0, [r0, #3]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1206,9 +1153,8 @@ define i8* @strb16_127(i8* %y, i8* %x) { ; CHECK-LABEL: strb16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0] +; CHECK-NEXT: vstrb.16 q0, [r0, #127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1240,8 +1186,7 @@ ; CHECK-LABEL: strb8_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1256,8 +1201,7 @@ ; CHECK-LABEL: strb8_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u8 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0, #3]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1271,9 +1215,8 @@ define i8* @strb8_127(i8* %y, i8* %x) { ; CHECK-LABEL: strb8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrb.u8 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r0, #127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1304,8 +1247,7 @@ ; CHECK-LABEL: strf32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1320,8 +1262,7 @@ ; CHECK-LABEL: strf16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrh.16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1336,8 +1277,7 @@ ; CHECK-LABEL: strwi32_align1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0, #3]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1352,8 +1292,7 @@ ; CHECK-LABEL: strhi16_align1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0, #3]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1391,8 +1330,7 @@ ; CHECK-LABEL: strf32_align1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0, #3]! 
; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1407,8 +1345,7 @@ ; CHECK-LABEL: strf16_align1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vstrb.8 q0, [r0, #3] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0, #3]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 Index: llvm/test/CodeGen/Thumb2/mve-ldst-regimm.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-ldst-regimm.ll +++ llvm/test/CodeGen/Thumb2/mve-ldst-regimm.ll @@ -55,8 +55,8 @@ define hidden void @fwd_float16_t(%struct.s_float16_t* noalias nocapture %v) local_unnamed_addr #0 { ; CHECK-LABEL: fwd_float16_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: vstrh.16 q0, [r0, #16] +; CHECK-NEXT: vldrh.u16 q0, [r0], #16 +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: bx lr entry: %arrayidx3 = getelementptr inbounds %struct.s_float16_t, %struct.s_float16_t* %v, i32 0, i32 1, i32 0 @@ -130,8 +130,8 @@ define hidden void @bwd_float16_t(%struct.s_float16_t* noalias nocapture %v) local_unnamed_addr #0 { ; CHECK-LABEL: bwd_float16_t: ; CHECK: @ %bb.0: @ %for.end -; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: vstrh.16 q0, [r0, #-16] +; CHECK-NEXT: vldrh.u16 q0, [r0], #-16 +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: bx lr for.end: %0 = bitcast %struct.s_float16_t* %v to <8 x half>*