Index: llvm/include/llvm/Target/TargetSelectionDAG.td =================================================================== --- llvm/include/llvm/Target/TargetSelectionDAG.td +++ llvm/include/llvm/Target/TargetSelectionDAG.td @@ -1105,6 +1105,16 @@ let IsStore = 1; let MemoryVT = f32; } +def pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i8; +} +def pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i16; +} def post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (istore node:$val, node:$ptr, node:$offset), [{ @@ -1142,6 +1152,16 @@ let IsStore = 1; let MemoryVT = f32; } +def post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i8; +} +def post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let ScalarMemoryVT = i16; +} def nonvolatile_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ Index: llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -147,6 +147,11 @@ bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, SDValue &OffImm); template + bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, + SDValue &OffImm); + bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, + SDValue &OffImm, unsigned Shift); + template bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, @@ -1300,6 +1305,28 @@ return true; } +template +bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, + SDValue &OffImm) { + return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, + SDValue &OffImm, unsigned Shift) { + unsigned Opcode = Op->getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast(Op)->getAddressingMode() + : cast(Op)->getAddressingMode(); + int RHSC; + if (isScaledConstantInRange(N, 1<getTargetConstant(RHSC * (1<getTargetConstant(-RHSC * (1<getOffset(), Offset, Shift)) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[] = {Base, + Offset, + CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32), + CurDAG->getRegister(0, MVT::i32), + Chain}; + SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0), MVT::i32, + MVT::Other, Ops); + transferMemOperands(N, New); + ReplaceUses(SDValue(N, 0), SDValue(New, 1)); + ReplaceUses(SDValue(N, 1), SDValue(New, 0)); + ReplaceUses(SDValue(N, 2), SDValue(New, 2)); + CurDAG->RemoveDeadNode(N); + return true; + } + + return false; + } + if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { switch (LoadedVT.getSimpleVT().SimpleTy) { case MVT::i32: @@ -1540,10 +1629,7 @@ default: return false; } - Match = true; - } - if (Match) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)), Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -255,6 +255,13 @@ setOperationAction(ISD::FP_TO_SINT, VT, Expand); setOperationAction(ISD::FP_TO_UINT, VT, Expand); } + + // Pre and Post inc are supported on loads and stores + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, VT, Legal); + setIndexedStoreAction(im, VT, Legal); + } } const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 }; @@ -273,6 +280,13 @@ setOperationAction(ISD::LOAD, VT, Legal); setOperationAction(ISD::STORE, VT, Legal); + // Pre and Post inc are supported on loads and stores + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, VT, Legal); + setIndexedStoreAction(im, VT, Legal); + } + if (HasMVEFP) { setOperationAction(ISD::FMINNUM, VT, Legal); setOperationAction(ISD::FMAXNUM, VT, Legal); @@ -313,6 +327,17 @@ setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); + + // Pre and Post inc on these are legal, given the correct extends + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::v8i8, Legal); + setIndexedStoreAction(im, MVT::v8i8, Legal); + setIndexedLoadAction(im, MVT::v4i8, Legal); + setIndexedStoreAction(im, MVT::v4i8, Legal); + setIndexedLoadAction(im, MVT::v4i16, Legal); + setIndexedStoreAction(im, MVT::v4i16, Legal); + } } ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, @@ -13783,15 +13808,38 @@ if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; + int limit = 0x100; // 8 bits + int scale = 1; + + // MVE has different bitwidths and scales + if (VT.isSimple() && VT.isVector()) { + limit = 0x80; // 7 bits + switch (VT.getSimpleVT().getVectorElementType().SimpleTy) { + case MVT::i32: + case MVT::f32: + scale = 4; + break; + case MVT::i16: + case MVT::f16: + scale = 2; + break; + case MVT::i8: + scale = 1; + break; + default: + return false; + } + } + Base = Ptr->getOperand(0); if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); - if (RHSC < 0 && RHSC > -0x100) { // 8 bits. + if (RHSC < 0 && RHSC > -limit*scale && RHSC % scale == 0) { assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0)); return true; - } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. + } else if (RHSC > 0 && RHSC < limit*scale && RHSC % scale == 0) { isInc = Ptr->getOpcode() == ISD::ADD; Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0)); return true; Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -221,7 +221,9 @@ def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>; def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>; -class t2am_imm7_offset : MemOperand { +class t2am_imm7_offset : MemOperand, + ComplexPattern", + [], [SDNPWantRoot]> { // They are printed the same way as the imm8 version let PrintMethod = "printT2AddrModeImm8OffsetOperand"; let ParserMatchClass = @@ -4325,6 +4327,48 @@ } +let Predicates = [HasMVEInt] in { + def : Pat<(post_store (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<2>:$addr), + (MVE_VSTRWU32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<2>:$addr)>; + def : Pat<(post_store (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRHU16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; + def : Pat<(post_store (v16i8 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRBU8_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + + def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(post_truncstvi16 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; + + def : Pat<(pre_store (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<2>:$addr), + (MVE_VSTRWU32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<2>:$addr)>; + def : Pat<(pre_store (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRHU16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; + def : Pat<(pre_store (v16i8 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRBU8_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + + def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr), + (MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>; + def : Pat<(pre_truncstvi16 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; + + def : Pat<(post_store (v4f32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<2>:$addr), + (MVE_VSTRWU32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<2>:$addr)>; + def : Pat<(post_store (v8f16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRHU16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; + + def : Pat<(pre_store (v4f32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<2>:$addr), + (MVE_VSTRWU32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<2>:$addr)>; + def : Pat<(pre_store (v8f16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr), + (MVE_VSTRHU16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>; +} + + + // Bit convert patterns let Predicates = [HasMVEInt] in { Index: llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll +++ llvm/test/CodeGen/Thumb2/mve-ldst-offset.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s define i8* @post_ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwu32_4: Index: llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll +++ llvm/test/CodeGen/Thumb2/mve-ldst-postinc.ll @@ -1,11 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s define i8* @post_ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -36,8 +35,7 @@ define i8* @post_ldrwu32_m4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwu32_m4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: subs r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0], #-4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -52,8 +50,7 @@ define i8* @post_ldrwu32_508(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwu32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vldrw.u32 q0, [r0], #508 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -84,8 +81,7 @@ define i8* @post_ldrwu32_m508(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwu32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vldrw.u32 q0, [r0], #-508 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -117,8 +113,7 @@ define i8* @post_ldrhu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -151,8 +146,7 @@ define i8* @post_ldrhu32_2(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrh.u32 q0, [r0], #2 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -168,8 +162,7 @@ define i8* @post_ldrhu32_254(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrh.u32 q0, [r0], #254 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -203,8 +196,7 @@ define i8* @post_ldrhs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhs32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.s32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.s32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -237,8 +229,7 @@ define i8* @post_ldrhs32_2(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhs32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.s32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrh.s32 q0, [r0], #2 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -254,8 +245,7 @@ define i8* @post_ldrhs32_254(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhs32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.s32 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrh.s32 q0, [r0], #254 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -289,8 +279,7 @@ define i8* @post_ldrhu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u16 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -321,8 +310,7 @@ define i8* @post_ldrhu16_2(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vldrh.u16 q0, [r0], #2 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -337,8 +325,7 @@ define i8* @post_ldrhu16_254(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vldrh.u16 q0, [r0], #254 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -370,8 +357,7 @@ define i8* @post_ldrbu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -387,8 +373,7 @@ define i8* @post_ldrbu32_3(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u32 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -404,8 +389,7 @@ define i8* @post_ldrbu32_127(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrb.u32 q0, [r0], #127 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -439,8 +423,7 @@ define i8* @post_ldrbs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.s32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -456,8 +439,7 @@ define i8* @post_ldrbs32_3(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.s32 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -473,8 +455,7 @@ define i8* @post_ldrbs32_127(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s32 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrb.s32 q0, [r0], #127 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -508,8 +489,7 @@ define i8* @post_ldrbu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u16 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -525,8 +505,7 @@ define i8* @post_ldrbu16_3(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u16 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -542,8 +521,7 @@ define i8* @post_ldrbu16_127(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrb.u16 q0, [r0], #127 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -577,8 +555,7 @@ define i8* @post_ldrbs16_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.s16 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -594,8 +571,7 @@ define i8* @post_ldrbs16_3(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s16 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.s16 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -611,8 +587,7 @@ define i8* @post_ldrbs16_127(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s16 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrb.s16 q0, [r0], #127 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -646,8 +621,7 @@ define i8* @post_ldrbu8_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u8 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -662,8 +636,7 @@ define i8* @post_ldrbu8_3(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vldrb.u8 q0, [r0], #3 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -678,8 +651,7 @@ define i8* @post_ldrbu8_127(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vldrb.u8 q0, [r0], #127 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -710,8 +682,7 @@ define i8* @post_ldrwf32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwf32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -726,8 +697,7 @@ define i8* @post_ldrwf16_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u16 q0, [r0], #4 ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -747,8 +717,7 @@ ; CHECK-LABEL: post_strw32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -779,8 +748,7 @@ ; CHECK-LABEL: post_strw32_m4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: subs r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r0], #-4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 @@ -795,8 +763,7 @@ ; CHECK-LABEL: post_strw32_508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: add.w r0, r0, #508 +; CHECK-NEXT: vstrw.32 q0, [r0], #508 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 508 @@ -827,8 +794,7 @@ ; CHECK-LABEL: post_strw32_m508: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: sub.w r0, r0, #508 +; CHECK-NEXT: vstrw.32 q0, [r0], #-508 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -508 @@ -860,8 +826,7 @@ ; CHECK-LABEL: post_strh32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.32 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -892,8 +857,7 @@ ; CHECK-LABEL: post_strh32_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.32 q0, [r0], #2 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -908,8 +872,7 @@ ; CHECK-LABEL: post_strh32_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrh.32 q0, [r0], #254 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -941,8 +904,7 @@ ; CHECK-LABEL: post_strh16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -973,8 +935,7 @@ ; CHECK-LABEL: post_strh16_2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: vstrh.16 q0, [r0], #2 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -989,8 +950,7 @@ ; CHECK-LABEL: post_strh16_254: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #254 +; CHECK-NEXT: vstrh.16 q0, [r0], #254 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -1022,8 +982,7 @@ ; CHECK-LABEL: post_strb32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.32 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1038,8 +997,7 @@ ; CHECK-LABEL: post_strb32_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.32 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1054,8 +1012,7 @@ ; CHECK-LABEL: post_strb32_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrb.32 q0, [r0], #127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1087,8 +1044,7 @@ ; CHECK-LABEL: post_strb16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.16 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1103,8 +1059,7 @@ ; CHECK-LABEL: post_strb16_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.16 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1119,8 +1074,7 @@ ; CHECK-LABEL: post_strb16_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrb.16 q0, [r0], #127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1152,8 +1106,7 @@ ; CHECK-LABEL: post_strb8_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1168,8 +1121,7 @@ ; CHECK-LABEL: post_strb8_3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #3 +; CHECK-NEXT: vstrb.8 q0, [r0], #3 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1184,8 +1136,7 @@ ; CHECK-LABEL: post_strb8_127: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #127 +; CHECK-NEXT: vstrb.8 q0, [r0], #127 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1216,8 +1167,7 @@ ; CHECK-LABEL: post_strf32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1232,8 +1182,7 @@ ; CHECK-LABEL: post_strf16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r0], #4 ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 Index: llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll +++ llvm/test/CodeGen/Thumb2/mve-ldst-preinc.ll @@ -1,11 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s define i8* @post_ldrwu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -36,8 +35,7 @@ define i8* @post_ldrwu32_m4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwu32_m4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #-4] -; CHECK-NEXT: subs r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0, #-4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -52,8 +50,7 @@ define i8* @post_ldrwu32_508(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwu32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: add.w r0, r0, #508 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q0, [r0, #508]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -84,8 +81,7 @@ define i8* @post_ldrwu32_m508(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwu32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: sub.w r0, r0, #508 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q0, [r0, #-508]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -117,8 +113,7 @@ define i8* @post_ldrhu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -151,8 +146,7 @@ define i8* @post_ldrhu32_2(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #2 -; CHECK-NEXT: vldrh.u32 q0, [r0] +; CHECK-NEXT: vldrh.u32 q0, [r0, #2]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -168,8 +162,7 @@ define i8* @post_ldrhu32_254(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #254 -; CHECK-NEXT: vldrh.u32 q0, [r0] +; CHECK-NEXT: vldrh.u32 q0, [r0, #254]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -203,8 +196,7 @@ define i8* @post_ldrhs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhs32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.s32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.s32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -237,8 +229,7 @@ define i8* @post_ldrhs32_2(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhs32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #2 -; CHECK-NEXT: vldrh.s32 q0, [r0] +; CHECK-NEXT: vldrh.s32 q0, [r0, #2]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -254,8 +245,7 @@ define i8* @post_ldrhs32_254(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhs32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #254 -; CHECK-NEXT: vldrh.s32 q0, [r0] +; CHECK-NEXT: vldrh.s32 q0, [r0, #254]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -289,8 +279,7 @@ define i8* @post_ldrhu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u16 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -321,8 +310,7 @@ define i8* @post_ldrhu16_2(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #2 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r0, #2]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -337,8 +325,7 @@ define i8* @post_ldrhu16_254(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrhu16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #254 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrh.u16 q0, [r0, #254]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -370,8 +357,7 @@ define i8* @post_ldrbu32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -387,8 +373,7 @@ define i8* @post_ldrbu32_3(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vldrb.u32 q0, [r0] +; CHECK-NEXT: vldrb.u32 q0, [r0, #3]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -404,8 +389,7 @@ define i8* @post_ldrbu32_127(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrb.u32 q0, [r0] +; CHECK-NEXT: vldrb.u32 q0, [r0, #127]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -439,8 +423,7 @@ define i8* @post_ldrbs32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.s32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -456,8 +439,7 @@ define i8* @post_ldrbs32_3(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vldrb.s32 q0, [r0] +; CHECK-NEXT: vldrb.s32 q0, [r0, #3]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -473,8 +455,7 @@ define i8* @post_ldrbs32_127(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrb.s32 q0, [r0] +; CHECK-NEXT: vldrb.s32 q0, [r0, #127]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -508,8 +489,7 @@ define i8* @post_ldrbu16_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u16 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -525,8 +505,7 @@ define i8* @post_ldrbu16_3(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vldrb.u16 q0, [r0] +; CHECK-NEXT: vldrb.u16 q0, [r0, #3]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -542,8 +521,7 @@ define i8* @post_ldrbu16_127(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrb.u16 q0, [r0] +; CHECK-NEXT: vldrb.u16 q0, [r0, #127]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -577,8 +555,7 @@ define i8* @post_ldrbs16_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.s16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.s16 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -594,8 +571,7 @@ define i8* @post_ldrbs16_3(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vldrb.s16 q0, [r0] +; CHECK-NEXT: vldrb.s16 q0, [r0, #3]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -611,8 +587,7 @@ define i8* @post_ldrbs16_127(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbs16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrb.s16 q0, [r0] +; CHECK-NEXT: vldrb.s16 q0, [r0, #127]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -646,8 +621,7 @@ define i8* @post_ldrbu8_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrb.u8 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -662,8 +636,7 @@ define i8* @post_ldrbu8_3(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r0, #3]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -678,8 +651,7 @@ define i8* @post_ldrbu8_127(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrbu8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrb.u8 q0, [r0, #127]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -710,8 +682,7 @@ define i8* @post_ldrwf32_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwf32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrw.u32 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -726,8 +697,7 @@ define i8* @post_ldrwf16_4(i8* %x, i8* %y) { ; CHECK-LABEL: post_ldrwf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldrh.u16 q0, [r0, #4]! ; CHECK-NEXT: vstrw.32 q0, [r1] ; CHECK-NEXT: bx lr entry: @@ -747,8 +717,7 @@ ; CHECK-LABEL: post_strw32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -779,8 +748,7 @@ ; CHECK-LABEL: post_strw32_m4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #-4] -; CHECK-NEXT: subs r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r0, #-4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -4 @@ -794,9 +762,8 @@ define i8* @post_strw32_508(i8* %y, i8* %x) { ; CHECK-LABEL: post_strw32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: add.w r0, r0, #508 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r0, #508]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 508 @@ -826,9 +793,8 @@ define i8* @post_strw32_m508(i8* %y, i8* %x) { ; CHECK-LABEL: post_strw32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: sub.w r0, r0, #508 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vstrw.32 q0, [r0, #-508]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 -508 @@ -860,8 +826,7 @@ ; CHECK-LABEL: post_strh32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -891,9 +856,8 @@ define i8* @post_strh32_2(i8* %y, i8* %x) { ; CHECK-LABEL: post_strh32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0] +; CHECK-NEXT: vstrh.32 q0, [r0, #2]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -907,9 +871,8 @@ define i8* @post_strh32_254(i8* %y, i8* %x) { ; CHECK-LABEL: post_strh32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #254 ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vstrh.32 q0, [r0] +; CHECK-NEXT: vstrh.32 q0, [r0, #254]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -941,8 +904,7 @@ ; CHECK-LABEL: post_strh16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -972,9 +934,8 @@ define i8* @post_strh16_2(i8* %y, i8* %x) { ; CHECK-LABEL: post_strh16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r0, #2]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 2 @@ -988,9 +949,8 @@ define i8* @post_strh16_254(i8* %y, i8* %x) { ; CHECK-LABEL: post_strh16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #254 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vstrh.16 q0, [r0, #254]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 254 @@ -1022,8 +982,7 @@ ; CHECK-LABEL: post_strb32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1037,9 +996,8 @@ define i8* @post_strb32_3(i8* %y, i8* %x) { ; CHECK-LABEL: post_strb32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0] +; CHECK-NEXT: vstrb.32 q0, [r0, #3]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1053,9 +1011,8 @@ define i8* @post_strb32_127(i8* %y, i8* %x) { ; CHECK-LABEL: post_strb32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vstrb.32 q0, [r0] +; CHECK-NEXT: vstrb.32 q0, [r0, #127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1087,8 +1044,7 @@ ; CHECK-LABEL: post_strb16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.16 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1102,9 +1058,8 @@ define i8* @post_strb16_3(i8* %y, i8* %x) { ; CHECK-LABEL: post_strb16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0] +; CHECK-NEXT: vstrb.16 q0, [r0, #3]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1118,9 +1073,8 @@ define i8* @post_strb16_127(i8* %y, i8* %x) { ; CHECK-LABEL: post_strb16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vstrb.16 q0, [r0] +; CHECK-NEXT: vstrb.16 q0, [r0, #127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1152,8 +1106,7 @@ ; CHECK-LABEL: post_strb8_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrb.8 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1167,9 +1120,8 @@ define i8* @post_strb8_3(i8* %y, i8* %x) { ; CHECK-LABEL: post_strb8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r0, #3]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 3 @@ -1183,9 +1135,8 @@ define i8* @post_strb8_127(i8* %y, i8* %x) { ; CHECK-LABEL: post_strb8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #127 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0] +; CHECK-NEXT: vstrb.8 q0, [r0, #127]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 127 @@ -1216,8 +1167,7 @@ ; CHECK-LABEL: post_strf32_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrw.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 @@ -1232,8 +1182,7 @@ ; CHECK-LABEL: post_strf16_4: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vstrw.32 q0, [r0, #4] -; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vstrh.16 q0, [r0, #4]! ; CHECK-NEXT: bx lr entry: %z = getelementptr inbounds i8, i8* %y, i32 4 Index: llvm/test/CodeGen/Thumb2/mve-ldst-regimm.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-ldst-regimm.ll +++ llvm/test/CodeGen/Thumb2/mve-ldst-regimm.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s %struct.s_int8_t = type { [16 x i8], [16 x i8] } %struct.s_int16_t = type { [8 x i16], [8 x i16] } @@ -10,8 +10,8 @@ define hidden void @fwd_int8_t(%struct.s_int8_t* noalias %v) local_unnamed_addr #0 { ; CHECK-LABEL: fwd_int8_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r0] -; CHECK-NEXT: vstrb.8 q0, [r0, #16] +; CHECK-NEXT: vldrb.u8 q0, [r0], #16 +; CHECK-NEXT: vstrb.8 q0, [r0] ; CHECK-NEXT: bx lr entry: %arrayidx3 = getelementptr inbounds %struct.s_int8_t, %struct.s_int8_t* %v, i32 0, i32 1, i32 0 @@ -25,8 +25,8 @@ define hidden void @fwd_int16_t(%struct.s_int16_t* noalias nocapture %v) local_unnamed_addr #0 { ; CHECK-LABEL: fwd_int16_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: vstrh.16 q0, [r0, #16] +; CHECK-NEXT: vldrh.u16 q0, [r0], #16 +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: bx lr entry: %arrayidx3 = getelementptr inbounds %struct.s_int16_t, %struct.s_int16_t* %v, i32 0, i32 1, i32 0 @@ -40,8 +40,8 @@ define hidden void @fwd_int32_t(%struct.s_int32_t* noalias nocapture %v) local_unnamed_addr #0 { ; CHECK-LABEL: fwd_int32_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r0, #16] +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 +; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: bx lr entry: %arrayidx3 = getelementptr inbounds %struct.s_int32_t, %struct.s_int32_t* %v, i32 0, i32 1, i32 0 @@ -55,8 +55,8 @@ define hidden void @fwd_float16_t(%struct.s_float16_t* noalias nocapture %v) local_unnamed_addr #0 { ; CHECK-LABEL: fwd_float16_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: vstrh.16 q0, [r0, #16] +; CHECK-NEXT: vldrh.u16 q0, [r0], #16 +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: bx lr entry: %arrayidx3 = getelementptr inbounds %struct.s_float16_t, %struct.s_float16_t* %v, i32 0, i32 1, i32 0 @@ -70,8 +70,8 @@ define hidden void @fwd_float32_t(%struct.s_float32_t* noalias nocapture %v) local_unnamed_addr #0 { ; CHECK-LABEL: fwd_float32_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r0, #16] +; CHECK-NEXT: vldrw.u32 q0, [r0], #16 +; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: bx lr entry: %d = getelementptr inbounds %struct.s_float32_t, %struct.s_float32_t* %v, i32 0, i32 1 @@ -85,8 +85,8 @@ define hidden void @bwd_int8_t(%struct.s_int8_t* noalias %v) local_unnamed_addr #0 { ; CHECK-LABEL: bwd_int8_t: ; CHECK: @ %bb.0: @ %for.end -; CHECK-NEXT: vldrb.u8 q0, [r0] -; CHECK-NEXT: vstrb.8 q0, [r0, #-16] +; CHECK-NEXT: vldrb.u8 q0, [r0], #-16 +; CHECK-NEXT: vstrb.8 q0, [r0] ; CHECK-NEXT: bx lr for.end: %0 = bitcast %struct.s_int8_t* %v to <16 x i8>* @@ -100,8 +100,8 @@ define hidden void @bwd_int16_t(%struct.s_int16_t* noalias nocapture %v) local_unnamed_addr #0 { ; CHECK-LABEL: bwd_int16_t: ; CHECK: @ %bb.0: @ %for.end -; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: vstrh.16 q0, [r0, #-16] +; CHECK-NEXT: vldrh.u16 q0, [r0], #-16 +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: bx lr for.end: %0 = bitcast %struct.s_int16_t* %v to <8 x i16>* @@ -115,8 +115,8 @@ define hidden void @bwd_int32_t(%struct.s_int32_t* noalias nocapture %v) local_unnamed_addr #0 { ; CHECK-LABEL: bwd_int32_t: ; CHECK: @ %bb.0: @ %for.end -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r0, #-16] +; CHECK-NEXT: vldrw.u32 q0, [r0], #-16 +; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: bx lr for.end: %0 = bitcast %struct.s_int32_t* %v to <4 x i32>* @@ -130,8 +130,8 @@ define hidden void @bwd_float16_t(%struct.s_float16_t* noalias nocapture %v) local_unnamed_addr #0 { ; CHECK-LABEL: bwd_float16_t: ; CHECK: @ %bb.0: @ %for.end -; CHECK-NEXT: vldrh.u16 q0, [r0] -; CHECK-NEXT: vstrh.16 q0, [r0, #-16] +; CHECK-NEXT: vldrh.u16 q0, [r0], #-16 +; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: bx lr for.end: %0 = bitcast %struct.s_float16_t* %v to <8 x half>* @@ -145,8 +145,8 @@ define hidden void @bwd_float32_t(%struct.s_float32_t* noalias nocapture %v) local_unnamed_addr #0 { ; CHECK-LABEL: bwd_float32_t: ; CHECK: @ %bb.0: @ %for.end -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vstrw.32 q0, [r0, #-16] +; CHECK-NEXT: vldrw.u32 q0, [r0], #-16 +; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: bx lr for.end: %0 = bitcast %struct.s_float32_t* %v to <4 x i32>* Index: llvm/test/CodeGen/Thumb2/mve-loadstore.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-loadstore.ll +++ llvm/test/CodeGen/Thumb2/mve-loadstore.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4(<4 x i32>* %vp) { ; CHECK-LABEL: load_4xi32_a4: