diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2570,10 +2570,85 @@
   case ARM::t2STRHi8:
   case ARM::t2STRHi12:
     return 1;
+  case ARM::MVE_VLDRBS16_post:
+  case ARM::MVE_VLDRBS32_post:
+  case ARM::MVE_VLDRBU16_post:
+  case ARM::MVE_VLDRBU32_post:
+  case ARM::MVE_VLDRHS32_post:
+  case ARM::MVE_VLDRHU32_post:
+  case ARM::MVE_VLDRBU8_post:
+  case ARM::MVE_VLDRHU16_post:
+  case ARM::MVE_VLDRWU32_post:
+  case ARM::MVE_VSTRB16_post:
+  case ARM::MVE_VSTRB32_post:
+  case ARM::MVE_VSTRH32_post:
+  case ARM::MVE_VSTRBU8_post:
+  case ARM::MVE_VSTRHU16_post:
+  case ARM::MVE_VSTRWU32_post:
+  case ARM::MVE_VLDRBS16_pre:
+  case ARM::MVE_VLDRBS32_pre:
+  case ARM::MVE_VLDRBU16_pre:
+  case ARM::MVE_VLDRBU32_pre:
+  case ARM::MVE_VLDRHS32_pre:
+  case ARM::MVE_VLDRHU32_pre:
+  case ARM::MVE_VLDRBU8_pre:
+  case ARM::MVE_VLDRHU16_pre:
+  case ARM::MVE_VLDRWU32_pre:
+  case ARM::MVE_VSTRB16_pre:
+  case ARM::MVE_VSTRB32_pre:
+  case ARM::MVE_VSTRH32_pre:
+  case ARM::MVE_VSTRBU8_pre:
+  case ARM::MVE_VSTRHU16_pre:
+  case ARM::MVE_VSTRWU32_pre:
+    return 2;
   }
   return -1;
 }
 
+static bool isPostIndex(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case ARM::MVE_VLDRBS16_post:
+  case ARM::MVE_VLDRBS32_post:
+  case ARM::MVE_VLDRBU16_post:
+  case ARM::MVE_VLDRBU32_post:
+  case ARM::MVE_VLDRHS32_post:
+  case ARM::MVE_VLDRHU32_post:
+  case ARM::MVE_VLDRBU8_post:
+  case ARM::MVE_VLDRHU16_post:
+  case ARM::MVE_VLDRWU32_post:
+  case ARM::MVE_VSTRB16_post:
+  case ARM::MVE_VSTRB32_post:
+  case ARM::MVE_VSTRH32_post:
+  case ARM::MVE_VSTRBU8_post:
+  case ARM::MVE_VSTRHU16_post:
+  case ARM::MVE_VSTRWU32_post:
+    return true;
+  }
+  return false;
+}
+
+static bool isPreIndex(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case ARM::MVE_VLDRBS16_pre:
+  case ARM::MVE_VLDRBS32_pre:
+  case ARM::MVE_VLDRBU16_pre:
+  case ARM::MVE_VLDRBU32_pre:
+  case ARM::MVE_VLDRHS32_pre:
+  case ARM::MVE_VLDRHU32_pre:
+  case ARM::MVE_VLDRBU8_pre:
+  case ARM::MVE_VLDRHU16_pre:
+  case ARM::MVE_VLDRWU32_pre:
+  case ARM::MVE_VSTRB16_pre:
+  case ARM::MVE_VSTRB32_pre:
+  case ARM::MVE_VSTRH32_pre:
+  case ARM::MVE_VSTRBU8_pre:
+  case ARM::MVE_VSTRHU16_pre:
+  case ARM::MVE_VSTRWU32_pre:
+    return true;
+  }
+  return false;
+}
+
 // Given a memory access Opcode, check that the give Imm would be a valid Offset
 // for this instruction (same as isLegalAddressImm), Or if the instruction
 // could be easily converted to one where that was valid. For example converting
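A note on the hunk above (not part of the patch): getBaseOperandIndex now returns 2 for the MVE pre/post-indexed forms, since their operand 0 is the written-back base (see NewBaseReg = PrePostInc->getOperand(0).getReg() later in the patch), and isPostIndex/isPreIndex only classify opcodes. Whether a rebased offset is still encodable is decided separately by isLegalAddressImm. As a rough, hedged sketch only, assuming the usual 7-bit immediate scaled by the element size for contiguous MVE accesses (isLegalMVEOffsetSketch and ElemBytes are illustrative names, not LLVM APIs):

    // Hedged sketch: approximate legality test for a contiguous MVE
    // load/store offset, assuming a 7-bit immediate scaled by the element
    // size (roughly +/-127 bytes for .8, +/-254 for .16, +/-508 for .32).
    #include <cstdlib>

    static bool isLegalMVEOffsetSketch(int Imm, int ElemBytes) {
      if (Imm % ElemBytes != 0)                // must be a multiple of the element size
        return false;
      return std::abs(Imm / ElemBytes) <= 127; // must fit the scaled 7-bit field
    }

The real helper in ARMBaseInstrInfo covers many more instruction forms; this only illustrates why small rebased offsets such as -16 or -48 in the tests below remain legal.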
@@ -2703,19 +2778,26 @@
 }
 
 // Given a Base Register, optimise the load/store uses to attempt to create more
-// post-inc accesses. We do this by taking zero offset loads/stores with an add,
-// and convert them to a postinc load/store of the same type. Any subsequent
-// accesses will be adjusted to use and account for the post-inc value.
+// post-inc accesses and fewer register moves. We do this by taking zero offset
+// loads/stores with an add, and converting them to a postinc load/store of the
+// same type. Any subsequent accesses will be adjusted to use and account for
+// the post-inc value.
 // For example:
 // LDR #0            LDR_POSTINC #16
 // LDR #4            LDR #-12
 // LDR #8            LDR #-8
 // LDR #12           LDR #-4
 // ADD #16
+//
+// At the same time, if we do not find an increment but do find an existing
+// pre/post inc instruction, we can still adjust the offsets of subsequent
+// instructions to save the register move that would otherwise be needed for
+// the in-place increment.
 bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
   // We are looking for:
   // One zero offset load/store that can become postinc
   MachineInstr *BaseAccess = nullptr;
+  MachineInstr *PrePostInc = nullptr;
   // An increment that can be folded in
   MachineInstr *Increment = nullptr;
   // Other accesses after BaseAccess that will need to be updated to use the
@@ -2734,40 +2816,62 @@
     if (!Use.getOperand(BaseOp).isReg() ||
         Use.getOperand(BaseOp).getReg() != Base)
       return false;
-    if (Use.getOperand(BaseOp + 1).getImm() == 0)
+    if (isPreIndex(Use) || isPostIndex(Use))
+      PrePostInc = &Use;
+    else if (Use.getOperand(BaseOp + 1).getImm() == 0)
       BaseAccess = &Use;
     else
       OtherAccesses.insert(&Use);
   }
 
-  if (!BaseAccess || !Increment ||
-      BaseAccess->getParent() != Increment->getParent())
-    return false;
-  Register PredReg;
-  if (Increment->definesRegister(ARM::CPSR) ||
-      getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
-    return false;
+  int IncrementOffset;
+  Register NewBaseReg;
+  if (BaseAccess && Increment) {
+    if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
+      return false;
+    Register PredReg;
+    if (Increment->definesRegister(ARM::CPSR) ||
+        getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
+      return false;
+
+    LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
+               << Base.virtRegIndex() << "\n");
 
-  LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
-             << Base.virtRegIndex() << "\n");
+    // Make sure that Increment has no uses before BaseAccess.
+    for (MachineInstr &Use :
+         MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
+      if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
+        LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
+        return false;
+      }
+    }
 
-  // Make sure that Increment has no uses before BaseAccess.
-  for (MachineInstr &Use :
-       MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
-    if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
-      LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
+    // Make sure that Increment can be folded into Base
+    IncrementOffset = getAddSubImmediate(*Increment);
+    unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
+        BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
+    if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
+      LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
       return false;
     }
   }
+  else if (PrePostInc) {
+    // If we already have a pre/post index load/store then set BaseAccess,
+    // IncrementOffset and NewBaseReg to the values it already produces,
+    // allowing us to update any subsequent uses of the BaseOp reg with the
+    // incremented value.
+    if (Increment)
+      return false;
 
-  // Make sure that Increment can be folded into Base
-  int IncrementOffset = getAddSubImmediate(*Increment);
-  unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
-      BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
-  if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
-    LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
-    return false;
+    LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
+               << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
+    int BaseOp = getBaseOperandIndex(*PrePostInc);
+    IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
+    BaseAccess = PrePostInc;
+    NewBaseReg = PrePostInc->getOperand(0).getReg();
   }
+  else
+    return false;
 
   // And make sure that the negative value of increment can be added to all
   // other offsets after the BaseAccess. We rely on either
@@ -2801,16 +2905,18 @@
     return false;
   }
 
-  // Replace BaseAccess with a post inc
-  LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
-  LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
-  Register NewBaseReg = Increment->getOperand(0).getReg();
-  MachineInstr *BaseAccessPost =
-      createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
-  BaseAccess->eraseFromParent();
-  Increment->eraseFromParent();
-  (void)BaseAccessPost;
-  LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
+  if (!PrePostInc) {
+    // Replace BaseAccess with a post inc
+    LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
+    LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
+    NewBaseReg = Increment->getOperand(0).getReg();
+    MachineInstr *BaseAccessPost =
+        createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
+    BaseAccess->eraseFromParent();
+    Increment->eraseFromParent();
+    (void)BaseAccessPost;
+    LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
+  }
 
   for (auto *Use : SuccessorAccesses) {
     LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
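Both paths above end with the same bookkeeping: BaseAccess becomes (or already is) an instruction that writes Base + IncrementOffset into NewBaseReg, and every later access that used the old Base has its immediate rebased by subtracting IncrementOffset. A minimal standalone sketch of that arithmetic (illustration only, not part of the patch), using the 32-byte increment that appears in the MIR tests below:

    // Minimal sketch of the rebasing applied to SuccessorAccesses: an access
    // at OldOffset from the original base becomes an access at
    // OldOffset - IncrementOffset from the written-back base register.
    #include <cassert>

    static int rebaseOffset(int OldOffset, int IncrementOffset) {
      return OldOffset - IncrementOffset;
    }

    int main() {
      // Offsets taken from mve-postinc-distribute.mir, where the existing
      // pre/post-inc instruction advances the base by 32 bytes.
      assert(rebaseOffset(16, 32) == -16);
      assert(rebaseOffset(-16, 32) == -48);
      assert(rebaseOffset(34, 32) == 2);
      return 0;
    }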
diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.mir
@@ -919,7 +919,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
    ; CHECK: [[MVE_VLDRWU32_post:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_post1:%[0-9]+]]:mqpr = MVE_VLDRWU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_post]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRWU32_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:gprnopc = COPY $r0
@@ -947,7 +947,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
    ; CHECK: [[MVE_VLDRHU16_post:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_post1:%[0-9]+]]:mqpr = MVE_VLDRHU16_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[MVE_VLDRHU16_post]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRHU16_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:gprnopc = COPY $r0
@@ -975,7 +975,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
    ; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[MVE_VLDRBU8_post]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRBU8_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:gprnopc = COPY $r0
@@ -1003,7 +1003,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRBS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS32_post1:%[0-9]+]]:mqpr = MVE_VLDRBS32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[MVE_VLDRBS32_post]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRBS32_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1031,7 +1031,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRBU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU32_post1:%[0-9]+]]:mqpr = MVE_VLDRBU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[MVE_VLDRBU32_post]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRBU32_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1059,7 +1059,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRHS32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHS32_post1:%[0-9]+]]:mqpr = MVE_VLDRHS32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[MVE_VLDRHS32_post]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRHS32_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1087,7 +1087,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRHU32_post:%[0-9]+]]:tgpr, [[MVE_VLDRHU32_post1:%[0-9]+]]:mqpr = MVE_VLDRHU32_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[MVE_VLDRHU32_post]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRHU32_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1115,7 +1115,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRBS16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBS16_post1:%[0-9]+]]:mqpr = MVE_VLDRBS16_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[MVE_VLDRBS16_post]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRBS16_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1143,7 +1143,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRBU16_post:%[0-9]+]]:tgpr, [[MVE_VLDRBU16_post1:%[0-9]+]]:mqpr = MVE_VLDRBU16_post [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[MVE_VLDRBU16_post]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRBU16_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1172,7 +1172,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
    ; CHECK: [[MVE_VSTRWU32_post:%[0-9]+]]:rgpr = MVE_VSTRWU32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRWU32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRWU32 [[COPY]], [[MVE_VSTRWU32_post]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRWU32_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1202,7 +1202,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
    ; CHECK: [[MVE_VSTRHU16_post:%[0-9]+]]:rgpr = MVE_VSTRHU16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRHU16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRHU16 [[COPY]], [[MVE_VSTRHU16_post]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRHU16_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1232,7 +1232,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
    ; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRBU8 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRBU8 [[COPY]], [[MVE_VSTRBU8_post]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRBU8_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1262,7 +1262,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VSTRH32_post:%[0-9]+]]:tgpr = MVE_VSTRH32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRH32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRH32 [[COPY]], [[MVE_VSTRH32_post]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRH32_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1292,7 +1292,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VSTRB32_post:%[0-9]+]]:tgpr = MVE_VSTRB32_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB32 [[COPY]], [[MVE_VSTRB32_post]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRB32_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1322,7 +1322,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VSTRB16_post:%[0-9]+]]:tgpr = MVE_VSTRB16_post [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_post]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRB16_post]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1351,7 +1351,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0
    ; CHECK: [[MVE_VLDRWU32_pre:%[0-9]+]]:rgpr, [[MVE_VLDRWU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRWU32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRWU32_:%[0-9]+]]:mqpr = MVE_VLDRWU32 [[MVE_VLDRWU32_pre]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRWU32_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:rgpr = COPY $r0
@@ -1379,7 +1379,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0
    ; CHECK: [[MVE_VLDRHU16_pre:%[0-9]+]]:rgpr, [[MVE_VLDRHU16_pre1:%[0-9]+]]:mqpr = MVE_VLDRHU16_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHU16_:%[0-9]+]]:mqpr = MVE_VLDRHU16 [[MVE_VLDRHU16_pre]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRHU16_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:rgpr = COPY $r0
@@ -1407,7 +1407,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r0
    ; CHECK: [[MVE_VLDRBU8_pre:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU8_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU8_:%[0-9]+]]:mqpr = MVE_VLDRBU8 [[MVE_VLDRBU8_pre]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRBU8_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:rgpr = COPY $r0
@@ -1435,7 +1435,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRBS32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBS32_pre1:%[0-9]+]]:mqpr = MVE_VLDRBS32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBS32_:%[0-9]+]]:mqpr = MVE_VLDRBS32 [[MVE_VLDRBS32_pre]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRBS32_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1463,7 +1463,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRBU32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU32_:%[0-9]+]]:mqpr = MVE_VLDRBU32 [[MVE_VLDRBU32_pre]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRBU32_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1491,7 +1491,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRHS32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRHS32_pre1:%[0-9]+]]:mqpr = MVE_VLDRHS32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHS32_:%[0-9]+]]:mqpr = MVE_VLDRHS32 [[MVE_VLDRHS32_pre]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRHS32_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1519,7 +1519,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRHU32_pre:%[0-9]+]]:tgpr, [[MVE_VLDRHU32_pre1:%[0-9]+]]:mqpr = MVE_VLDRHU32_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRHU32_:%[0-9]+]]:mqpr = MVE_VLDRHU32 [[MVE_VLDRHU32_pre]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRHU32_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1547,7 +1547,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRBS16_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBS16_pre1:%[0-9]+]]:mqpr = MVE_VLDRBS16_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBS16_:%[0-9]+]]:mqpr = MVE_VLDRBS16 [[MVE_VLDRBS16_pre]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRBS16_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1575,7 +1575,7 @@
    ; CHECK: liveins: $r0, $q0
    ; CHECK: [[COPY:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VLDRBU16_pre:%[0-9]+]]:tgpr, [[MVE_VLDRBU16_pre1:%[0-9]+]]:mqpr = MVE_VLDRBU16_pre [[COPY]], 32, 0, $noreg :: (load 16, align 8)
-    ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[COPY]], 16, 0, $noreg :: (load 16, align 8)
+    ; CHECK: [[MVE_VLDRBU16_:%[0-9]+]]:mqpr = MVE_VLDRBU16 [[MVE_VLDRBU16_pre]], -16, 0, $noreg :: (load 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VLDRBU16_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %0:tgpr = COPY $r0
@@ -1604,7 +1604,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
    ; CHECK: [[MVE_VSTRWU32_pre:%[0-9]+]]:rgpr = MVE_VSTRWU32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRWU32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRWU32 [[COPY]], [[MVE_VSTRWU32_pre]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRWU32_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1634,7 +1634,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
    ; CHECK: [[MVE_VSTRHU16_pre:%[0-9]+]]:rgpr = MVE_VSTRHU16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRHU16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRHU16 [[COPY]], [[MVE_VSTRHU16_pre]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRHU16_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1664,7 +1664,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
    ; CHECK: [[MVE_VSTRBU8_pre:%[0-9]+]]:rgpr = MVE_VSTRBU8_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRBU8 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRBU8 [[COPY]], [[MVE_VSTRBU8_pre]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRBU8_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1694,7 +1694,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VSTRH32_pre:%[0-9]+]]:tgpr = MVE_VSTRH32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRH32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRH32 [[COPY]], [[MVE_VSTRH32_pre]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRH32_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1724,7 +1724,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VSTRB32_pre:%[0-9]+]]:tgpr = MVE_VSTRB32_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB32 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB32 [[COPY]], [[MVE_VSTRB32_pre]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRB32_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1754,7 +1754,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -16, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRB16_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1784,9 +1784,9 @@
    ; CHECK: [[COPY:%[0-9]+]]:mqpr = COPY $q0
    ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], -16, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 34, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], -48, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre]], 2, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRB16_pre]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
@@ -1820,7 +1820,7 @@
    ; CHECK: [[COPY1:%[0-9]+]]:tgpr = COPY $r0
    ; CHECK: [[MVE_VSTRB16_pre:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 32, 0, $noreg :: (store 16, align 8)
    ; CHECK: [[MVE_VSTRB16_pre1:%[0-9]+]]:tgpr = MVE_VSTRB16_pre [[COPY]], [[COPY1]], 64, 0, $noreg :: (store 16, align 8)
-    ; CHECK: MVE_VSTRB16 [[COPY]], [[COPY1]], 16, 0, $noreg :: (store 16, align 8)
+    ; CHECK: MVE_VSTRB16 [[COPY]], [[MVE_VSTRB16_pre1]], -48, 0, $noreg :: (store 16, align 8)
    ; CHECK: $r0 = COPY [[MVE_VSTRB16_pre1]]
    ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
    %1:mqpr = COPY $q0
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst2.ll b/llvm/test/CodeGen/Thumb2/mve-vst2.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vst2.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst2.ll
@@ -303,25 +303,24 @@
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11}
 ; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
-; CHECK-NEXT:    vldrw.u32 q2, [r0, #32]
 ; CHECK-NEXT:    vldrw.u32 q4, [r0, #48]
+; CHECK-NEXT:    vldrw.u32 q2, [r0, #32]
 ; CHECK-NEXT:    vmov.f64 d6, d1
-; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    vmov.f64 d10, d3
 ; CHECK-NEXT:    vmov.f32 s13, s3
 ; CHECK-NEXT:    vmov.f32 s21, s7
+; CHECK-NEXT:    vmov.f32 s2, s16
 ; CHECK-NEXT:    vmov.f32 s6, s8
 ; CHECK-NEXT:    vmov.f32 s14, s18
 ; CHECK-NEXT:    vmov.f32 s22, s10
-; CHECK-NEXT:    vmov.f32 s2, s16
-; CHECK-NEXT:    vmov.f32 s7, s9
-; CHECK-NEXT:    vmov.f32 s23, s11
-; CHECK-NEXT:    vstrb.8 q1, [r0], #48
-; CHECK-NEXT:    vmov.f32 s15, s19
-; CHECK-NEXT:    vstrw.32 q5, [r1, #16]
 ; CHECK-NEXT:    vmov.f32 s3, s17
-; CHECK-NEXT:    vstrw.32 q3, [r0]
+; CHECK-NEXT:    vmov.f32 s7, s9
 ; CHECK-NEXT:    vstrw.32 q0, [r1, #32]
+; CHECK-NEXT:    vmov.f32 s15, s19
+; CHECK-NEXT:    vstrb.8 q1, [r1], #48
+; CHECK-NEXT:    vmov.f32 s23, s11
+; CHECK-NEXT:    vstrw.32 q3, [r1]
+; CHECK-NEXT:    vstrw.32 q5, [r1, #-32]
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    bx lr
 entry:
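In the mve-vst2.ll codegen above the same rebasing shows up at the assembly level: the later vstrw.32 stores are now expressed relative to the base written back by vstrb.8 q1, [r1], #48, so the mov r0, r1 copy of the base register disappears and the store that used to be at [r1, #16] becomes [r1, #-32]. A quick check of that arithmetic, again purely as an illustration and not part of the patch:

    // Illustration only: vst2 store offsets before and after the #48
    // post-increment write-back (old offset relative to the original base,
    // new offset relative to the incremented base).
    #include <cassert>

    int main() {
      const int Increment = 48;
      assert(16 - Increment == -32); // vstrw.32 q5, [r1, #16] -> [r1, #-32]
      assert(48 - Increment == 0);   // vstrw.32 q3, [r0]      -> [r1]
      return 0;
    }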
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -1085,7 +1085,6 @@
 ; CHECK-NEXT:    vldrw.u32 q2, [r0, #16]
 ; CHECK-NEXT:    vldrw.u32 q0, [r0, #32]
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
-; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    vmov.f64 d6, d5
 ; CHECK-NEXT:    vmov.f32 s13, s11
 ; CHECK-NEXT:    vmov.f32 s14, s2
@@ -1093,10 +1092,10 @@
 ; CHECK-NEXT:    vmov.f32 s2, s6
 ; CHECK-NEXT:    vmov.f32 s3, s7
 ; CHECK-NEXT:    vmov.f32 s6, s8
-; CHECK-NEXT:    vstrw.32 q0, [r1, #16]
 ; CHECK-NEXT:    vmov.f32 s7, s9
-; CHECK-NEXT:    vstrb.8 q1, [r0], #32
-; CHECK-NEXT:    vstrw.32 q3, [r0]
+; CHECK-NEXT:    vstrb.8 q1, [r1], #32
+; CHECK-NEXT:    vstrw.32 q3, [r1]
+; CHECK-NEXT:    vstrw.32 q0, [r1, #-16]
 ; CHECK-NEXT:    bx lr
 entry:
   %s1 = getelementptr <2 x i64>, <2 x i64>* %src, i32 0