Index: llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp =================================================================== --- llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1269,9 +1269,15 @@ bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty()); // Lazy store all fp registers to the stack - MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) - .addReg(ARM::SP) - .add(predOps(ARMCC::AL)); + MachineInstrBuilder VLSTM = + BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ARM::VPR, RegState::Implicit | RegState::Undef) + .addReg(ARM::FPSCR, RegState::Implicit | RegState::Undef) + .addReg(ARM::FPSCR_NZCV, RegState::Implicit | RegState::Undef); + for (int I = 0; I < 8; ++I) // Q0-Q7: the same Q registers VLLDM defines + VLSTM.addReg(ARM::Q0 + I, RegState::Implicit | RegState::Undef); // Restore all arguments for (const auto &Regs : ClearedFPRegs) { @@ -1358,9 +1364,15 @@ .add(predOps(ARMCC::AL)); // Lazy store all FP registers to the stack - BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) - .addReg(ARM::SP) - .add(predOps(ARMCC::AL)); + MachineInstrBuilder VLSTM = + BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .addReg(ARM::VPR, RegState::Implicit | RegState::Undef) + .addReg(ARM::FPSCR, RegState::Implicit | RegState::Undef) + .addReg(ARM::FPSCR_NZCV, RegState::Implicit | RegState::Undef); + for (int I = 0; I < 8; ++I) // Q0-Q7: the same Q registers VLLDM defines + VLSTM.addReg(ARM::Q0 + I, RegState::Implicit | RegState::Undef); } else { // Push all the callee-saved registers (s16-s31). 
MachineInstrBuilder VPUSH = Index: llvm/lib/Target/ARM/ARMInstrVFP.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrVFP.td +++ llvm/lib/Target/ARM/ARMInstrVFP.td @@ -277,7 +277,6 @@ //===----------------------------------------------------------------------===// // Lazy load / store multiple Instructions // -let mayLoad = 1 in def VLLDM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone, NoItinerary, "vlldm${p}\t$Rn", "", []>, Requires<[HasV8MMainline, Has8MSecExt]> { @@ -288,9 +287,9 @@ let Inst{15-12} = 0; let Inst{7-0} = 0; let mayLoad = 1; + let Defs = [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, VPR, FPSCR, FPSCR_NZCV]; } -let mayStore = 1 in def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone, NoItinerary, "vlstm${p}\t$Rn", "", []>, Requires<[HasV8MMainline, Has8MSecExt]> { Index: llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/cmse-vlldm-no-reorder.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 --float-abi=hard %s -o - | \ +; RUN: FileCheck %s + +@g = hidden local_unnamed_addr global float (...)* null, align 4 +@a = hidden local_unnamed_addr global float 0.000000e+00, align 4 + +define hidden void @f() local_unnamed_addr #0 { +entry: + %0 = load float ()*, float ()** bitcast (float (...)** @g to float ()**), align 4 + %call = tail call nnan ninf nsz float %0() #1 + store float %call, float* @a, align 4 + ret void +} + +; CHECK: blxns r{{[0-9]+}} +; CHECK: vmov r[[T:[0-9]+]], s0 +; CHECK: vlldm sp +; CHECK: vmov s0, r[[T]] + +attributes #0 = { nounwind } +attributes #1 = { nounwind "cmse_nonsecure_call" } Index: llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir =================================================================== --- llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir +++ llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir @@ -62,7 +62,7 @@ $sp = tSUBspi $sp, 34, 14, $noreg 
VLSTM $sp, 14, $noreg tBLXNSr 14, $noreg, killed $r4, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp - VLLDM $sp, 14, $noreg + VLLDM $sp, 14, $noreg, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv $sp = tADDspi $sp, 34, 14, $noreg $sp = t2LDMIA_UPD $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11 $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc