diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -432,16 +432,6 @@ return TypeIdx; } - unsigned immIdx(unsigned ImmIdx) { - assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM - - MCOI::OPERAND_FIRST_GENERIC_IMM) && - "Imm Index is out of bounds"); -#ifndef NDEBUG - ImmIdxsCovered.set(ImmIdx); -#endif - return ImmIdx; - } - void markAllIdxsAsCovered() { #ifndef NDEBUG TypeIdxsCovered.set(); @@ -568,6 +558,16 @@ } unsigned getAlias() const { return AliasOf; } + unsigned immIdx(unsigned ImmIdx) { + assert(ImmIdx <= (MCOI::OPERAND_LAST_GENERIC_IMM - + MCOI::OPERAND_FIRST_GENERIC_IMM) && + "Imm Index is out of bounds"); +#ifndef NDEBUG + ImmIdxsCovered.set(ImmIdx); +#endif + return ImmIdx; + } + /// The instruction is legal if predicate is true. LegalizeRuleSet &legalIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that the free-form @@ -824,11 +824,22 @@ LegalizeRuleSet &customForCartesianProduct(std::initializer_list Types) { return actionForCartesianProduct(LegalizeAction::Custom, Types); } + /// The instruction is custom when type indexes 0 and 1 are both in their + /// respective lists. LegalizeRuleSet & customForCartesianProduct(std::initializer_list Types0, std::initializer_list Types1) { return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1); } + /// The instruction is custom when type indexes 0, 1, and 2 are all in + /// their respective lists. + LegalizeRuleSet & + customForCartesianProduct(std::initializer_list Types0, + std::initializer_list Types1, + std::initializer_list Types2) { + return actionForCartesianProduct(LegalizeAction::Custom, Types0, Types1, + Types2); + } /// Unconditionally custom lower. 
LegalizeRuleSet &custom() { diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -95,6 +95,8 @@ void LowerJumpTableDest(MCStreamer &OutStreamer, const MachineInstr &MI); + void LowerMOPS(MCStreamer &OutStreamer, const MachineInstr &MI); + void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI); void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, @@ -936,6 +938,43 @@ .addImm(Size == 4 ? 0 : 2)); } +void AArch64AsmPrinter::LowerMOPS(llvm::MCStreamer &OutStreamer, + const llvm::MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + assert(STI->hasMOPS()); + assert(STI->hasMTE() || Opcode != AArch64::MOPSMemorySetTaggingPseudo); + + const auto Ops = [Opcode]() -> std::array { + if (Opcode == AArch64::MOPSMemoryCopyPseudo) + return {AArch64::CPYFP, AArch64::CPYFM, AArch64::CPYFE}; + if (Opcode == AArch64::MOPSMemoryMovePseudo) + return {AArch64::CPYP, AArch64::CPYM, AArch64::CPYE}; + if (Opcode == AArch64::MOPSMemorySetPseudo) + return {AArch64::SETP, AArch64::SETM, AArch64::SETE}; + if (Opcode == AArch64::MOPSMemorySetTaggingPseudo) + return {AArch64::SETGP, AArch64::SETGM, AArch64::MOPSSETGE}; + llvm_unreachable("Unhandled memory operation pseudo"); + }(); + const bool IsSet = Opcode == AArch64::MOPSMemorySetPseudo || + Opcode == AArch64::MOPSMemorySetTaggingPseudo; + + for (auto Op : Ops) { + int i = 0; + auto MCIB = MCInstBuilder(Op); + // Destination registers + MCIB.addReg(MI.getOperand(i++).getReg()); + MCIB.addReg(MI.getOperand(i++).getReg()); + if (!IsSet) + MCIB.addReg(MI.getOperand(i++).getReg()); + // Input registers + MCIB.addReg(MI.getOperand(i++).getReg()); + MCIB.addReg(MI.getOperand(i++).getReg()); + MCIB.addReg(MI.getOperand(i++).getReg()); + + EmitToStreamer(OutStreamer, MCIB); + } +} + void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, 
StackMaps &SM, const MachineInstr &MI) { unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes(); @@ -1363,6 +1402,13 @@ emitFMov0(*MI); return; + case AArch64::MOPSMemoryCopyPseudo: + case AArch64::MOPSMemoryMovePseudo: + case AArch64::MOPSMemorySetPseudo: + case AArch64::MOPSMemorySetTaggingPseudo: + LowerMOPS(*OutStreamer, *MI); + return; + case TargetOpcode::STACKMAP: return LowerSTACKMAP(*OutStreamer, SM, *MI); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -130,6 +130,10 @@ case AArch64::JumpTableDest32: case AArch64::JumpTableDest16: case AArch64::JumpTableDest8: + case AArch64::MOPSMemoryCopyPseudo: + case AArch64::MOPSMemoryMovePseudo: + case AArch64::MOPSMemorySetPseudo: + case AArch64::MOPSMemorySetTaggingPseudo: NumBytes = 12; break; case AArch64::SPACE: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -8363,6 +8363,27 @@ } } +let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in { + let mayLoad = 1 in { + def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), + [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; + def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn), + [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>; + } + let mayLoad = 0 in { + def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), + [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; + } +} +let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in { + def 
MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb), + (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm), + [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>; +} + let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1 in def StoreSwiftAsyncContext : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset), diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -193,6 +193,7 @@ bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); + bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI); bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI); unsigned emitConstantPoolEntry(const Constant *CPVal, @@ -3425,6 +3426,13 @@ case TargetOpcode::G_VECREDUCE_FADD: case TargetOpcode::G_VECREDUCE_ADD: return selectReduction(I, MRI); + case TargetOpcode::G_BZERO: + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMCPY_INLINE: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: + assert(STI.hasMOPS() && "Shouldn't get here without +mops feature"); + return selectMOPS(I, MRI); } return false; @@ -3482,6 +3490,68 @@ return false; } +bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI, + MachineRegisterInfo &MRI) { + assert(GI.getOpcode() != TargetOpcode::G_BZERO && + "There is no point combining to G_BZERO only to re-materialize the " + "zero."); + + unsigned Mopcode; + switch (GI.getOpcode()) { + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMCPY_INLINE: + Mopcode = AArch64::MOPSMemoryCopyPseudo; + break; + case TargetOpcode::G_MEMMOVE: + Mopcode = AArch64::MOPSMemoryMovePseudo; + break; + case TargetOpcode::G_MEMSET: + // For tagged memset see 
llvm.aarch64.mops.memset.tag + Mopcode = AArch64::MOPSMemorySetPseudo; + break; + } + + auto &DstPtr = GI.getOperand(0); + auto &SrcOrVal = GI.getOperand(1); + auto &Size = GI.getOperand(2); + + // Create copies of the registers that can be clobbered. + const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg()); + const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg()); + const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg()); + + const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo; + const auto &SrcValRegClass = + IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass; + + // Constrain to specific registers + RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI); + RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI); + RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI); + + MIB.buildCopy(DstPtrCopy, DstPtr); + MIB.buildCopy(SrcValCopy, SrcOrVal); + MIB.buildCopy(SizeCopy, Size); + + // New instruction uses the copied registers because it must update them. + // The defs are not used since they don't exist in G_MEM*. They are still + // tied. 
+ // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE + Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass); + Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + if (IsSet) { + MIB.buildInstr(Mopcode, {DefDstPtr, DefSize}, + {DstPtrCopy, SizeCopy, SrcValCopy}); + } else { + Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass); + MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize}, + {DstPtrCopy, SrcValCopy, SizeCopy}); + } + + GI.eraseFromParent(); + return true; +} + bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) { assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); @@ -5376,6 +5446,36 @@ constrainSelectedInstRegOperands(*Store, TII, TRI, RBI); break; } + case Intrinsic::aarch64_mops_memset_tag: { + // Transform + // %dst:gpr(p0) = \ + // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag), + // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64) + // where %dst is updated, into + // %Rd:GPR64common, %Rn:GPR64) = \ + // MOPSMemorySetTaggingPseudo \ + // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64 + // where Rd and Rn are tied. + // It is expected that %val has been extended to s64 in legalization. + // Note that the order of the size/value operands are swapped. + + Register DstDef = I.getOperand(0).getReg(); + // I.getOperand(1) is the intrinsic function + Register DstUse = I.getOperand(2).getReg(); + Register ValUse = I.getOperand(3).getReg(); + Register SizeUse = I.getOperand(4).getReg(); + + // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one. + // Therefore an additional virtual register is required for the updated size + // operand. This value is not accessible via the semantics of the intrinsic. 
+ Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64)); + + auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo, + {DstDef, SizeDef}, {DstUse, SizeUse, ValUse}); + Memset.cloneMemRefs(I); + constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI); + break; + } } I.eraseFromParent(); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -56,6 +56,7 @@ bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const; + bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -699,8 +699,28 @@ getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower(); - getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET}) - .libcall(); + if (ST.hasMOPS()) { + // G_BZERO is not supported. Currently it is only emitted by + // PreLegalizerCombiner for G_MEMSET with zero constant. + getActionDefinitionsBuilder(G_BZERO).unsupported(); + + getActionDefinitionsBuilder(G_MEMSET) + .legalForCartesianProduct({p0}, {s64}, {s64}) + .customForCartesianProduct({p0}, {s8}, {s64}) + .immIdx(0); // Inform verifier imm idx 0 is handled. + + getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE}) + .legalForCartesianProduct({p0}, {p0}, {s64}) + .immIdx(0); // Inform verifier imm idx 0 is handled. 
+ + // G_MEMCPY_INLINE does not have a tailcall immediate + getActionDefinitionsBuilder(G_MEMCPY_INLINE) + .legalForCartesianProduct({p0}, {p0}, {s64}); + + } else { + getActionDefinitionsBuilder({G_BZERO, G_MEMCPY, G_MEMMOVE, G_MEMSET}) + .libcall(); + } // FIXME: Legal types are only legal with NEON. getActionDefinitionsBuilder(G_ABS) @@ -832,6 +852,11 @@ return legalizeAtomicCmpxchg128(MI, MRI, Helper); case TargetOpcode::G_CTTZ: return legalizeCTTZ(MI, Helper); + case TargetOpcode::G_BZERO: + case TargetOpcode::G_MEMCPY: + case TargetOpcode::G_MEMMOVE: + case TargetOpcode::G_MEMSET: + return legalizeMemOps(MI, Helper); } llvm_unreachable("expected switch to return"); @@ -989,6 +1014,15 @@ MI.eraseFromParent(); return true; } + case Intrinsic::aarch64_mops_memset_tag: { + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); + // Zext the value to 64 bit + MachineIRBuilder MIB(MI); + auto &Value = MI.getOperand(3); + Register ZExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0); + Value.setReg(ZExtValueReg); + return true; + } } return true; @@ -1359,3 +1393,20 @@ MI.eraseFromParent(); return true; } + +bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI, + LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + + // Tagged version MOPSMemorySetTagged is legalised in legalizeIntrinsic + if (MI.getOpcode() == TargetOpcode::G_MEMSET) { + // Zext the value operand to 64 bit + auto &Value = MI.getOperand(1); + Register ZExtValueReg = + MIRBuilder.buildAnyExt(LLT::scalar(64), Value).getReg(0); + Value.setReg(ZExtValueReg); + return true; + } + + return false; +} diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp @@ -75,6 +75,7 @@ MachineIRBuilder &B) const { CombinerHelper 
Helper(Observer, B, KB, MDT); AArch64GenO0PreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper); + auto &ST = static_cast(B.getMF().getSubtarget()); if (Generated.tryCombineAll(Observer, MI, B)) return true; @@ -90,6 +91,9 @@ case TargetOpcode::G_MEMCPY: case TargetOpcode::G_MEMMOVE: case TargetOpcode::G_MEMSET: { + if (ST.hasMOPS()) + return false; + // At -O0 set a maxlen of 32 to inline; unsigned MaxLen = 32; // Try to inline memcpy type calls if optimizations are enabled. diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops,+mte | FileCheck %s --check-prefix=GISel + +; Function Attrs: mustprogress nofree nosync nounwind willreturn writeonly +declare i8* @llvm.aarch64.mops.memset.tag(i8*, i8, i64) + +define i8* @memset_tagged_0_zeroval(i8* %dst, i64 %size) { +; GISel-LABEL: memset_tagged_0_zeroval: +; GISel: // %bb.0: // %entry +; GISel-NEXT: mov x8, xzr +; GISel-NEXT: setgp [x0]!, x8!, x8 +; GISel-NEXT: setgm [x0]!, x8!, x8 +; GISel-NEXT: setge [x0]!, x8!, x8 +; GISel-NEXT: ret +entry: + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 0) + ret i8* %r +} + +define i8* @memset_tagged_1_zeroval(i8* %dst, i64 %size) { +; GISel-LABEL: memset_tagged_1_zeroval: +; GISel: // %bb.0: // %entry +; GISel-NEXT: mov x9, xzr +; GISel-NEXT: mov w8, #1 +; GISel-NEXT: // kill: def $x8 killed $w8 +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 1) + ret i8* %r +} + +define i8* @memset_tagged_10_zeroval(i8* %dst, i64 %size) { +; GISel-LABEL: 
memset_tagged_10_zeroval: +; GISel: // %bb.0: // %entry +; GISel-NEXT: mov x9, xzr +; GISel-NEXT: mov w8, #10 +; GISel-NEXT: // kill: def $x8 killed $w8 +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 10) + ret i8* %r +} + +define i8* @memset_tagged_10000_zeroval(i8* %dst, i64 %size) { +; GISel-LABEL: memset_tagged_10000_zeroval: +; GISel: // %bb.0: // %entry +; GISel-NEXT: mov x9, xzr +; GISel-NEXT: mov w8, #10000 +; GISel-NEXT: // kill: def $x8 killed $w8 +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 10000) + ret i8* %r +} + +define i8* @memset_tagged_size_zeroval(i8* %dst, i64 %size) { +; GISel-LABEL: memset_tagged_size_zeroval: +; GISel: // %bb.0: // %entry +; GISel-NEXT: mov x8, xzr +; GISel-NEXT: setgp [x0]!, x1!, x8 +; GISel-NEXT: setgm [x0]!, x1!, x8 +; GISel-NEXT: setge [x0]!, x1!, x8 +; GISel-NEXT: ret +entry: + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 0, i64 %size) + ret i8* %r +} + +define i8* @memset_tagged_0(i8* %dst, i64 %size, i32 %value) { +; GISel-LABEL: memset_tagged_0: +; GISel: // %bb.0: // %entry +; GISel-NEXT: // implicit-def: $x9 +; GISel-NEXT: mov w9, w2 +; GISel-NEXT: mov x8, xzr +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 0) + ret i8* %r +} + +define i8* @memset_tagged_1(i8* %dst, i64 %size, i32 %value) { +; GISel-LABEL: memset_tagged_1: +; GISel: // %bb.0: // %entry +; GISel-NEXT: // implicit-def: $x9 +; GISel-NEXT: mov w9, w2 +; GISel-NEXT: mov w8, #1 +; GISel-NEXT: // kill: def $x8 killed $w8 
+; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 1) + ret i8* %r +} + +define i8* @memset_tagged_10(i8* %dst, i64 %size, i32 %value) { +; GISel-LABEL: memset_tagged_10: +; GISel: // %bb.0: // %entry +; GISel-NEXT: // implicit-def: $x9 +; GISel-NEXT: mov w9, w2 +; GISel-NEXT: mov w8, #10 +; GISel-NEXT: // kill: def $x8 killed $w8 +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 10) + ret i8* %r +} + +define i8* @memset_tagged_10000(i8* %dst, i64 %size, i32 %value) { +; GISel-LABEL: memset_tagged_10000: +; GISel: // %bb.0: // %entry +; GISel-NEXT: // implicit-def: $x9 +; GISel-NEXT: mov w9, w2 +; GISel-NEXT: mov w8, #10000 +; GISel-NEXT: // kill: def $x8 killed $w8 +; GISel-NEXT: setgp [x0]!, x8!, x9 +; GISel-NEXT: setgm [x0]!, x8!, x9 +; GISel-NEXT: setge [x0]!, x8!, x9 +; GISel-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 10000) + ret i8* %r +} + +define i8* @memset_tagged_size(i8* %dst, i64 %size, i32 %value) { +; GISel-LABEL: memset_tagged_size: +; GISel: // %bb.0: // %entry +; GISel-NEXT: // implicit-def: $x8 +; GISel-NEXT: mov w8, w2 +; GISel-NEXT: setgp [x0]!, x1!, x8 +; GISel-NEXT: setgm [x0]!, x1!, x8 +; GISel-NEXT: setge [x0]!, x1!, x8 +; GISel-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + %r = tail call i8* @llvm.aarch64.mops.memset.tag(i8* %dst, i8 %value_trunc, i64 %size) + ret i8* %r +} diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/CodeGen/AArch64/aarch64-mops.ll @@ -0,0 +1,825 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 | FileCheck %s --check-prefix=O0-GISel-WITHOUT-MOPS +; RUN: llc %s -o - -mtriple=aarch64-arm-none-eabi -O0 -global-isel=1 -global-isel-abort=1 -mattr=+mops | FileCheck %s --check-prefix=O0-GISel-MOPS + +; Function Attrs: argmemonly nofree nounwind willreturn writeonly +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) + +; Function Attrs: argmemonly nofree nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) + +; Function Attrs: argmemonly nofree nounwind willreturn +declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) + +; Function Attrs: argmemonly nofree nounwind willreturn +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) + +define void @memset_0_zeroval(i8* %dst) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_0_zeroval: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_0_zeroval: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: mov x9, x8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 false) + ret void +} + +define void @memset_0_zeroval_volatile(i8* %dst) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_0_zeroval_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_0_zeroval_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; 
O0-GISel-MOPS-NEXT: mov x9, x8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 true) + ret void +} + +define void @memset_10_zeroval(i8* %dst) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str xzr, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10_zeroval: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x9, xzr +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10, i1 false) + ret void +} + +define void @memset_10_zeroval_volatile(i8* %dst) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, wzr +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10_zeroval_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x9, xzr +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10, i1 true) + ret void +} + +define void @memset_10000_zeroval(i8* %dst) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000_zeroval: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, wzr +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10000_zeroval: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x9, xzr +; O0-GISel-MOPS-NEXT: mov w8, #10000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10000, i1 false) + ret void +} + +define void @memset_10000_zeroval_volatile(i8* %dst) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000_zeroval_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, wzr +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10000_zeroval_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x9, xzr +; O0-GISel-MOPS-NEXT: mov w8, #10000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 10000, i1 true) + ret void +} + +define void @memset_size_zeroval(i8* %dst, i64 %size) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_size_zeroval: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov x2, x1 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, wzr +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_size_zeroval: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: setp [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: setm [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: sete [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %size, i1 false) + ret void +} + +define void @memset_size_zeroval_volatile(i8* %dst, i64 %size) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_size_zeroval_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov x2, x1 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, wzr +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_size_zeroval_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: setp [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: setm [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: sete [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 %size, i1 true) + ret void +} + +define void @memset_0(i8* %dst, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_0: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_0: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, 
xzr +; O0-GISel-MOPS-NEXT: mov x9, x8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 false) + ret void +} + +define void @memset_0_volatile(i8* %dst, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_0_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_0_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: mov x9, x8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 0, i64 0, i1 true) + ret void +} + +define void @memset_10(i8* %dst, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: // implicit-def: $x8 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, w1 +; O0-GISel-WITHOUT-MOPS-NEXT: and x8, x8, #0xff +; O0-GISel-WITHOUT-MOPS-NEXT: mov x9, #72340172838076673 +; O0-GISel-WITHOUT-MOPS-NEXT: mul x8, x8, x9 +; O0-GISel-WITHOUT-MOPS-NEXT: str x8, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: // kill: def $w8 killed $w8 killed $x8 +; O0-GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x9 +; O0-GISel-MOPS-NEXT: mov w9, w1 +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + 
call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10, i1 false) + ret void +} + +define void @memset_10_volatile(i8* %dst, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x9 +; O0-GISel-MOPS-NEXT: mov w9, w1 +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10, i1 true) + ret void +} + +define void @memset_10000(i8* %dst, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10000: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x9 +; O0-GISel-MOPS-NEXT: mov w9, w1 +; O0-GISel-MOPS-NEXT: mov w8, #10000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10000, i1 false) + ret void +} + +define void @memset_10000_volatile(i8* %dst, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_10000_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_10000_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x9 +; O0-GISel-MOPS-NEXT: mov w9, w1 +; O0-GISel-MOPS-NEXT: mov w8, #10000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: setp [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: setm [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: sete [x0]!, x8!, x9 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 10000, i1 true) + ret void +} + +define void @memset_size(i8* %dst, i64 %size, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_size: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: sub sp, sp, #32 +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 32 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: str x1, [sp, #8] // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, w2 +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x2, [sp, #8] // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: add sp, sp, #32 +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_size: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x8 +; O0-GISel-MOPS-NEXT: mov w8, w2 +; O0-GISel-MOPS-NEXT: setp [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: setm [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: sete [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: 
ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 %size, i1 false) + ret void +} + +define void @memset_size_volatile(i8* %dst, i64 %size, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memset_size_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: sub sp, sp, #32 +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 32 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: str x1, [sp, #8] // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: mov w1, w2 +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x2, [sp, #8] // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: bl memset +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: add sp, sp, #32 +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memset_size_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: // implicit-def: $x8 +; O0-GISel-MOPS-NEXT: mov w8, w2 +; O0-GISel-MOPS-NEXT: setp [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: setm [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: sete [x0]!, x1!, x8 +; O0-GISel-MOPS-NEXT: ret +entry: + %value_trunc = trunc i32 %value to i8 + call void @llvm.memset.p0i8.i64(i8* align 1 %dst, i8 %value_trunc, i64 %size, i1 true) + ret void +} + +define void @memcpy_0(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_0: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_0: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false) + ret void +} + +define void @memcpy_0_volatile(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_0_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_0_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true) + ret void +} + +define void @memcpy_10(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_10: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; O0-GISel-WITHOUT-MOPS-NEXT: str x8, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_10: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false) + ret void +} + +define void @memcpy_10_volatile(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_10_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memcpy +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_10_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true) + ret void +} + +define void @memcpy_1000(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_1000: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #1000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memcpy +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_1000: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #1000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 false) + ret void +} + +define void @memcpy_1000_volatile(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_1000_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #1000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memcpy +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_1000_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #1000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 true) + ret void +} + +define void @memcpy_n(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_n: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memcpy +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_n: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x2! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 false) + ret void +} + +define void @memcpy_n_volatile(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_n_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memcpy +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_n_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: cpyfp [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpyfm [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpyfe [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 true) + ret void +} + +define void @memcpy_inline_0(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_inline_0: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_inline_0: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false) + ret void +} + +define void @memcpy_inline_0_volatile(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_inline_0_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_inline_0_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true) + ret void +} + +define void @memcpy_inline_10(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: 
memcpy_inline_10: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; O0-GISel-WITHOUT-MOPS-NEXT: str x8, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_inline_10: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: ldr x8, [x1] +; O0-GISel-MOPS-NEXT: str x8, [x0] +; O0-GISel-MOPS-NEXT: ldrh w8, [x1, #8] +; O0-GISel-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false) + ret void +} + +define void @memcpy_inline_10_volatile(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memcpy_inline_10_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1] +; O0-GISel-WITHOUT-MOPS-NEXT: str x8, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memcpy_inline_10_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: ldr x8, [x1] +; O0-GISel-MOPS-NEXT: str x8, [x0] +; O0-GISel-MOPS-NEXT: ldrh w8, [x1, #8] +; O0-GISel-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true) + ret void +} + +define void @memmove_0(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_0: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_0: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 false) + ret void +} + +define void @memmove_0_volatile(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_0_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_0_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov x8, xzr +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 0, i1 true) + ret void +} + +define void @memmove_10(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_10: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x9, [x1] +; O0-GISel-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: str x9, [x0] +; O0-GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8] +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_10: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 false) + ret void +} + +define void @memmove_10_volatile(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_10_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #10 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memmove +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_10_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #10 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 10, i1 true) + ret void +} + +define void @memmove_1000(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_1000: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #1000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memmove +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_1000: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #1000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 false) + ret void +} + +define void @memmove_1000_volatile(i8* %dst, i8* %src, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_1000_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w8, #1000 +; O0-GISel-WITHOUT-MOPS-NEXT: mov w2, w8 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memmove +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_1000_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: mov w8, #1000 +; O0-GISel-MOPS-NEXT: // kill: def $x8 killed $w8 +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x8! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 1000, i1 true) + ret void +} + +define void @memmove_n(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_n: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memmove +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_n: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x2! 
+; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 false) + ret void +} + +define void @memmove_n_volatile(i8* %dst, i8* %src, i64 %size, i32 %value) { +; O0-GISel-WITHOUT-MOPS-LABEL: memmove_n_volatile: +; O0-GISel-WITHOUT-MOPS: // %bb.0: // %entry +; O0-GISel-WITHOUT-MOPS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_def_cfa_offset 16 +; O0-GISel-WITHOUT-MOPS-NEXT: .cfi_offset w30, -16 +; O0-GISel-WITHOUT-MOPS-NEXT: bl memmove +; O0-GISel-WITHOUT-MOPS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; O0-GISel-WITHOUT-MOPS-NEXT: ret +; +; O0-GISel-MOPS-LABEL: memmove_n_volatile: +; O0-GISel-MOPS: // %bb.0: // %entry +; O0-GISel-MOPS-NEXT: cpyp [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpym [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: cpye [x0]!, [x1]!, x2! +; O0-GISel-MOPS-NEXT: ret +entry: + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %dst, i8* align 1 %src, i64 %size, i1 true) + ret void +} diff --git a/llvm/unittests/Target/AArch64/InstSizes.cpp b/llvm/unittests/Target/AArch64/InstSizes.cpp --- a/llvm/unittests/Target/AArch64/InstSizes.cpp +++ b/llvm/unittests/Target/AArch64/InstSizes.cpp @@ -22,9 +22,9 @@ std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error); - return std::unique_ptr(static_cast( - TheTarget->createTargetMachine(TT, CPU, FS, TargetOptions(), None, None, - CodeGenOpt::Default))); + return std::unique_ptr( + static_cast(TheTarget->createTargetMachine( + TT, CPU, FS, TargetOptions(), None, None, CodeGenOpt::Default))); } std::unique_ptr createInstrInfo(TargetMachine *TM) { @@ -45,16 +45,15 @@ std::function Checks) { LLVMContext Context; - auto MIRString = - "--- |\n" - " declare void @sizes()\n" - + InputIRSnippet.str() + - "...\n" - "---\n" - "name: sizes\n" - "body: |\n" - " bb.0:\n" - + InputMIRSnippet.str(); + auto MIRString = "--- |\n" + " declare void @sizes()\n" + + InputIRSnippet.str() + + 
"...\n" + "---\n" + "name: sizes\n" + "body: |\n" + " bb.0:\n" + + InputMIRSnippet.str(); std::unique_ptr MBuffer = MemoryBuffer::getMemBuffer(MIRString); std::unique_ptr MParser = @@ -103,12 +102,14 @@ runChecks(TM.get(), II.get(), "", " \n" - " frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp\n", + " frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, " + "implicit $sp\n", isAuthInst); runChecks(TM.get(), II.get(), "", " \n" - " frame-destroy AUTIBSP implicit-def $lr, implicit killed $lr, implicit $sp\n", + " frame-destroy AUTIBSP implicit-def $lr, implicit killed $lr, " + "implicit $sp\n", isAuthInst); } @@ -117,8 +118,9 @@ ASSERT_TRUE(TM); std::unique_ptr II = createInstrInfo(TM.get()); - runChecks(TM.get(), II.get(), "", " STACKMAP 0, 16\n" - " STACKMAP 1, 32\n", + runChecks(TM.get(), II.get(), "", + " STACKMAP 0, 16\n" + " STACKMAP 1, 32\n", [](AArch64InstrInfo &II, MachineFunction &MF) { auto I = MF.begin()->begin(); EXPECT_EQ(16u, II.getInstSizeInBytes(*I)); @@ -155,3 +157,17 @@ EXPECT_EQ(16u, II.getInstSizeInBytes(*I)); }); } + +TEST(InstSizes, MOPSMemorySetTaggingPseudo) { + std::unique_ptr TM = createTargetMachine(); + std::unique_ptr II = createInstrInfo(TM.get()); + + runChecks(TM.get(), II.get(), "", + " renamable $x0, dead renamable $x1 = MOPSMemorySetTaggingPseudo " + "killed renamable $x0, killed renamable $x1, killed renamable $x2, " + "implicit-def dead $nzcv\n", + [](AArch64InstrInfo &II, MachineFunction &MF) { + auto I = MF.begin()->begin(); + EXPECT_EQ(12u, II.getInstSizeInBytes(*I)); + }); +}