diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h --- a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h +++ b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h @@ -164,6 +164,9 @@ const SlotMapping &IRSlots; PerTargetMIParsingState &Target; + std::map MachineMetadataNodes; + std::map MachineForwardRefMDNodes; + DenseMap MBBSlots; DenseMap VRegInfos; StringMap VRegInfosNamed; @@ -233,6 +236,9 @@ bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, SMDiagnostic &Error); +bool parseMachineMetadata(PerFunctionMIParsingState &PFS, StringRef Src, + SMDiagnostic &Error); + } // end namespace llvm #endif // LLVM_CODEGEN_MIRPARSER_MIPARSER_H diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -128,6 +128,9 @@ kw_unknown_size, kw_unknown_address, + // Metadata types. + kw_distinct, + // Named metadata keywords md_tbaa, md_alias_scope, diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -272,6 +272,7 @@ .Case("bbsections", MIToken::kw_bbsections) .Case("unknown-size", MIToken::kw_unknown_size) .Case("unknown-address", MIToken::kw_unknown_address) + .Case("distinct", MIToken::kw_distinct) .Default(MIToken::Identifier); } diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -428,6 +428,10 @@ bool parseStandaloneRegister(Register &Reg); bool parseStandaloneStackObject(int &FI); bool parseStandaloneMDNode(MDNode *&Node); + bool parseMachineMetadata(); + bool parseMDTuple(MDNode *&MD, bool IsDistinct); + bool parseMDNodeVector(SmallVectorImpl &Elts); + bool parseMetadata(Metadata *&MD); bool 
parseBasicBlockDefinition(DenseMap &MBBSlots);
 
@@ -1170,6 +1174,150 @@
   return false;
 }
 
+/// Parse a single machine metadata definition of the form
+///   !<id> = [distinct] !{ <operand>, ... }
+/// and record the node under <id> in PFS.MachineMetadataNodes. If an earlier
+/// definition referred to <id> before it was defined, the temporary placeholder
+/// created for that reference is replaced by the real node.
+///
+/// \returns true on error.
+bool MIParser::parseMachineMetadata() {
+  lex();
+  if (Token.isNot(MIToken::exclaim))
+    return error("expected a metadata node");
+
+  lex();
+  if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+    return error("expected metadata id after '!'");
+  unsigned ID = 0;
+  if (getUnsigned(ID))
+    return true;
+  lex();
+  if (expectAndConsume(MIToken::equal))
+    return true;
+  bool IsDistinct = Token.is(MIToken::kw_distinct);
+  if (IsDistinct)
+    lex();
+  if (Token.isNot(MIToken::exclaim))
+    return error("expected a metadata node");
+  lex();
+
+  MDNode *MD = nullptr;
+  if (parseMDTuple(MD, IsDistinct))
+    return true;
+  // Each source string holds exactly one definition, so anything left over is
+  // malformed input; reject it instead of silently dropping it.
+  if (Token.isNot(MIToken::Eof))
+    return error("expected end of string after the metadata node");
+
+  auto FI = PFS.MachineForwardRefMDNodes.find(ID);
+  if (FI != PFS.MachineForwardRefMDNodes.end()) {
+    // parseMetadata() already registered a placeholder for this id; redirect
+    // all of its uses to the real node.
+    FI->second->replaceAllUsesWith(MD);
+    PFS.MachineForwardRefMDNodes.erase(FI);
+
+    assert(PFS.MachineMetadataNodes[ID] == MD && "Tracking VH didn't work");
+  } else {
+    if (PFS.MachineMetadataNodes.count(ID))
+      return error("Metadata id is already used");
+    PFS.MachineMetadataNodes[ID].reset(MD);
+  }
+
+  return false;
+}
+
+/// Parse the '{ ... }' operand list of a metadata tuple and build the node in
+/// \p MD, as a distinct node when \p IsDistinct is set and a uniqued one
+/// otherwise.
+bool MIParser::parseMDTuple(MDNode *&MD, bool IsDistinct) {
+  SmallVector<Metadata *, 16> Elts;
+  if (parseMDNodeVector(Elts))
+    return true;
+  MD = (IsDistinct ? MDTuple::getDistinct
+                   : MDTuple::get)(MF.getFunction().getContext(), Elts);
+  return false;
+}
+
+/// Parse a brace-enclosed, comma-separated list of metadata operands (possibly
+/// empty) and append them to \p Elts.
+bool MIParser::parseMDNodeVector(SmallVectorImpl<Metadata *> &Elts) {
+  if (Token.isNot(MIToken::lbrace))
+    return error("expected '{' here");
+  lex();
+
+  if (Token.is(MIToken::rbrace)) {
+    lex();
+    return false;
+  }
+
+  do {
+    Metadata *MD;
+    if (parseMetadata(MD))
+      return true;
+
+    Elts.push_back(MD);
+
+    if (Token.isNot(MIToken::comma))
+      break;
+    lex();
+  } while (true);
+
+  if (Token.isNot(MIToken::rbrace))
+    return error("expected end of metadata node");
+  lex();
+
+  return false;
+}
+
+/// Parse one tuple operand: a metadata string or a reference to a numbered
+/// node. Numbered references are looked up among the IR-level slots first,
+/// then among the machine metadata nodes; an id found in neither becomes a
+/// temporary placeholder to be resolved when its definition is parsed.
+///
+///   ::= !42
+///   ::= !"string"
+bool MIParser::parseMetadata(Metadata *&MD) {
+  if (Token.isNot(MIToken::exclaim))
+    return error("expected '!' here");
+  lex();
+
+  if (Token.is(MIToken::StringConstant)) {
+    std::string Str;
+    if (parseStringConstant(Str))
+      return true;
+    MD = MDString::get(MF.getFunction().getContext(), Str);
+    return false;
+  }
+
+  if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+    return error("expected metadata id after '!'");
+
+  unsigned ID = 0;
+  if (getUnsigned(ID))
+    return true;
+  lex();
+
+  auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID);
+  if (NodeInfo != PFS.IRSlots.MetadataNodes.end()) {
+    MD = NodeInfo->second.get();
+    return false;
+  }
+  // Check machine metadata.
+  NodeInfo = PFS.MachineMetadataNodes.find(ID);
+  if (NodeInfo != PFS.MachineMetadataNodes.end()) {
+    MD = NodeInfo->second.get();
+    return false;
+  }
+  // Forward reference.
+  auto &FwdRef = PFS.MachineForwardRefMDNodes[ID];
+  FwdRef = MDTuple::getTemporary(MF.getFunction().getContext(), None);
+  PFS.MachineMetadataNodes[ID].reset(FwdRef.get());
+  MD = FwdRef.get();
+
+  return false;
+}
+
 static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
   assert(MO.isImplicit());
   return MO.isDef() ? 
"implicit-def" : "implicit"; @@ -2012,8 +2137,11 @@ if (getUnsigned(ID)) return true; auto NodeInfo = PFS.IRSlots.MetadataNodes.find(ID); - if (NodeInfo == PFS.IRSlots.MetadataNodes.end()) - return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'"); + if (NodeInfo == PFS.IRSlots.MetadataNodes.end()) { + NodeInfo = PFS.MachineMetadataNodes.find(ID); + if (NodeInfo == PFS.MachineMetadataNodes.end()) + return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'"); + } lex(); Node = NodeInfo->second.get(); return false; @@ -3258,6 +3386,11 @@ return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node); } +bool llvm::parseMachineMetadata(PerFunctionMIParsingState &PFS, + StringRef Src, SMDiagnostic &Error) { + return MIParser(PFS, Error, Src).parseMachineMetadata(); +} + bool MIRFormatter::parseIRValue(StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS, const Value *&V, ErrorCallbackType ErrorCallback) { diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -143,6 +143,10 @@ bool initializeJumpTableInfo(PerFunctionMIParsingState &PFS, const yaml::MachineJumpTable &YamlJTI); + bool parseMachineMetadataNodes(PerFunctionMIParsingState &PFS, + MachineFunction &MF, + const yaml::MachineFunction &YMF); + private: bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, const yaml::StringValue &Source); @@ -151,6 +155,9 @@ MachineBasicBlock *&MBB, const yaml::StringValue &Source); + bool parseMachineMetadata(PerFunctionMIParsingState &PFS, + const yaml::StringValue &Source); + /// Return a MIR diagnostic converted from an MI string diagnostic. 
SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error,
                                     SMRange SourceRange);
@@ -457,6 +464,9 @@
     if (initializeConstantPool(PFS, *ConstantPool, YamlMF))
       return true;
   }
+  if (!YamlMF.MachineMetadataNodes.empty() &&
+      parseMachineMetadataNodes(PFS, MF, YamlMF))
+    return true;
 
   StringRef BlockStr = YamlMF.Body.Value.Value;
   SMDiagnostic Error;
@@ -920,6 +930,38 @@
   return false;
 }
 
+/// Parse one entry of the function's machine metadata node list and register
+/// the node it defines in \p PFS. A diagnostic produced by the MI parser is
+/// re-reported against \p Source's range.
+bool MIRParserImpl::parseMachineMetadata(PerFunctionMIParsingState &PFS,
+                                         const yaml::StringValue &Source) {
+  SMDiagnostic Error;
+  if (llvm::parseMachineMetadata(PFS, Source.Value, Error))
+    return error(Error, Source.SourceRange);
+  return false;
+}
+
+/// Parse every entry of \p YMF's machine metadata node list, in order. Entries
+/// may refer to nodes defined later in the list, so unresolved references can
+/// only be diagnosed once the whole list has been parsed.
+///
+/// \returns true on error.
+bool MIRParserImpl::parseMachineMetadataNodes(
+    PerFunctionMIParsingState &PFS, MachineFunction &MF,
+    const yaml::MachineFunction &YMF) {
+  for (const auto &MDS : YMF.MachineMetadataNodes) {
+    if (parseMachineMetadata(PFS, MDS))
+      return true;
+  }
+  // A forward reference that is still pending here names a node that was
+  // never defined. Diagnose it rather than leaving a temporary placeholder
+  // in the parsed metadata.
+  if (!PFS.MachineForwardRefMDNodes.empty())
+    return error("use of undefined metadata '!" +
+                 Twine(PFS.MachineForwardRefMDNodes.begin()->first) + "'");
+  return false;
+}
+
 SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error,
                                                  SMRange SourceRange) {
   assert(SourceRange.isValid() && "Invalid source range");
diff --git a/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.mir b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.mir
@@ -0,0 +1,301 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=none -o - %s | FileCheck %s
+--- |
+  ; ModuleID = 'test/CodeGen/AArch64/memcpy-scoped-aa.ll'
+  source_filename = "test/CodeGen/AArch64/memcpy-scoped-aa.ll"
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-unknown-linux-gnu"
+
+  define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) {
+    %p0 = bitcast i32* %p to i8*
+    %add.ptr = getelementptr inbounds i32, i32* %p, i64 4
+    %p1 = bitcast i32* %add.ptr to i8*
+    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef 
nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + define i32 @test_memcpy_inline(i32* nocapture %p, i32* nocapture readonly %q) { + %p0 = bitcast i32* %p to i8* + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %p1 = bitcast i32* %add.ptr to i8* + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + define i32 @test_mempcpy(i32* nocapture %p, i32* nocapture readonly %q) { + %p0 = bitcast i32* %p to i8* + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %p1 = bitcast i32* %add.ptr to i8* + %call = tail call i8* @mempcpy(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef nonnull align 4 dereferenceable(16) %p1, i64 16), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0 + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0 + + declare i8* @mempcpy(i8*, i8*, i64) + + 
attributes #0 = { argmemonly nofree nounwind willreturn } + + !0 = !{!1} + !1 = distinct !{!1, !2, !"bax: %p"} + !2 = distinct !{!2, !"bax"} + !3 = !{!4} + !4 = distinct !{!4, !2, !"bax: %q"} + +... +--- +name: test_memcpy +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gpr64common, preferred-register: '' } + - { id: 1, class: gpr64common, preferred-register: '' } + - { id: 2, class: fpr128, preferred-register: '' } + - { id: 3, class: gpr32, preferred-register: '' } + - { id: 4, class: gpr32, preferred-register: '' } + - { id: 5, class: gpr32, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +machineMetadataNodes: + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!10 = !{!1, !9}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!5 = !{!1, !6}' + - '!11 = !{!4, !6}' + - '!8 = !{!4, !9}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' +body: | + bb.0 (%ir-block.0): + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_memcpy + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: 
(store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr killed [[LDRWui]], killed [[LDRWui1]] + ; CHECK: $w0 = COPY [[ADDWrr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64common = COPY $x1 + %0:gpr64common = COPY $x0 + %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %3:gpr32 = LDRWui %1, 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %4:gpr32 = LDRWui %1, 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %5:gpr32 = ADDWrr killed %3, killed %4 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... +--- +name: test_memcpy_inline +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gpr64common, preferred-register: '' } + - { id: 1, class: gpr64common, preferred-register: '' } + - { id: 2, class: fpr128, preferred-register: '' } + - { id: 3, class: gpr32, preferred-register: '' } + - { id: 4, class: gpr32, preferred-register: '' } + - { id: 5, class: gpr32, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' 
+fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +machineMetadataNodes: + - '!10 = !{!1, !9}' + - '!5 = !{!1, !6}' + - '!8 = !{!4, !9}' + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!11 = !{!4, !6}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!6 = distinct !{!6, !7, !"Src"}' +body: | + bb.0 (%ir-block.0): + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_memcpy_inline + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr killed [[LDRWui]], killed [[LDRWui1]] + ; CHECK: $w0 = COPY [[ADDWrr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64common = COPY $x1 + %0:gpr64common = COPY $x0 + %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %3:gpr32 = LDRWui %1, 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %4:gpr32 = LDRWui %1, 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %5:gpr32 = ADDWrr killed %3, killed %4 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_mempcpy +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gpr64common, preferred-register: '' } + - { id: 1, class: gpr64common, preferred-register: '' } + - { id: 2, class: fpr128, preferred-register: '' } + - { id: 3, class: gpr32, preferred-register: '' } + - { id: 4, class: gpr32, preferred-register: '' } + - { id: 5, class: gpr32, preferred-register: '' } +liveins: + - { reg: '$x0', virtual-reg: '%0' } + - { reg: '$x1', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +machineMetadataNodes: + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!8 = !{!4, !9}' + - '!5 = !{!1, !6}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!11 = !{!4, !6}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!10 = !{!1, !9}' +body: | + bb.0 (%ir-block.0): + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_mempcpy + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load 16 from %ir.p1, align 1, !alias.scope !5, !noalias !8) + ; CHECK: STRQui killed [[LDRQui]], [[COPY1]], 0 :: (store 16 into %ir.p0, align 1, !alias.scope !10, !noalias !11) + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: 
[[LDRWui1:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr killed [[LDRWui]], killed [[LDRWui1]] + ; CHECK: $w0 = COPY [[ADDWrr]] + ; CHECK: RET_ReallyLR implicit $w0 + %1:gpr64common = COPY $x1 + %0:gpr64common = COPY $x0 + %2:fpr128 = LDRQui %0, 1 :: (load 16 from %ir.p1, align 1, !alias.scope !5, !noalias !8) + STRQui killed %2, %0, 0 :: (store 16 into %ir.p0, align 1, !alias.scope !10, !noalias !11) + %3:gpr32 = LDRWui %1, 0 :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %4:gpr32 = LDRWui %1, 1 :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + %5:gpr32 = ADDWrr killed %3, killed %4 + $w0 = COPY %5 + RET_ReallyLR implicit $w0 + +... diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.mir b/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.mir @@ -0,0 +1,351 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=none -o - %s | FileCheck %s +--- | + ; ModuleID = 'test/CodeGen/AMDGPU/memcpy-scoped-aa.ll' + source_filename = "test/CodeGen/AMDGPU/memcpy-scoped-aa.ll" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" + target triple = "amdgcn-amd-amdhsa" + + define i32 @test_memcpy(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) #0 { + %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)* + %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4 + %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)* + tail call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, 
!noalias !3 + %1 = bitcast i32 addrspace(1)* %q to <2 x i32> addrspace(1)* + %2 = load <2 x i32>, <2 x i32> addrspace(1)* %1, align 4, !alias.scope !3, !noalias !0 + %v01 = extractelement <2 x i32> %2, i32 0 + %v12 = extractelement <2 x i32> %2, i32 1 + %add = add i32 %v01, %v12 + ret i32 %add + } + + define i32 @test_memcpy_inline(i32 addrspace(1)* nocapture %p, i32 addrspace(1)* nocapture readonly %q) #0 { + %p0 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)* + %add.ptr = getelementptr inbounds i32, i32 addrspace(1)* %p, i64 4 + %p1 = bitcast i32 addrspace(1)* %add.ptr to i8 addrspace(1)* + tail call void @llvm.memcpy.inline.p1i8.p1i8.i64(i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p0, i8 addrspace(1)* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3 + %1 = bitcast i32 addrspace(1)* %q to <2 x i32> addrspace(1)* + %2 = load <2 x i32>, <2 x i32> addrspace(1)* %1, align 4, !alias.scope !3, !noalias !0 + %v01 = extractelement <2 x i32> %2, i32 0 + %v12 = extractelement <2 x i32> %2, i32 1 + %add = add i32 %v01, %v12 + ret i32 %add + } + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64, i1 immarg) #1 + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.inline.p1i8.p1i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64 immarg, i1 immarg) #1 + + ; Function Attrs: convergent nounwind willreturn + declare { i1, i32 } @llvm.amdgcn.if.i32(i1) #2 + + ; Function Attrs: convergent nounwind willreturn + declare { i1, i32 } @llvm.amdgcn.else.i32.i32(i32) #2 + + ; Function Attrs: convergent nounwind readnone willreturn + declare i32 @llvm.amdgcn.if.break.i32(i1, i32) #3 + + ; Function Attrs: convergent nounwind willreturn + declare i1 @llvm.amdgcn.loop.i32(i32) #2 + + ; Function Attrs: 
convergent nounwind willreturn + declare void @llvm.amdgcn.end.cf.i32(i32) #2 + + attributes #0 = { "target-cpu"="gfx1010" } + attributes #1 = { argmemonly nofree nounwind willreturn "target-cpu"="gfx1010" } + attributes #2 = { convergent nounwind willreturn } + attributes #3 = { convergent nounwind readnone willreturn } + + !0 = !{!1} + !1 = distinct !{!1, !2, !"bax: %p"} + !2 = distinct !{!2, !"bax"} + !3 = !{!4} + !4 = distinct !{!4, !2, !"bax: %q"} + +... +--- +name: test_memcpy +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: vgpr_32, preferred-register: '' } + - { id: 2, class: vgpr_32, preferred-register: '' } + - { id: 3, class: vgpr_32, preferred-register: '' } + - { id: 4, class: sreg_64, preferred-register: '' } + - { id: 5, class: ccr_sgpr_64, preferred-register: '' } + - { id: 6, class: sreg_64, preferred-register: '' } + - { id: 7, class: sreg_64, preferred-register: '' } + - { id: 8, class: vreg_128, preferred-register: '' } + - { id: 9, class: vreg_64, preferred-register: '' } + - { id: 10, class: vreg_64, preferred-register: '' } + - { id: 11, class: vreg_64, preferred-register: '' } + - { id: 12, class: vreg_64, preferred-register: '' } + - { id: 13, class: vgpr_32, preferred-register: '' } + - { id: 14, class: vgpr_32, preferred-register: '' } + - { id: 15, class: vgpr_32, preferred-register: '' } + - { id: 16, class: ccr_sgpr_64, preferred-register: '' } + - { id: 17, class: vreg_64, preferred-register: '' } + - { id: 18, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$vgpr1', virtual-reg: '%1' } + - { reg: '$vgpr2', virtual-reg: '%2' } + - { reg: '$vgpr3', virtual-reg: '%3' } + - { reg: '$sgpr30_sgpr31', virtual-reg: '%4' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: 
false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 20 +machineMetadataNodes: + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!11 = !{!4, !6}' + - '!5 = !{!1, !6}' + - '!8 = !{!4, !9}' + - '!10 = !{!1, !9}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' +body: | + bb.0 (%ir-block.0): + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_memcpy + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, 
[[COPY3]], %subreg.sub1 + ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 + ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 + ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec + ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]] + ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] + ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0 + %4:sreg_64 = COPY $sgpr30_sgpr31 + %3:vgpr_32 = COPY $vgpr3 + %2:vgpr_32 = COPY $vgpr2 + %1:vgpr_32 = COPY $vgpr1 + %0:vgpr_32 = COPY $vgpr0 + %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 + %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 + %9:vreg_64 = COPY %18 + %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + %10:vreg_64 = COPY %18 + GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + %12:vreg_64 = COPY %17 + %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, 
addrspace 1) + %13:vgpr_32 = COPY %11.sub0 + %14:vgpr_32 = COPY %11.sub1 + %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec + %5:ccr_sgpr_64 = COPY %4 + $vgpr0 = COPY %15 + %16:ccr_sgpr_64 = COPY %5 + S_SETPC_B64_return %16, implicit $vgpr0 + +... +--- +name: test_memcpy_inline +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: vgpr_32, preferred-register: '' } + - { id: 2, class: vgpr_32, preferred-register: '' } + - { id: 3, class: vgpr_32, preferred-register: '' } + - { id: 4, class: sreg_64, preferred-register: '' } + - { id: 5, class: ccr_sgpr_64, preferred-register: '' } + - { id: 6, class: sreg_64, preferred-register: '' } + - { id: 7, class: sreg_64, preferred-register: '' } + - { id: 8, class: vreg_128, preferred-register: '' } + - { id: 9, class: vreg_64, preferred-register: '' } + - { id: 10, class: vreg_64, preferred-register: '' } + - { id: 11, class: vreg_64, preferred-register: '' } + - { id: 12, class: vreg_64, preferred-register: '' } + - { id: 13, class: vgpr_32, preferred-register: '' } + - { id: 14, class: vgpr_32, preferred-register: '' } + - { id: 15, class: vgpr_32, preferred-register: '' } + - { id: 16, class: ccr_sgpr_64, preferred-register: '' } + - { id: 17, class: vreg_64, preferred-register: '' } + - { id: 18, class: vreg_64, preferred-register: '' } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$vgpr1', virtual-reg: '%1' } + - { reg: '$vgpr2', virtual-reg: '%2' } + - { reg: '$vgpr3', virtual-reg: '%3' } + - { reg: '$sgpr30_sgpr31', virtual-reg: '%4' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 
4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + explicitKernArgSize: 0 + maxKernArgAlign: 1 + ldsSize: 0 + dynLDSAlign: 1 + isEntryFunction: false + noSignedZerosFPMath: false + memoryBound: false + waveLimiter: false + hasSpilledSGPRs: false + hasSpilledVGPRs: false + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + mode: + ieee: true + dx10-clamp: true + fp32-input-denormals: true + fp32-output-denormals: true + fp64-fp16-input-denormals: true + fp64-fp16-output-denormals: true + highBitsOf32BitAddress: 0 + occupancy: 20 +machineMetadataNodes: + - '!6 = distinct !{!6, !7, !"Src"}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!11 = !{!4, !6}' + - '!5 = !{!1, !6}' + - '!8 = !{!4, !9}' + - '!10 = !{!1, !9}' +body: | + bb.0 (%ir-block.0): + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_memcpy_inline + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1 + ; CHECK: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY5]], 16, 0, 
implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + ; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] + ; CHECK: GLOBAL_STORE_DWORDX4 [[COPY6]], killed [[GLOBAL_LOAD_DWORDX4_]], 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + ; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 killed [[COPY7]], 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) + ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 + ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 + ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec + ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]] + ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] + ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0 + %4:sreg_64 = COPY $sgpr30_sgpr31 + %3:vgpr_32 = COPY $vgpr3 + %2:vgpr_32 = COPY $vgpr2 + %1:vgpr_32 = COPY $vgpr1 + %0:vgpr_32 = COPY $vgpr0 + %17:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 + %18:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1 + %9:vreg_64 = COPY %18 + %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %9, 16, 0, implicit $exec :: (load 16 from %ir.p1, align 4, !alias.scope !5, !noalias !8, addrspace 1) + %10:vreg_64 = COPY %18 + GLOBAL_STORE_DWORDX4 %10, killed %8, 0, 0, implicit $exec :: (store 16 into %ir.p0, align 4, !alias.scope !10, !noalias !11, addrspace 1) + %12:vreg_64 = COPY %17 + %11:vreg_64 = GLOBAL_LOAD_DWORDX2 killed %12, 0, 0, implicit $exec :: (load 8 from %ir.1, align 4, !alias.scope !3, !noalias !0, addrspace 1) + %13:vgpr_32 = COPY %11.sub0 + %14:vgpr_32 = COPY %11.sub1 + %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec + %5:ccr_sgpr_64 = COPY %4 + $vgpr0 
= COPY %15 + %16:ccr_sgpr_64 = COPY %5 + S_SETPC_B64_return %16, implicit $vgpr0 + +... diff --git a/llvm/test/CodeGen/X86/memcpy-scoped-aa.mir b/llvm/test/CodeGen/X86/memcpy-scoped-aa.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/memcpy-scoped-aa.mir @@ -0,0 +1,307 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=none -o - %s | FileCheck %s +--- | + ; ModuleID = 'test/CodeGen/X86/memcpy-scoped-aa.ll' + source_filename = "test/CodeGen/X86/memcpy-scoped-aa.ll" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux-gnu" + + define i32 @test_memcpy(i32* nocapture %p, i32* nocapture readonly %q) { + %p0 = bitcast i32* %p to i8* + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %p1 = bitcast i32* %add.ptr to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + define i32 @test_memcpy_inline(i32* nocapture %p, i32* nocapture readonly %q) { + %p0 = bitcast i32* %p to i8* + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %p1 = bitcast i32* %add.ptr to i8* + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef nonnull align 4 dereferenceable(16) %p1, i64 16, i1 false), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + define i32 
@test_mempcpy(i32* nocapture %p, i32* nocapture readonly %q) { + %p0 = bitcast i32* %p to i8* + %add.ptr = getelementptr inbounds i32, i32* %p, i64 4 + %p1 = bitcast i32* %add.ptr to i8* + %call = tail call i8* @mempcpy(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8* noundef nonnull align 4 dereferenceable(16) %p1, i64 16), !alias.scope !0, !noalias !3 + %v0 = load i32, i32* %q, align 4, !alias.scope !3, !noalias !0 + %q1 = getelementptr inbounds i32, i32* %q, i64 1 + %v1 = load i32, i32* %q1, align 4, !alias.scope !3, !noalias !0 + %add = add i32 %v0, %v1 + ret i32 %add + } + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0 + + ; Function Attrs: argmemonly nofree nounwind willreturn + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0 + + declare i8* @mempcpy(i8*, i8*, i64) + + attributes #0 = { argmemonly nofree nounwind willreturn } + + !0 = !{!1} + !1 = distinct !{!1, !2, !"bax: %p"} + !2 = distinct !{!2, !"bax"} + !3 = !{!4} + !4 = distinct !{!4, !2, !"bax: %q"} + +... 
+--- +name: test_memcpy +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gr64, preferred-register: '' } + - { id: 1, class: gr64, preferred-register: '' } + - { id: 2, class: gr64, preferred-register: '' } + - { id: 3, class: gr64, preferred-register: '' } + - { id: 4, class: gr32, preferred-register: '' } + - { id: 5, class: gr32, preferred-register: '' } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +machineMetadataNodes: + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!8 = !{!4, !9}' + - '!5 = !{!1, !6}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!11 = !{!4, !6}' + - '!10 = !{!1, !9}' +body: | + bb.0 (%ir-block.0): + liveins: $rdi, $rsi + + ; CHECK-LABEL: name: test_memcpy + ; CHECK: liveins: $rdi, $rsi + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store 8 into %ir.p0 + 8, 
align 4, !alias.scope !10, !noalias !11) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: $eax = COPY [[ADD32rm]] + ; CHECK: RET 0, $eax + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) + MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + $eax = COPY %5 + RET 0, $eax + +... 
+--- +name: test_memcpy_inline +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gr64, preferred-register: '' } + - { id: 1, class: gr64, preferred-register: '' } + - { id: 2, class: gr64, preferred-register: '' } + - { id: 3, class: gr64, preferred-register: '' } + - { id: 4, class: gr32, preferred-register: '' } + - { id: 5, class: gr32, preferred-register: '' } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +machineMetadataNodes: + - '!8 = !{!4, !9}' + - '!9 = distinct !{!9, !7, !"Dst"}' + - '!5 = !{!1, !6}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!11 = !{!4, !6}' + - '!10 = !{!1, !9}' + - '!6 = distinct !{!6, !7, !"Src"}' +body: | + bb.0 (%ir-block.0): + liveins: $rdi, $rsi + + ; CHECK-LABEL: name: test_memcpy_inline + ; CHECK: liveins: $rdi, $rsi + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store 8 
into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: $eax = COPY [[ADD32rm]] + ; CHECK: RET 0, $eax + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 4, !alias.scope !5, !noalias !8) + %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 4, !alias.scope !5, !noalias !8) + MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 4, !alias.scope !10, !noalias !11) + MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 4, !alias.scope !10, !noalias !11) + %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + $eax = COPY %5 + RET 0, $eax + +... 
+--- +name: test_mempcpy +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: + - { id: 0, class: gr64, preferred-register: '' } + - { id: 1, class: gr64, preferred-register: '' } + - { id: 2, class: gr64, preferred-register: '' } + - { id: 3, class: gr64, preferred-register: '' } + - { id: 4, class: gr32, preferred-register: '' } + - { id: 5, class: gr32, preferred-register: '' } +liveins: + - { reg: '$rdi', virtual-reg: '%0' } + - { reg: '$rsi', virtual-reg: '%1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +machineMetadataNodes: + - '!5 = !{!1, !6}' + - '!8 = !{!4, !9}' + - '!11 = !{!4, !6}' + - '!10 = !{!1, !9}' + - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' + - '!6 = distinct !{!6, !7, !"Src"}' + - '!9 = distinct !{!9, !7, !"Dst"}' +body: | + bb.0 (%ir-block.0): + liveins: $rdi, $rsi + + ; CHECK-LABEL: name: test_mempcpy + ; CHECK: liveins: $rdi, $rsi + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 1, !alias.scope !5, !noalias !8) + ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 1, !alias.scope !5, !noalias !8) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 8, $noreg, killed [[MOV64rm1]] :: (store 8 into %ir.p0 + 
8, align 1, !alias.scope !10, !noalias !11) + ; CHECK: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[MOV64rm]] :: (store 8 into %ir.p0, align 1, !alias.scope !10, !noalias !11) + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY]], 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + ; CHECK: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[COPY]], 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + ; CHECK: $eax = COPY [[ADD32rm]] + ; CHECK: RET 0, $eax + %1:gr64 = COPY $rsi + %0:gr64 = COPY $rdi + %2:gr64 = MOV64rm %0, 1, $noreg, 16, $noreg :: (load 8 from %ir.p1, align 1, !alias.scope !5, !noalias !8) + %3:gr64 = MOV64rm %0, 1, $noreg, 24, $noreg :: (load 8 from %ir.p1 + 8, align 1, !alias.scope !5, !noalias !8) + MOV64mr %0, 1, $noreg, 8, $noreg, killed %3 :: (store 8 into %ir.p0 + 8, align 1, !alias.scope !10, !noalias !11) + MOV64mr %0, 1, $noreg, 0, $noreg, killed %2 :: (store 8 into %ir.p0, align 1, !alias.scope !10, !noalias !11) + %4:gr32 = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4 from %ir.q, !alias.scope !3, !noalias !0) + %5:gr32 = ADD32rm %4, %1, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %ir.q1, !alias.scope !3, !noalias !0) + $eax = COPY %5 + RET 0, $eax + +...