diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -252,7 +252,30 @@
         llvm::hash_value(ID.Inst->getType()),
         llvm::hash_value(ID.getPredicate()),
         llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
-  else if (CallInst *CI = dyn_cast<CallInst>(ID.Inst)) {
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) {
+    // To hash intrinsics, we use the opcode and types like the other
+    // instructions, but also the intrinsic ID and the name of the
+    // intrinsic.
+    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+    FunctionType *FT = II->getFunctionType();
+    std::string Name;
+    // If there is an overloaded name, we have to use the complex version
+    // of getName to get the entire string.
+    if (Intrinsic::isOverloaded(IntrinsicID))
+      Name =
+          Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
+    // If there is not an overloaded name, we only need to use this version.
+    else
+      Name = Intrinsic::getName(IntrinsicID).str();
+    return llvm::hash_combine(
+        llvm::hash_value(ID.Inst->getOpcode()),
+        llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID),
+        llvm::hash_value(Name),
+        llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
+  }
+
+  if (isa<CallInst>(ID.Inst)) {
     std::string FunctionName = *ID.CalleeName;
     return llvm::hash_combine(
         llvm::hash_value(ID.Inst->getOpcode()),
@@ -260,6 +283,7 @@
         llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName),
         llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
   }
+
   return llvm::hash_combine(
       llvm::hash_value(ID.Inst->getOpcode()),
       llvm::hash_value(ID.Inst->getType()),
@@ -512,8 +536,17 @@
   // analyzed for similarity as it has no bearing on the outcome of the
   // program.
   InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
-  // TODO: Handle specific intrinsics.
-  InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; }
+  InstrType visitIntrinsicInst(IntrinsicInst &II) {
+    // These are disabled due to complications in the CodeExtractor when
+    // outlining these instructions. For instance, it is unclear what we
+    // should do when moving only the start or end lifetime instruction into
+    // an outlined function. Also, assume-like intrinsics could be removed
+    // from the region, removing arguments, causing discrepancies in the
+    // number of inputs between different regions.
+    if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic())
+      return Illegal;
+    return EnableIntrinsics ? Legal : Illegal;
+  }
   // We only allow call instructions where the function has a name and
   // is not an indirect call.
   InstrType visitCallInst(CallInst &CI) {
@@ -540,6 +573,10 @@
   // The flag variable that lets the classifier know whether we should
   // allow indirect calls to be considered legal instructions.
   bool EnableIndirectCalls = false;
+
+  // Flag that lets the classifier know whether we should allow intrinsics to
+  // be checked for similarity.
+  bool EnableIntrinsics = false;
 };

 /// Maps an Instruction to a member of InstrType.
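As context for the hunk above: the new hash folds the intrinsic ID and the (possibly overloaded) intrinsic name into the per-instruction hash, so calls to, say, llvm.memcpy and llvm.memmove land in different similarity groups. Below is a condensed, self-contained sketch of that scheme. It is not part of the patch; it assumes a recent LLVM with the four-argument Intrinsic::getName overload, hashIntrinsic is a hypothetical free function, and the operand-type range from the real code is omitted for brevity.

#include "llvm/ADT/Hashing.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"

// Hypothetical helper mirroring the hash computed in the hunk above.
llvm::hash_code hashIntrinsic(llvm::IntrinsicInst &II) {
  llvm::Intrinsic::ID IntrinsicID = II.getIntrinsicID();
  llvm::FunctionType *FT = II.getFunctionType();
  // Overloaded intrinsics (e.g. llvm.memcpy.p0i8.p0i8.i64) need the fully
  // mangled name so that distinct overloads hash differently; plain
  // intrinsics can use the simple name lookup.
  std::string Name =
      llvm::Intrinsic::isOverloaded(IntrinsicID)
          ? llvm::Intrinsic::getName(IntrinsicID, FT->params(), II.getModule(),
                                     FT)
          : llvm::Intrinsic::getName(IntrinsicID).str();
  return llvm::hash_combine(llvm::hash_value(II.getOpcode()),
                            llvm::hash_value(II.getType()),
                            llvm::hash_value(IntrinsicID),
                            llvm::hash_value(Name));
}

Because the name participates in the hash, two intrinsic calls only fall into the same candidate group when they refer to the exact same overload, which is what the different-intrinsics.ll test below relies on.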
@@ -926,10 +963,12 @@
 public:
   IRSimilarityIdentifier(bool MatchBranches = true,
                          bool MatchIndirectCalls = true,
-                         bool MatchCallsWithName = false)
+                         bool MatchCallsWithName = false,
+                         bool MatchIntrinsics = true)
       : Mapper(&InstDataAllocator, &InstDataListAllocator),
         EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
-        EnableMatchingCallsByName(MatchCallsWithName) {}
+        EnableMatchingCallsByName(MatchCallsWithName),
+        EnableIntrinsics(MatchIntrinsics) {}

 private:
   /// Map the instructions in the module to unsigned integers, using mapping
@@ -1018,6 +1057,10 @@
   /// convention, attributes and type signature.
   bool EnableMatchingCallsByName = true;

+  /// The flag variable that marks whether we should check intrinsics for
+  /// similarity.
+  bool EnableIntrinsics = true;
+
   /// The SimilarityGroups found with the most recent run of \ref
   /// findSimilarity. None if there is no recent run.
   Optional<SimilarityGroupList> SimilarityCandidates;
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -360,7 +360,7 @@
   bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
   // TODO: Handle specific intrinsics individually from those that can be
   // handled.
-  bool IntrinsicInst(IntrinsicInst &II) { return false; }
+  bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
   // We only handle CallInsts that are not indirect, since we cannot guarantee
   // that they have a name in these cases.
   bool visitCallInst(CallInst &CI) {
@@ -396,6 +396,10 @@
   // The flag variable that marks whether we should allow indirect calls
   // to be outlined.
   bool EnableIndirectCalls = true;
+
+  // The flag variable that marks whether we should allow intrinsic
+  // instructions to be outlined.
+  bool EnableIntrinsics = false;
 };

 /// An InstVisitor used to exclude certain instructions from being outlined.
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -29,7 +29,6 @@
                     cl::ReallyHidden,
                     cl::desc("disable similarity matching, and outlining, "
                              "across branches for debugging purposes."));
-} // namespace llvm

 cl::opt<bool>
     DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
@@ -41,6 +40,9 @@
                      cl::desc("only allow matching call instructions if the "
                               "name and type signature match."));

+cl::opt<bool>
+    DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
+                      cl::desc("Don't match or outline intrinsics"));

 IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
                                      IRInstructionDataList &IDList)
@@ -48,6 +50,8 @@
   initializeInstruction();
 }

+} // namespace llvm
+
 void IRInstructionData::initializeInstruction() {
   // We check whether we have a comparison instruction. If so, we
   // find the "less than" version of the predicate for consistency for
@@ -1103,6 +1107,7 @@
   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
+  Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;

   populateMapper(Modules, InstrList, IntegerMapping);
   findCandidates(InstrList, IntegerMapping);
@@ -1115,6 +1120,7 @@
   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
+  Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;

   std::vector<IRInstructionData *> InstrList;
   std::vector<unsigned> IntegerMapping;
@@ -1136,7 +1142,7 @@
 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
   IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
-                                        MatchCallsByName));
+                                        MatchCallsByName, !DisableIntrinsics));
   return false;
 }
@@ -1153,9 +1159,8 @@
 AnalysisKey IRSimilarityAnalysis::Key;
 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
                                                  ModuleAnalysisManager &) {
-  auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
-                                     MatchCallsByName);
+  auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
+                                     MatchCallsByName, !DisableIntrinsics);
   IRSI.findSimilarity(M);
   return IRSI;
 }
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -38,12 +38,17 @@
 // matching and outlining.
 namespace llvm {
 extern cl::opt<bool> DisableBranches;
-} // namespace llvm

 // A command flag to be used for debugging to exclude indirect calls from
 // similarity matching and outlining.
 extern cl::opt<bool> DisableIndirectCalls;

+// A command flag to be used for debugging to exclude intrinsics from
+// similarity matching and outlining.
+extern cl::opt<bool> DisableIntrinsics;
+
+} // namespace llvm
+
 // Set to true if the user wants the ir outliner to run on linkonceodr linkage
 // functions. This is false by default because the linker can dedupe linkonceodr
 // functions. Since the outliner is confined to a single module (modulo LTO),
@@ -2524,6 +2529,8 @@
   // Find the possible similarity sections.
   InstructionClassifier.EnableBranches = !DisableBranches;
   InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
+  InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
+
   IRSimilarityIdentifier &Identifier = getIRSI(M);
   SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
diff --git a/llvm/test/Transforms/IROutliner/illegal-memmove.ll b/llvm/test/Transforms/IROutliner/different-intrinsics.ll
copy from llvm/test/Transforms/IROutliner/illegal-memmove.ll
copy to llvm/test/Transforms/IROutliner/different-intrinsics.ll
--- a/llvm/test/Transforms/IROutliner/illegal-memmove.ll
+++ b/llvm/test/Transforms/IROutliner/different-intrinsics.ll
@@ -1,12 +1,31 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
 ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s

-; This test checks that we do not outline memcpy intrinsics since it may require
-; extra address space checks.
+; This test checks that we do not outline different intrinsics as the same
+; function or as a value, as we would for non-intrinsic functions.
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
 declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)

 define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
+
+define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
 ; CHECK-LABEL: @function1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[B_LOC:%.*]] = alloca i8, align 1
@@ -19,23 +38,14 @@
 ; CHECK-NEXT:    [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
 ; CHECK-NEXT:    call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
 ; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
 ; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
 ;
-entry:
-  %a = load i8, i8* %s
-  %b = load i8, i8* %d
-  call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
-  %c = add i8 %a, %b
-  %ret = load i8, i8* %s
-  ret i8 %ret
-}
-
-define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
+;
 ; CHECK-LABEL: @function2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[B_LOC:%.*]] = alloca i8, align 1
@@ -55,11 +65,28 @@
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
 ; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
 ;
-entry:
-  %a = load i8, i8* %s
-  %b = load i8, i8* %d
-  call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
-  %c = add i8 %a, %b
-  %ret = load i8, i8* %s
-  ret i8 %ret
-}
+;
+; CHECK-LABEL: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = add i8 [[TMP0:%.*]], [[TMP1:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = load i8, i8* [[TMP2:%.*]], align 1
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    store i8 [[RET]], i8* [[TMP3:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal void @outlined_ir_func_1(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
+; CHECK-NEXT:    [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    store i8 [[A]], i8* [[TMP2:%.*]], align 1
+; CHECK-NEXT:    store i8 [[B]], i8* [[TMP3:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
--- a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s

 ; This test checks that we do not outline memcpy intrinsics since it may require
 ; extra address space checks.
diff --git a/llvm/test/Transforms/IROutliner/illegal-memmove.ll b/llvm/test/Transforms/IROutliner/illegal-memmove.ll
--- a/llvm/test/Transforms/IROutliner/illegal-memmove.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-memmove.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s

 ; This test checks that we do not outline memcpy intrinsics since it may require
 ; extra address space checks.
diff --git a/llvm/test/Transforms/IROutliner/illegal-memset.ll b/llvm/test/Transforms/IROutliner/illegal-memset.ll
--- a/llvm/test/Transforms/IROutliner/illegal-memset.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-memset.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s

 ; This test checks that we do not outline memset intrinsics since it requires
 ; extra address space checks.
diff --git a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
--- a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s

 ; This test ensures that we do not outline vararg instructions or intrinsics, as
 ; they may cause inconsistencies when outlining.
diff --git a/llvm/test/Transforms/IROutliner/outline-memcpy.ll b/llvm/test/Transforms/IROutliner/outline-memcpy.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outline-memcpy.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test checks that we successfully outline identical memcpy instructions.
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
+
+define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
+
+define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
+; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
+; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
+; CHECK-NEXT:    [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false)
+; CHECK-NEXT:    [[C:%.*]] = add i8 [[A]], [[B]]
+; CHECK-NEXT:    [[RET:%.*]] = load i8, i8* [[TMP0]], align 1
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    store i8 [[RET]], i8* [[TMP3:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outline-memmove.ll b/llvm/test/Transforms/IROutliner/outline-memmove.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outline-memmove.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test checks that we successfully outline identical memmove instructions.
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
+
+define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
+
+define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
+; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
+; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
+; CHECK-NEXT:    [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false)
+; CHECK-NEXT:    [[C:%.*]] = add i8 [[A]], [[B]]
+; CHECK-NEXT:    [[RET:%.*]] = load i8, i8* [[TMP0]], align 1
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    store i8 [[RET]], i8* [[TMP3:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/illegal-memset.ll b/llvm/test/Transforms/IROutliner/outline-memset.ll
copy from llvm/test/Transforms/IROutliner/illegal-memset.ll
copy to llvm/test/Transforms/IROutliner/outline-memset.ll
--- a/llvm/test/Transforms/IROutliner/illegal-memset.ll
+++ b/llvm/test/Transforms/IROutliner/outline-memset.ll
@@ -1,20 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
 ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s

-; This test checks that we do not outline memset intrinsics since it requires
-; extra address space checks.
+; This test checks that we successfully outline identical memset instructions.
 declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)

 define i64 @function1(i64 %x, i64 %z, i64 %n) {
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[POOL:%.*]] = alloca [59 x i64], align 4
-; CHECK-NEXT:    [[TMP:%.*]] = bitcast [59 x i64]* [[POOL]] to i8*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 236, i1 false)
-; CHECK-NEXT:    call void @outlined_ir_func_0(i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]])
-; CHECK-NEXT:    ret i64 0
-;
 entry:
   %pool = alloca [59 x i64], align 4
   %tmp = bitcast [59 x i64]* %pool to i8*
@@ -26,14 +17,6 @@
 }

 define i64 @function2(i64 %x, i64 %z, i64 %n) {
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[POOL:%.*]] = alloca [59 x i64], align 4
-; CHECK-NEXT:    [[TMP:%.*]] = bitcast [59 x i64]* [[POOL]] to i8*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 236, i1 false)
-; CHECK-NEXT:    call void @outlined_ir_func_0(i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]])
-; CHECK-NEXT:    ret i64 0
-;
 entry:
   %pool = alloca [59 x i64], align 4
   %tmp = bitcast [59 x i64]* %pool to i8*
@@ -43,3 +26,30 @@
   %c = add i64 %x, %z
   ret i64 0
 }
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[POOL:%.*]] = alloca [59 x i64], align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]])
+; CHECK-NEXT:    ret i64 0
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[POOL:%.*]] = alloca [59 x i64], align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]])
+; CHECK-NEXT:    ret i64 0
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[TMP:%.*]] = bitcast [59 x i64]* [[TMP0:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 236, i1 false)
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i64 [[TMP1:%.*]], 0
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[TMP2:%.*]], [[TMP3:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = add i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
copy from llvm/test/Transforms/IROutliner/illegal-vaarg.ll
copy to llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
--- a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
+++ b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
@@ -1,34 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
 ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s

-; This test ensures that we do not outline vararg instructions or intrinsics, as
-; they may cause inconsistencies when outlining.
+; This test checks that we successfully outline identical var arg intrinsics,
+; but not the va_arg instruction itself.

 declare void @llvm.va_start(i8*)
 declare void @llvm.va_copy(i8*, i8*)
 declare void @llvm.va_end(i8*)

 define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind {
-; CHECK-LABEL: @func1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[AP1_LOC:%.*]] = alloca i8*, align 8
-; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
-; CHECK-NEXT:    [[AP:%.*]] = alloca i8*, align 4
-; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]])
-; CHECK-NEXT:    [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT:    call void @llvm.va_start(i8* [[AP1_RELOAD]])
-; CHECK-NEXT:    [[TMP0:%.*]] = va_arg i8** [[AP]], i32
-; CHECK-NEXT:    call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1_RELOAD]])
-; CHECK-NEXT:    call void @llvm.va_end(i8* [[AP1_RELOAD]])
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[C]], align 4
-; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[C]], align 4
-; CHECK-NEXT:    ret i32 [[TMP]]
-;
 entry:
   %a.addr = alloca i32, align 4
   %b.addr = alloca double, align 8
@@ -47,27 +27,6 @@
 }

 define i32 @func2(i32 %a, double %b, i8* %v, ...) nounwind {
-; CHECK-LABEL: @func2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[AP1_LOC:%.*]] = alloca i8*, align 8
-; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
-; CHECK-NEXT:    [[AP:%.*]] = alloca i8*, align 4
-; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]])
-; CHECK-NEXT:    [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT:    call void @llvm.va_start(i8* [[AP1_RELOAD]])
-; CHECK-NEXT:    [[TMP0:%.*]] = va_arg i8** [[AP]], i32
-; CHECK-NEXT:    call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1_RELOAD]])
-; CHECK-NEXT:    call void @llvm.va_end(i8* [[AP1_RELOAD]])
-; CHECK-NEXT:    store i32 [[TMP0]], i32* [[C]], align 4
-; CHECK-NEXT:    [[AP2:%.*]] = bitcast i8** [[AP]] to i8*
-; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[C]], align 4
-; CHECK-NEXT:    ret i32 [[TMP]]
-;
 entry:
   %a.addr = alloca i32, align 4
   %b.addr = alloca double, align 8
@@ -85,3 +44,47 @@
   %tmp = load i32, i32* %c, align 4
   ret i32 %tmp
 }
+; CHECK-LABEL: @func1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[AP:%.*]] = alloca i8*, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+; CHECK-NEXT:    [[AP1:%.*]] = bitcast i8** [[AP]] to i8*
+; CHECK-NEXT:    call void @llvm.va_start(i8* [[AP1]])
+; CHECK-NEXT:    [[TMP0:%.*]] = va_arg i8** [[AP]], i32
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]])
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT:    ret i32 [[TMP]]
+;
+;
+; CHECK-LABEL: @func2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[AP:%.*]] = alloca i8*, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+; CHECK-NEXT:    [[AP1:%.*]] = bitcast i8** [[AP]] to i8*
+; CHECK-NEXT:    call void @llvm.va_start(i8* [[AP1]])
+; CHECK-NEXT:    [[TMP0:%.*]] = va_arg i8** [[AP]], i32
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]])
+; CHECK-NEXT:    [[AP2:%.*]] = bitcast i8** [[AP]] to i8*
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT:    ret i32 [[TMP]]
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    call void @llvm.va_copy(i8* [[TMP0:%.*]], i8* [[TMP1:%.*]])
+; CHECK-NEXT:    call void @llvm.va_end(i8* [[TMP1]])
+; CHECK-NEXT:    store i32 [[TMP2:%.*]], i32* [[TMP3:%.*]], align 4
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
--- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
@@ -1479,7 +1479,8 @@
 // are considered illegal since extra checking is needed to handle the address
 // space checking.

-// Checks that a memset instruction is mapped to an illegal value.
+// Checks that a memset instruction is mapped to an illegal value when
+// specified.
 TEST(IRInstructionMapper, MemSetIllegal) {
   StringRef ModuleString = R"(
     declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1503,6 +1504,7 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);

   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1510,7 +1512,8 @@
   ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]);
 }

-// Checks that a memcpy instruction is mapped to an illegal value.
+// Checks that a memcpy instruction is mapped to an illegal value when
+// specified.
 TEST(IRInstructionMapper, MemCpyIllegal) {
   StringRef ModuleString = R"(
     declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1534,6 +1537,7 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);

   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1542,7 +1546,8 @@
   ASSERT_LT(UnsignedVec[2], UnsignedVec[0]);
 }

-// Checks that a memmove instruction is mapped to an illegal value.
+// Checks that a memmove instruction is mapped to an illegal value when
+// specified.
 TEST(IRInstructionMapper, MemMoveIllegal) {
   StringRef ModuleString = R"(
     declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1566,6 +1571,7 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);

   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1573,6 +1579,45 @@
   ASSERT_LT(UnsignedVec[2], UnsignedVec[0]);
 }

+// Checks that mem* instructions are mapped to a legal value when not
+// specified, and that the different intrinsics are mapped to different
+// values.
+TEST(IRInstructionMapper, MemOpsLegal) {
+  StringRef ModuleString = R"(
+    declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+    declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+    declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+
+    define i64 @function(i64 %x, i64 %z, i64 %n) {
+    entry:
+      %pool = alloca [59 x i64], align 4
+      %tmp = bitcast [59 x i64]* %pool to i8*
+      call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+      call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+      call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+      %cmp3 = icmp eq i64 %n, 0
+      %a = add i64 %x, %z
+      %c = add i64 %x, %z
+      ret i64 0
+    })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = true;
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(9));
+  ASSERT_LT(UnsignedVec[2], UnsignedVec[3]);
+  ASSERT_LT(UnsignedVec[3], UnsignedVec[4]);
+  ASSERT_LT(UnsignedVec[4], UnsignedVec[5]);
+}
+
 // Checks that variable argument instructions are mapped to an illegal value.
 // We exclude variable argument instructions since variable arguments
 // require extra checking of the argument list.
@@ -1614,6 +1659,7 @@
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);

   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
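To make the new knob concrete, here is a sketch of one more unit test in the same style. It is not part of the patch: the test name VaStartLegalWhenIntrinsicsEnabled is hypothetical, it reuses this file's existing makeLLVMModule and getVectors helpers, and the expected count assumes the mapper emits one value per instruction, as the tests above do. It exercises the other side of the flag: a non-lifetime, non-assume-like intrinsic such as llvm.va_start should map to a legal value once EnableIntrinsics is set.

// Hypothetical test (not in the patch): checks that an ordinary intrinsic
// is mapped when intrinsic matching is enabled.
TEST(IRInstructionMapper, VaStartLegalWhenIntrinsicsEnabled) {
  StringRef ModuleString = R"(
    declare void @llvm.va_start(i8*)

    define void @function(i8* %ap) {
    entry:
      call void @llvm.va_start(i8* %ap)
      ret void
    })";
  LLVMContext Context;
  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);

  std::vector<IRInstructionData *> InstrList;
  std::vector<unsigned> UnsignedVec;

  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
  // Opt in: visitIntrinsicInst only returns Legal for intrinsics that are
  // neither lifetime markers nor assume-like when this flag is set.
  Mapper.InstClassifier.EnableIntrinsics = true;
  getVectors(*M, Mapper, InstrList, UnsignedVec);

  // One mapped value per instruction: the va_start call and the ret.
  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(2));
}

Note the asymmetry the tests above encode: the analysis-side defaults leave intrinsic matching on (MatchIntrinsics = true, overridable with -no-ir-sim-intrinsics), while the outliner's own classifier defaults EnableIntrinsics to false and is switched on explicitly in IROutliner.cpp.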