Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h =================================================================== --- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -223,12 +223,36 @@ llvm::hash_value(ID.Inst->getType()), llvm::hash_value(ID.getPredicate()), llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); - else if (CallInst *CI = dyn_cast(ID.Inst)) + + if (IntrinsicInst *II = dyn_cast(ID.Inst)) { + // To hash intrinsics, we use the opcode, and types like the other + // instructions, but also, the Intrinsic ID, and the Name of the + // intrinsic. + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + FunctionType *FT = II->getFunctionType(); + std::string Name; + // If there is an overloaded name, we have to use the complex version + // of getName to get the entire string. + if (Intrinsic::isOverloaded(IntrinsicID)) + Name = + Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT); + // If there is not an overloaded name, we only need to use this version. + else + Name = Intrinsic::getName(IntrinsicID).str(); + return llvm::hash_combine( + llvm::hash_value(ID.Inst->getOpcode()), + llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID), + llvm::hash_value(Name), + llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); + } + + if (CallInst *CI = dyn_cast(ID.Inst)) return llvm::hash_combine( llvm::hash_value(ID.Inst->getOpcode()), llvm::hash_value(ID.Inst->getType()), llvm::hash_value(CI->getCalledFunction()->getName().str()), llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); + return llvm::hash_combine( llvm::hash_value(ID.Inst->getOpcode()), llvm::hash_value(ID.Inst->getType()), @@ -478,7 +502,17 @@ // program. InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; } // TODO: Handle specific intrinsics. - InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; } + InstrType visitIntrinsicInst(IntrinsicInst &II) { + // These are disabled due to complications in the CodeExtractor when + // outlining these instructions. For instance, It is unclear what we + // should do when moving only the start or end lifetime instruction into + // an outlined function. Also, assume-like intrinsics could be removed + // from the region, removing arguments, causing discrepencies in the + // number of inputs between different regions. + if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic()) + return Illegal; + return EnableIntrinsics ? Legal : Illegal; + } // We only allow call instructions where the function has a name and // is not an indirect call. InstrType visitCallInst(CallInst &CI) { @@ -498,6 +532,10 @@ // The flag variable that lets the classifier know whether we should // allow branches to be checked for similarity. bool EnableBranches = false; + + // Flag that lets the classifier know whether we should allow intrinsics to + // be checked for similarity. + bool EnableIntrinsics = false; }; /// Maps an Instruction to a member of InstrType. @@ -884,9 +922,9 @@ /// analyzing the module. class IRSimilarityIdentifier { public: - IRSimilarityIdentifier(bool MatchBranches = true) + IRSimilarityIdentifier(bool MatchBranches = true, bool MatchIntrinsics = true) : Mapper(&InstDataAllocator, &InstDataListAllocator), - EnableBranches(MatchBranches) {} + EnableBranches(MatchBranches), EnableIntrinsics(MatchIntrinsics) {} private: /// Map the instructions in the module to unsigned integers, using mapping @@ -966,6 +1004,10 @@ /// similarity, or only look within basic blocks. bool EnableBranches = true; + /// The flag variable that marks whether we should check intrinsics for + /// similarity. + bool EnableIntrinsics = true; + /// The SimilarityGroups found with the most recent run of \ref /// findSimilarity. None if there is no recent run. Optional SimilarityCandidates; Index: llvm/include/llvm/Transforms/IPO/IROutliner.h =================================================================== --- llvm/include/llvm/Transforms/IPO/IROutliner.h +++ llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -360,7 +360,7 @@ bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; } // TODO: Handle specific intrinsics individually from those that can be // handled. - bool IntrinsicInst(IntrinsicInst &II) { return false; } + bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; } // We only handle CallInsts that are not indirect, since we cannot guarantee // that they have a name in these cases. bool visitCallInst(CallInst &CI) { @@ -384,6 +384,10 @@ // The flag variable that marks whether we should allow branch instructions // to be outlined. bool EnableBranches = false; + + // The flag variable that marks whether we should allow intrinsics + // instructions to be outlined. + bool EnableIntrinsics = false; }; /// A InstVisitor used to exclude certain instructions from being outlined. Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp =================================================================== --- llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -29,6 +29,10 @@ cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes.")); +cl::opt + DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, + cl::desc("Don't match or outline intrinsics")); + IRInstructionData::IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDList) : Inst(&I), Legal(Legality), IDL(&IDList) { @@ -1074,6 +1078,7 @@ std::vector InstrList; std::vector IntegerMapping; Mapper.InstClassifier.EnableBranches = this->EnableBranches; + Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; populateMapper(Modules, InstrList, IntegerMapping); findCandidates(InstrList, IntegerMapping); @@ -1084,6 +1089,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { resetSimilarityCandidates(); Mapper.InstClassifier.EnableBranches = this->EnableBranches; + Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; std::vector InstrList; std::vector IntegerMapping; @@ -1104,7 +1110,7 @@ } bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { - IRSI.reset(new IRSimilarityIdentifier(!DisableBranches)); + IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIntrinsics)); return false; } @@ -1122,7 +1128,7 @@ IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, ModuleAnalysisManager &) { - auto IRSI = IRSimilarityIdentifier(!DisableBranches); + auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIntrinsics); IRSI.findSimilarity(M); return IRSI; } Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -38,6 +38,10 @@ // matching and outlining. extern cl::opt DisableBranches; +// A command flag to be used for debugging to exclude intrinsics from similarity +// matching and outlining. +extern cl::opt DisableIntrinsics; + // Set to true if the user wants the ir outliner to run on linkonceodr linkage // functions. This is false by default because the linker can dedupe linkonceodr // functions. Since the outliner is confined to a single module (modulo LTO), @@ -2507,6 +2511,7 @@ unsigned IROutliner::doOutline(Module &M) { // Find the possible similarity sections. InstructionClassifier.EnableBranches = !DisableBranches; + InstructionClassifier.EnableIntrinsics = !DisableIntrinsics; IRSimilarityIdentifier &Identifier = getIRSI(M); SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity(); Index: llvm/test/Transforms/IROutliner/different-intrinsics.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/different-intrinsics.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we do not outline different intrinsics as the same +; function or as a value like we would for non-intrinsic functions. + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} + +define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) +; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 +; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) +; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 +; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[C:%.*]] = add i8 [[TMP0:%.*]], [[TMP1:%.*]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP2:%.*]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[RET]], i8* [[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_1( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[A]], i8* [[TMP2:%.*]], align 1 +; CHECK-NEXT: store i8 [[B]], i8* [[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; Index: llvm/test/Transforms/IROutliner/illegal-memcpy.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memcpy.ll +++ llvm/test/Transforms/IROutliner/illegal-memcpy.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test checks that we do not outline memcpy intrinsics since it may require ; extra address space checks. Index: llvm/test/Transforms/IROutliner/illegal-memmove.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memmove.ll +++ llvm/test/Transforms/IROutliner/illegal-memmove.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test checks that we do not outline memcpy intrinsics since it may require ; extra address space checks. Index: llvm/test/Transforms/IROutliner/illegal-memset.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memset.ll +++ llvm/test/Transforms/IROutliner/illegal-memset.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test checks that we do not outline memset intrinsics since it requires ; extra address space checks. Index: llvm/test/Transforms/IROutliner/illegal-vaarg.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-vaarg.ll +++ llvm/test/Transforms/IROutliner/illegal-vaarg.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test ensures that we do not outline vararg instructions or intrinsics, as ; they may cause inconsistencies when outlining. Index: llvm/test/Transforms/IROutliner/outline-memcpy.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outline-memcpy.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we successfully outline identical memcpy instructions. + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} + +define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false) +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP0]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[RET]], i8* [[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; Index: llvm/test/Transforms/IROutliner/outline-memmove.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outline-memmove.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we sucecssfully outline identical memmove instructions. + +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} + +define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1 +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false) +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP0]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[RET]], i8* [[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; Index: llvm/test/Transforms/IROutliner/outline-memset.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outline-memset.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we successfully outline identical memset instructions. + +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + +define i64 @function1(i64 %x, i64 %z, i64 %n) { +entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 +} + +define i64 @function2(i64 %x, i64 %z, i64 %n) { +entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[POOL:%.*]] = alloca [59 x i64], align 4 +; CHECK-NEXT: call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]]) +; CHECK-NEXT: ret i64 0 +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[POOL:%.*]] = alloca [59 x i64], align 4 +; CHECK-NEXT: call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]]) +; CHECK-NEXT: ret i64 0 +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[TMP:%.*]] = bitcast [59 x i64]* [[TMP0:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 236, i1 false) +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[TMP1:%.*]], 0 +; CHECK-NEXT: [[A:%.*]] = add i64 [[TMP2:%.*]], [[TMP3:%.*]] +; CHECK-NEXT: [[C:%.*]] = add i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: ret void +; Index: llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we sucessfully outline identical memcpy var arg +; intrinsics, but not the var arg instruction itself. + +declare void @llvm.va_start(i8*) +declare void @llvm.va_copy(i8*, i8*) +declare void @llvm.va_end(i8*) + +define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + call void @llvm.va_copy(i8* %v, i8* %ap1) + call void @llvm.va_end(i8* %ap1) + store i32 %0, i32* %c, align 4 + %tmp = load i32, i32* %c, align 4 + ret i32 %tmp +} + +define i32 @func2(i32 %a, double %b, i8* %v, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + call void @llvm.va_copy(i8* %v, i8* %ap1) + call void @llvm.va_end(i8* %ap1) + store i32 %0, i32* %c, align 4 + %ap2 = bitcast i8** %ap to i8* + %tmp = load i32, i32* %c, align 4 + ret i32 %tmp +} +; CHECK-LABEL: @func1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) +; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]]) +; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: ret i32 [[TMP]] +; +; +; CHECK-LABEL: @func2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) +; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]]) +; CHECK-NEXT: [[AP2:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: ret i32 [[TMP]] +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: call void @llvm.va_copy(i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) +; CHECK-NEXT: call void @llvm.va_end(i8* [[TMP1]]) +; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: ret void +; Index: llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp =================================================================== --- llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -1422,7 +1422,8 @@ // are considered illegal since is extra checking needed to handle the address // space checking. -// Checks that a memset instruction is mapped to an illegal value. +// Checks that a memset instruction is mapped to an illegal value when +// specified. TEST(IRInstructionMapper, MemSetIllegal) { StringRef ModuleString = R"( declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) @@ -1446,6 +1447,7 @@ SpecificBumpPtrAllocator InstDataAllocator; SpecificBumpPtrAllocator IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); @@ -1453,7 +1455,8 @@ ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]); } -// Checks that a memcpy instruction is mapped to an illegal value. +// Checks that a memcpy instruction is mapped to an illegal value when +// specified. TEST(IRInstructionMapper, MemCpyIllegal) { StringRef ModuleString = R"( declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) @@ -1477,6 +1480,7 @@ SpecificBumpPtrAllocator InstDataAllocator; SpecificBumpPtrAllocator IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); @@ -1485,7 +1489,8 @@ ASSERT_LT(UnsignedVec[2], UnsignedVec[0]); } -// Checks that a memmove instruction is mapped to an illegal value. +// Checks that a memmove instruction is mapped to an illegal value when +// specified. TEST(IRInstructionMapper, MemMoveIllegal) { StringRef ModuleString = R"( declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) @@ -1509,6 +1514,7 @@ SpecificBumpPtrAllocator InstDataAllocator; SpecificBumpPtrAllocator IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); @@ -1516,6 +1522,45 @@ ASSERT_LT(UnsignedVec[2], UnsignedVec[0]); } +// Checks that mem* instructions are mapped to an legal value when not +// specified, and that all the intrinsics are marked differently. +TEST(IRInstructionMapper, MemOpsLegal) { + StringRef ModuleString = R"( + declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + + define i64 @function(i64 %x, i64 %z, i64 %n) { + entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = true; + getVectors(*M, Mapper, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(9)); + ASSERT_LT(UnsignedVec[2], UnsignedVec[3]); + ASSERT_LT(UnsignedVec[3], UnsignedVec[4]); + ASSERT_LT(UnsignedVec[4], UnsignedVec[5]); +} + // Checks that a variable argument instructions are mapped to an illegal value. // We exclude variable argument instructions since variable arguments // requires extra checking of the argument list. @@ -1557,6 +1602,7 @@ SpecificBumpPtrAllocator InstDataAllocator; SpecificBumpPtrAllocator IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size());