Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -51,6 +51,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -119,6 +120,7 @@ TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; const DataLayout *DL; + OptimizationRemarkEmitter &ORE; bool ApplyCodeSizeHeuristics; public: @@ -126,8 +128,9 @@ LoopInfo *LI, ScalarEvolution *SE, TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const DataLayout *DL) - : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {} + const DataLayout *DL, + OptimizationRemarkEmitter &ORE) + : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {} bool runOnLoop(Loop *L); @@ -220,7 +223,12 @@ *L->getHeader()->getParent()); const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout(); - LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL); + // For the old PM, we can't use OptimizationRemarkEmitter as an analysis + // pass. Function analyses need to be preserved across loop transformations + // but ORE cannot be preserved (see comment before the pass definition). 
+ OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); + + LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE); return LIR.runOnLoop(L); } @@ -242,7 +250,19 @@ LPMUpdater &) { const auto *DL = &L.getHeader()->getModule()->getDataLayout(); - LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL); + const auto &FAM = + AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); + Function *F = L.getHeader()->getParent(); + + auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F); + // FIXME: This should probably be optional rather than required. + if (!ORE) + report_fatal_error( + "LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached " + "at a higher level"); + + LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL, + *ORE); if (!LIR.runOnLoop(&L)) return PreservedAnalyses::all(); @@ -951,6 +971,14 @@ << "\n"); NewCall->setDebugLoc(TheStore->getDebugLoc()); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore", + NewCall->getDebugLoc(), Preheader) + << (Twine("Transformed loop-strided store into a call to ") + + NewCall->getCalledFunction()->getName() + "() function") + .str(); + }); + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. for (auto *I : Stores) @@ -1082,6 +1110,13 @@ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad", + NewCall->getDebugLoc(), Preheader) + << (Twine("Formed a call to ") + + NewCall->getCalledFunction()->getName() + "() function") + .str(); + }); // Okay, the memcpy has been formed. Zap the original store and anything that // feeds into it. 
Index: test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll =================================================================== --- /dev/null +++ test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -basicaa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Check that everything still works when debuginfo is present, and that it is reasonably propagated. + +; CHECK: remark: <stdin>:6:1: Formed a call to llvm.memcpy.p0i8.p0i8.i64() function + +define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp { +; CHECK-LABEL: @test6_dest_align( +; CHECK-NEXT: bb.nph: +; CHECK-NEXT: [[DEST1:%.*]] = bitcast i32* [[DEST:%.*]] to i8* +; CHECK-NEXT: [[BASE2:%.*]] = bitcast i32* [[BASE:%.*]] to i8* +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SIZE:%.*]], 2, !dbg !18 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST1]], i8* align 1 [[BASE2]], i64 [[TMP0]], i1 false), !dbg !19 +; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !18 +; CHECK: for.body: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], !dbg !20 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR]], metadata !9, metadata !DIExpression()), !dbg !20 +; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[INDVAR]], !dbg !21 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[I_0_014]], metadata !11, metadata !DIExpression()), !dbg !21 +; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, i32* [[DEST]], i64 [[INDVAR]], !dbg !22 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[DESTI]], metadata !12, metadata !DIExpression()), !dbg !22 +; CHECK-NEXT: 
[[V:%.*]] = load i32, i32* [[I_0_014]], align 1, !dbg !23 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[V]], metadata !13, metadata !DIExpression()), !dbg !23 +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1, !dbg !24 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR_NEXT]], metadata !15, metadata !DIExpression()), !dbg !24 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]], !dbg !25 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[EXITCOND]], metadata !16, metadata !DIExpression()), !dbg !25 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !dbg !26 +; CHECK: for.end: +; CHECK-NEXT: ret void, !dbg !27 +; +bb.nph: + br label %for.body + +for.body: ; preds = %bb.nph, %for.body + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] + %I.0.014 = getelementptr i32, i32* %Base, i64 %indvar + %DestI = getelementptr i32, i32* %Dest, i64 %indvar + %V = load i32, i32* %I.0.014, align 1 + store i32 %V, i32* %DestI, align 4 + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %Size + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} Index: test/Transforms/LoopIdiom/memset-debugify-remarks.ll =================================================================== --- /dev/null +++ test/Transforms/LoopIdiom/memset-debugify-remarks.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -basicaa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Check that everything still works when debuginfo is present, and that it is reasonably propagated. 
+ +; void my_basic_memset(char* begin, char* end, char value) { +; for( ; begin != end; ++begin) +; *begin = value; +; } + +; CHECK: remark: <stdin>:4:1: Transformed loop-strided store into a call to llvm.memset.p0i8.i64() function + +define void @_Z15my_basic_memsetPcS_c(i8* %ptr, i8* %end, i8 %value) { +; CHECK-LABEL: @_Z15my_basic_memsetPcS_c( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR1:%.*]] = ptrtoint i8* [[PTR:%.*]] to i64 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8* [[PTR]], [[END:%.*]], !dbg !15 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP3]], metadata !9, metadata !DIExpression()), !dbg !15 +; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !dbg !16 +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[PTR1]], !dbg !17 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[END]], i64 [[TMP0]], !dbg !17 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[PTR]], i8 [[VALUE:%.*]], i64 [[SCEVGEP2]], i1 false), !dbg !17 +; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !17 +; CHECK: for.body: +; CHECK-NEXT: [[PTR_ADDR_04:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR]], [[FOR_BODY_PREHEADER]] ], !dbg !18 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[PTR_ADDR_04]], metadata !11, metadata !DIExpression()), !dbg !18 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PTR_ADDR_04]], i64 1, !dbg !19 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[INCDEC_PTR]], metadata !13, metadata !DIExpression()), !dbg !19 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[INCDEC_PTR]], [[END]], !dbg !20 +; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]], metadata !14, metadata !DIExpression()), !dbg !20 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !dbg !21 +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]], !dbg !22 +; CHECK: for.end: +; CHECK-NEXT: 
ret void, !dbg !22 +; +entry: + %cmp3 = icmp eq i8* %ptr, %end + br i1 %cmp3, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + %ptr.addr.04 = phi i8* [ %incdec.ptr, %for.body ], [ %ptr, %entry ] + store i8 %value, i8* %ptr.addr.04, align 1 + %incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.04, i64 1 + %cmp = icmp eq i8* %incdec.ptr, %end + br i1 %cmp, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} Index: test/Transforms/LoopIdiom/nontemporal_store.ll =================================================================== --- test/Transforms/LoopIdiom/nontemporal_store.ll +++ test/Transforms/LoopIdiom/nontemporal_store.ll @@ -1,5 +1,5 @@ ; RUN: opt -loop-idiom < %s -S | FileCheck %s -; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(loop-idiom)' < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(loop-idiom)' < %s -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"