Index: llvm/include/llvm/Transforms/Utils/Cloning.h =================================================================== --- llvm/include/llvm/Transforms/Utils/Cloning.h +++ llvm/include/llvm/Transforms/Utils/Cloning.h @@ -80,10 +80,11 @@ /// Return a copy of the specified basic block, but without /// embedding the block into a particular function. The block returned is an -/// exact copy of the specified basic block, without any remapping having been -/// performed. Because of this, this is only suitable for applications where -/// the basic block will be inserted into the same function that it was cloned -/// from (loop unrolling would use this, for example). +/// exact copy (see possible exception below) of the specified basic block, +/// without any remapping having been performed. Because of this, this is only +/// suitable for applications where the basic block will be inserted into the +/// same function that it was cloned from (loop unrolling would use this, for +/// example). /// /// Also, note that this function makes a direct copy of the basic block, and /// can thus produce illegal LLVM code. In particular, it will copy any PHI @@ -105,10 +106,13 @@ /// If you would like to collect additional information about the cloned /// function, you can specify a ClonedCodeInfo object with the optional fifth /// parameter. +/// +/// To exclude cloning of assumptions, set the optional 7th parameter to false. BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix = "", Function *F = nullptr, ClonedCodeInfo *CodeInfo = nullptr, - DebugInfoFinder *DIFinder = nullptr); + DebugInfoFinder *DIFinder = nullptr, + bool ShouldCloneAssumes = true); /// Return a copy of the specified function and add it to that /// function's module. 
Also, any references specified in the VMap are changed Index: llvm/lib/Transforms/Utils/CloneFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/CloneFunction.cpp +++ llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -43,7 +43,8 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, ClonedCodeInfo *CodeInfo, - DebugInfoFinder *DIFinder) { + DebugInfoFinder *DIFinder, + bool ShouldCloneAssumes) { DenseMap<const MDNode *, MDNode *> Cache; BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) @@ -54,6 +55,13 @@ // Loop over all instructions, and copy them over. for (const Instruction &I : *BB) { + // A caller (for example LoopUnroll) may want to avoid cloning assumptions + // because they are not useful and potentially expensive to analyze. + if (!ShouldCloneAssumes) + if (auto *II = dyn_cast<IntrinsicInst>(&I)) + if (II->getIntrinsicID() == Intrinsic::assume) + continue; + if (DIFinder && TheModule) DIFinder->processInstruction(*TheModule, I); Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -602,7 +602,9 @@ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { ValueToValueMapTy VMap; - BasicBlock *New = CloneBasicBlock(*BB, VMap, "." 
+ Twine(It), nullptr, nullptr, nullptr, + /* ShouldCloneAssumes */ false); Header->getParent()->getBasicBlockList().push_back(New); assert((*BB != Header || LI->getLoopFor(*BB) == L) && Index: llvm/test/Transforms/GVNSink/assumption.ll =================================================================== --- llvm/test/Transforms/GVNSink/assumption.ll +++ llvm/test/Transforms/GVNSink/assumption.ll @@ -17,11 +17,7 @@ ; CHECK-NEXT: [[I32_I:%.*]] = icmp eq i32 [[I1_I]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[I32_I]]) ; CHECK-NEXT: [[I1_I_1:%.*]] = load volatile i32, i32* @g, align 4 -; CHECK-NEXT: [[I32_I_1:%.*]] = icmp eq i32 [[I1_I_1]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[I32_I_1]]) ; CHECK-NEXT: [[I1_I_2:%.*]] = load volatile i32, i32* @g, align 4 -; CHECK-NEXT: [[I32_I_2:%.*]] = icmp eq i32 [[I1_I_2]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[I32_I_2]]) ; CHECK-NEXT: br label [[BB4_I]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: func_1.exit: ; CHECK-NEXT: unreachable Index: llvm/test/Transforms/PhaseOrdering/X86/assume-explosion.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PhaseOrdering/X86/assume-explosion.ll @@ -0,0 +1,206 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O3 -S < %s | FileCheck %s + +; This test confirms that we do not create assumes, +; clone them excessively, and then cause a compile-time +; explosion trying to simplify them all. 
+ +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx11.0.0" + +@e = global i16 0, align 2 +@a = global i32 0, align 4 +@c = global i32 0, align 4 +@b = global i32 0, align 4 +@d = global i32 0, align 4 + +define void @f() #0 { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 5, i32* @c, align 4, !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: [[DOTPRE_PRE:%.*]] = load i32, i32* @b, align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[DOTPRE7_PRE:%.*]] = load i32, i32* @d, align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[DOTPRE_PRE]], 57 +; CHECK-NEXT: [[CMP6_NOT:%.*]] = icmp eq i32 [[XOR]], [[DOTPRE7_PRE]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <16 x i32> poison, i32 [[DOTPRE7_PRE]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT9]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[DOTPRE_PRE]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <16 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <16 x i32> [[TMP0]], [[BROADCAST_SPLAT10]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP3]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP5]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i1> [[TMP1]], i32 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP6]]) +; CHECK-NEXT: [[TMP7:%.*]] = 
extractelement <16 x i1> [[TMP1]], i32 5 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP7]]) +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i1> [[TMP1]], i32 6 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP8]]) +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP1]], i32 7 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP9]]) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i1> [[TMP1]], i32 8 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP10]]) +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP1]], i32 9 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP11]]) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i1> [[TMP1]], i32 10 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP12]]) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[TMP1]], i32 11 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP13]]) +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i1> [[TMP1]], i32 12 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP14]]) +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP1]], i32 13 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP15]]) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i1> [[TMP1]], i32 14 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP16]]) +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[TMP1]], i32 15 +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP17]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP6_NOT]]) +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ 5, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[VECTOR_PH]] ] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[TMP18]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP18]], 63 +; CHECK-NEXT: br i1 [[CMP]], label [[VECTOR_PH]], label [[FOR_END34:%.*]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: for.end34: +; CHECK-NEXT: store i32 [[INC]], i32* @c, align 4, !tbaa [[TBAA3]] +; CHECK-NEXT: store i16 61, i16* @e, align 2, !tbaa [[TBAA9:![0-9]+]] +; CHECK-NEXT: ret void +; +entry: + store i32 5, i32* @c, align 4, !tbaa !3 + 
br label %for.cond + +for.cond: + %0 = load i32, i32* @c, align 4, !tbaa !3 + %cmp = icmp sle i32 %0, 63 + br i1 %cmp, label %for.body, label %for.end34 + +for.body: + store i16 9, i16* @e, align 2, !tbaa !7 + br label %for.cond1 + +for.cond1: + %1 = load i16, i16* @e, align 2, !tbaa !7 + %conv = zext i16 %1 to i32 + %cmp2 = icmp sle i32 %conv, 60 + br i1 %cmp2, label %for.body4, label %for.end32 + +for.body4: + %2 = load i16, i16* @e, align 2, !tbaa !7 + %conv5 = zext i16 %2 to i32 + %3 = load i32, i32* @b, align 4, !tbaa !3 + %xor = xor i32 %conv5, %3 + %4 = load i32, i32* @d, align 4, !tbaa !3 + %cmp6 = icmp ne i32 %xor, %4 + br i1 %cmp6, label %if.then, label %if.end27 + +if.then: + %5 = load i32, i32* @a, align 4, !tbaa !3 + %conv8 = sext i32 %5 to i64 + %6 = inttoptr i64 %conv8 to i8* + store i8 3, i8* %6, align 1, !tbaa !9 + br label %for.cond9 + +for.cond9: + %7 = load i8, i8* %6, align 1, !tbaa !9 + %conv10 = sext i8 %7 to i32 + %cmp11 = icmp sle i32 %conv10, 32 + br i1 %cmp11, label %for.body13, label %for.end26 + +for.body13: + %8 = load i8, i8* %6, align 1, !tbaa !9 + %tobool = icmp ne i8 %8, 0 + br i1 %tobool, label %if.then14, label %if.end + +if.then14: + store i8 1, i8* bitcast (i32* @a to i8*), align 1, !tbaa !9 + br label %for.cond15 + +for.cond15: + %9 = load i8, i8* bitcast (i32* @a to i8*), align 1, !tbaa !9 + %conv16 = sext i8 %9 to i32 + %cmp17 = icmp sle i32 %conv16, 30 + br i1 %cmp17, label %for.body19, label %for.end + +for.body19: + %10 = load i32, i32* @c, align 4, !tbaa !3 + %cmp20 = icmp eq i32 0, %10 + %conv21 = zext i1 %cmp20 to i32 + %11 = load i8, i8* bitcast (i32* @a to i8*), align 1, !tbaa !9 + %conv22 = sext i8 %11 to i32 + %and = and i32 %conv22, %conv21 + %conv23 = trunc i32 %and to i8 + store i8 %conv23, i8* bitcast (i32* @a to i8*), align 1, !tbaa !9 + br label %for.cond15, !llvm.loop !10 + +for.end: + br label %if.end + +if.end: + br label %for.inc + +for.inc: + %12 = load i8, i8* %6, align 1, !tbaa !9 + %conv24 = sext i8 
%12 to i32 + %add = add nsw i32 %conv24, 1 + %conv25 = trunc i32 %add to i8 + store i8 %conv25, i8* %6, align 1, !tbaa !9 + br label %for.cond9, !llvm.loop !12 + +for.end26: + br label %if.end27 + +if.end27: + br label %for.inc28 + +for.inc28: + %13 = load i16, i16* @e, align 2, !tbaa !7 + %conv29 = zext i16 %13 to i32 + %add30 = add nsw i32 %conv29, 1 + %conv31 = trunc i32 %add30 to i16 + store i16 %conv31, i16* @e, align 2, !tbaa !7 + br label %for.cond1, !llvm.loop !13 + +for.end32: + br label %for.inc33 + +for.inc33: + %14 = load i32, i32* @c, align 4, !tbaa !3 + %inc = add nsw i32 %14, 1 + store i32 %inc, i32* @c, align 4, !tbaa !3 + br label %for.cond, !llvm.loop !14 + +for.end34: + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { nounwind ssp uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" } +attributes #1 = { argmemonly nofree nosync nounwind willreturn } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 7a4abc07dd8f1d8217e482ebbf438197c1aea7f0)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"short", !5, i64 0} +!9 = !{!5, !5, i64 0} +!10 = distinct !{!10, !11} +!11 = !{!"llvm.loop.mustprogress"} +!12 = distinct !{!12, !11} +!13 = distinct !{!13, !11} +!14 = distinct !{!14, !11}