Index: llvm/include/llvm/Transforms/Utils/CodeExtractor.h =================================================================== --- llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -77,9 +77,11 @@ /// sequence out into its new function. When a DominatorTree is also given, /// extra checking and transformations are enabled. If AllowVarArgs is true, /// vararg functions can be extracted. This is safe, if all vararg handling - /// code is extracted, including vastart. If AllowAlloca is true, then - /// extraction of blocks containing alloca instructions would be possible, - /// however code extractor won't validate whether extraction is legal. + /// code is extracted, including vastart. If AllowAlloca is true, extraction + /// of blocks containing alloca instructions is always permitted; however, + /// the code extractor won't validate whether extraction is legal. Allocas + /// between a stack{save,restore} pair within a block may be extracted + /// regardless of AllowAlloca. CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr, Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -110,11 +110,13 @@ } } - // If explicitly requested, allow vastart and alloca. For invoke instructions - // verify that extraction is valid. + // Always allow allocas wrapped in between a stack{save,restore} pair. If + // explicitly requested, allow vastart and other allocas. Only verify that + // extraction is valid for invoke instructions.
+  SmallPtrSet<const Value *, 4> ActiveStackSaves;
   for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
     if (isa<AllocaInst>(I)) {
-      if (!AllowAlloca)
+      if (ActiveStackSaves.empty() && !AllowAlloca)
         return false;
       continue;
     }
@@ -167,14 +169,46 @@
       continue;
     }
 
-    if (const CallInst *CI = dyn_cast<CallInst>(I))
-      if (const Function *F = CI->getCalledFunction())
-        if (F->getIntrinsicID() == Intrinsic::vastart) {
-          if (AllowVarArgs)
-            continue;
-          else
-            return false;
-        }
+    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+      const Function *F = CI->getCalledFunction();
+      if (!F)
+        continue;
+
+      auto IID = F->getIntrinsicID();
+      if (IID == Intrinsic::vastart) {
+        if (AllowVarArgs)
+          continue;
+        else
+          return false;
+      }
+
+      // Keep track of stack{save,restore} pairs within this block. A
+      // stacksave stays active until its first stackrestore in the block;
+      // while any stacksave is active, it's safe to outline allocas.
+      if (IID == Intrinsic::stacksave) {
+        // The saved stack pointer is an ordinary value, so its users need
+        // not be stackrestore calls (e.g. it may be stored). Only treat the
+        // stacksave as active if every user is a stackrestore and at least
+        // one of them is in this block.
+        bool OnlyRestoreUsers = true;
+        bool RestoredInBB = false;
+        for (auto *U : CI->users()) {
+          auto *UserCI = dyn_cast<CallInst>(U);
+          const Function *Callee =
+              UserCI ? UserCI->getCalledFunction() : nullptr;
+          if (!Callee || Callee->getIntrinsicID() != Intrinsic::stackrestore) {
+            OnlyRestoreUsers = false;
+            break;
+          }
+          if (UserCI->getParent() == &BB)
+            RestoredInBB = true;
+        }
+        if (OnlyRestoreUsers && RestoredInBB)
+          ActiveStackSaves.insert(CI);
+      } else if (IID == Intrinsic::stackrestore) {
+        ActiveStackSaves.erase(CI->getOperand(0));
+      }
+    }
   }
 
   return true;
Index: llvm/test/Transforms/HotColdSplit/outline-safe-alloca.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/HotColdSplit/outline-safe-alloca.ll
@@ -0,0 +1,131 @@
+; RUN: opt -S -hotcoldsplit < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; Example taken from test/Transforms/HotColdSplit/outline-if-then-else.ll.
+
+; CHECK-LABEL: define {{.*}}@no_active_stack_save(
+; CHECK-NOT: @no_active_stack_save.cold
+define void @no_active_stack_save(i32 %cond) {
+entry:
+  %cond.addr = alloca i32
+  store i32 %cond, i32* %cond.addr
+  %0 = load i32, i32* %cond.addr
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.end2
+
+if.then:  ; preds = %entry
+  %1 = load i32, i32* %cond.addr
+  %cmp = icmp sgt i32 %1, 10
+  br i1 %cmp, label %if.then1, label %if.else
+
+if.then1:  ; preds = %if.then
+  call void @sideeffect(i32 0)
+  br label %if.end
+
+if.else:  ; preds = %if.then
+  call void @sideeffect(i32 1)
+  br label %if.end
+
+if.end:  ; preds = %if.else, %if.then1
+  %save = call i8* @llvm.stacksave() ; Never restored, so no pair is formed.
+  %vla = alloca [100 x i8] ; Not inside a save/restore pair.
+  call void (...) @sink()
+  ret void
+
+if.end2:  ; preds = %entry
+  call void @sideeffect(i32 2)
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}}@no_active_stack_save2(
+; CHECK-NOT: @no_active_stack_save2.cold
+define void @no_active_stack_save2(i32 %cond) {
+entry:
+  %cond.addr = alloca i32
+  store i32 %cond, i32* %cond.addr
+  %0 = load i32, i32* %cond.addr
+  %tobool = icmp ne i32 %0, 0
+  %save2 = call i8* @llvm.stacksave() ; Restored in if.end, a different block.
+  br i1 %tobool, label %if.then, label %if.end2
+
+if.then:  ; preds = %entry
+  %1 = load i32, i32* %cond.addr
+  %cmp = icmp sgt i32 %1, 10
+  br i1 %cmp, label %if.then1, label %if.else
+
+if.then1:  ; preds = %if.then
+  call void @sideeffect(i32 0)
+  br label %if.end
+
+if.else:  ; preds = %if.then
+  call void @sideeffect(i32 1)
+  br label %if.end
+
+if.end:  ; preds = %if.else, %if.then1
+  %save = call i8* @llvm.stacksave() ; Never restored.
+  %vla = alloca [100 x i8]
+  call void (...) @sink()
+  call void @llvm.stackrestore(i8* %save2) ; Restores the save made in entry.
+  ret void
+
+if.end2:  ; preds = %entry
+  call void @sideeffect(i32 2)
+  ret void
+}
+
+; CHECK-LABEL: define {{.*}}@has_active_stack_save(
+; CHECK: call void @has_active_stack_save.cold.1
+define void @has_active_stack_save(i32 %cond) {
+entry:
+  %cond.addr = alloca i32
+  store i32 %cond, i32* %cond.addr
+  %0 = load i32, i32* %cond.addr
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.end2
+
+if.then:  ; preds = %entry
+  %1 = load i32, i32* %cond.addr
+  %cmp = icmp sgt i32 %1, 10
+  br i1 %cmp, label %if.then1, label %if.else
+
+if.then1:  ; preds = %if.then
+  call void @sideeffect(i32 0)
+  br label %if.end
+
+if.else:  ; preds = %if.then
+  call void @sideeffect(i32 1)
+  br label %if.end
+
+if.end:  ; preds = %if.else, %if.then1
+  %save = call i8* @llvm.stacksave() ; Restored later in this same block.
+  %save2 = call i8* @llvm.stacksave() ; Useless, but shouldn't impede outlining.
+  %vla = alloca [100 x i8] ; Sits between %save and its stackrestore.
+  call void (...) @sink()
+  call void @llvm.stackrestore(i8* %save) ; Closes the pair opened by %save.
+  ret void
+
+if.end2:  ; preds = %entry
+  call void @sideeffect(i32 2)
+  ret void
+}
+
+
+; TODO: Hoist the alloca and eliminate the stack{save,restore} pair.
+; CHECK-LABEL: define {{.*}}@has_active_stack_save.cold.1
+; CHECK: call {{.*}}@sideeffect
+; CHECK: call {{.*}}@sideeffect
+; CHECK: call {{.*}}@llvm.stacksave
+; CHECK-NEXT: call {{.*}}@llvm.stacksave
+; CHECK-NEXT: alloca [100 x i8]
+; CHECK-NEXT: call {{.*}}@sink
+; CHECK-NEXT: call {{.*}}@llvm.stackrestore
+
+declare void @sideeffect(i32)
+
+declare void @sink(...) cold
+
+declare i8* @llvm.stacksave()
+
+declare void @llvm.stackrestore(i8*)