Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -1538,8 +1538,18 @@ EndBB)))) return false; if (!SpeculatedStoreValue && - ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold) - return false; + ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold) { + // Special case where the only instruction in the basic block (excluding + // the terminator) is a cttz/ctlz intrinsic call. It may still be + // beneficial to hoist it from 'ThenBB'. + if (!isa(I)) + return false; + + IntrinsicInst *II = cast(I); + if (II->getIntrinsicID() != Intrinsic::cttz && + II->getIntrinsicID() != Intrinsic::ctlz) + return false; + } // Store the store speculation candidate. if (SpeculatedStoreValue) @@ -1588,6 +1598,56 @@ passingValueIsAlwaysUndefined(ThenV, PN)) return false; + // See if we can hoist a cttz/ctlz from ThenBB into BB. + // + // Example: + // entry: + // ... + // %cmp = icmp eq i64 %val, 0 + // br i1 %cmp, label %end.bb, label %then.bb + // + // then.bb: + // %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 true) + // br label %EndBB + // + // end.bb: + // %cond = phi i64 [ %c, %then.bb ], [ 64, %entry ] + // + // ==> + // + // entry: + // ... + // %cmp = icmp eq i64 %val, 0 + // %c = tail call i64 @llvm.cttz.i64(i64 %val, i1 false) + // select i1 %cmp, i64 64, i64 %c + // + if (IntrinsicInst *II = dyn_cast(ThenV)) { + // Don't convert this phi node into a select if 'ThenV' is a cttz/ctlz + // intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits of + // the value in input to the cttz/ctlz. + if (II->getIntrinsicID() == Intrinsic::cttz || + II->getIntrinsicID() == Intrinsic::ctlz) { + unsigned BitWidth = ThenV->getType()->getIntegerBitWidth(); + ConstantInt *CInt = dyn_cast(OrigV); + if (!CInt || !CInt->equalsInt(BitWidth)) + return false; + + // Don't convert to select if 'ThenBB' is not on the false edge of the + // conditional branch. + if (!isa(BrCond)) + return false; + + ICmpInst *Cmp = cast(BrCond); + if (Cmp->getPredicate() != ICmpInst::ICMP_EQ || + Cmp->getOperand(0) != II->getArgOperand(0) || + !isa(Cmp->getOperand(1)) || + // Make sure that 'ThenBB' is only taken if the input to the + // cttz/ctlz intrinsic call is not zero. + !cast(Cmp->getOperand(1))->isZero()) + return false; + } + } + HaveRewritablePHIs = true; ConstantExpr *OrigCE = dyn_cast(OrigV); ConstantExpr *ThenCE = dyn_cast(ThenV); @@ -1654,6 +1714,30 @@ Value *TrueV = ThenV, *FalseV = OrigV; if (Invert) std::swap(TrueV, FalseV); + + // A call to cttz/ctlz is only speculated if ThenBB is on the false edge of + // the conditional branch. If so, then flag 'Invert' is always expected to + // be set, and 'FalseV' would point to the call to cttz/ctlz. + if (IntrinsicInst *II = dyn_cast(FalseV)) { + Intrinsic::ID ID = II->getIntrinsicID(); + if ((ID == Intrinsic::cttz || ID == Intrinsic::ctlz) && + cast(II->getArgOperand(1))->isOne()) { + // Construct a new call to cttz/ctlz and clear the + // "undefined on zero" flag. + Type *Ty = II->getArgOperand(0)->getType(); + Value *Args[] = { II->getArgOperand(0), + ConstantInt::getFalse(II->getContext()) }; + Module *M = BB->getParent()->getParent(); + Value *IF = Intrinsic::getDeclaration(M, ID, Ty); + Instruction *NewI = Builder.CreateCall(IF, Args); + + // Replace the old call to cttz/ctlz with 'NewI'. + II->replaceAllUsesWith(NewI); + II->eraseFromParent(); + FalseV = NewI; + } + } + Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName()); PN->setIncomingValue(OrigI, V); Index: test/Transforms/SimplifyCFG/cttz-ctlz.ll =================================================================== --- test/Transforms/SimplifyCFG/cttz-ctlz.ll +++ test/Transforms/SimplifyCFG/cttz-ctlz.ll @@ -0,0 +1,229 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +define i64 @test1(i64 %A) { +; CHECK-LABEL: @test1( +; CHECK: [[CTLZ:%[A-Za-z0-9]+]] = call i64 @llvm.ctlz.i64(i64 %A, i1 false) +; CHECK-NEXT: select i1 %tobool, i64 64, i64 [[CTLZ]] +entry: + %tobool = icmp eq i64 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ] + ret i64 %cond +} + + +define i32 @test2(i32 %A) { +; CHECK-LABEL: @test2( +; CHECK: [[CTLZ:%[A-Za-z0-9]+]] = call i32 @llvm.ctlz.i32(i32 %A, i1 false) +; CHECK-NEXT: select i1 %tobool, i32 32, i32 [[CTLZ]] +entry: + %tobool = icmp eq i32 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ] + ret i32 %cond +} + + +define signext i16 @test3(i16 signext %A) { +; CHECK-LABEL: @test3( +; CHECK: [[CTLZ:%[A-Za-z0-9]+]] = call i16 @llvm.ctlz.i16(i16 %A, i1 false) +; CHECK-NEXT: select i1 %tobool, i16 16, i16 [[CTLZ]] +entry: + %tobool = icmp eq i16 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ] + ret i16 %cond +} + + +define i64 @test1b(i64 %A) { +; CHECK-LABEL: @test1b( +; CHECK: [[CTTZ:%[A-Za-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %A, i1 false) +; CHECK-NEXT: select i1 %tobool, i64 64, i64 [[CTTZ]] +entry: + %tobool = icmp eq i64 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ] + ret i64 %cond +} + + +define i32 @test2b(i32 %A) { +; CHECK-LABEL: @test2b( +; CHECK: [[CTTZ:%[A-Za-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %A, i1 false) +; CHECK-NEXT: select i1 %tobool, i32 32, i32 [[CTTZ]] +entry: + %tobool = icmp eq i32 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ] + ret i32 %cond +} + + +define signext i16 @test3b(i16 signext %A) { +; CHECK-LABEL: @test3b( +; CHECK: [[CTTZ:%[A-Za-z0-9]+]] = call i16 @llvm.cttz.i16(i16 %A, i1 false) +; CHECK-NEXT: select i1 %tobool, i16 16, i16 [[CTTZ]] +entry: + %tobool = icmp eq i16 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ] + ret i16 %cond +} + + +define i64 @test1c(i64 %A) { +; CHECK-LABEL: @test1c( +; CHECK: call i64 @llvm.ctlz.i64(i64 %A, i1 true) +; CHECK: phi i64 [ %0, %cond.true ], [ 63, %entry ] +; CHECK-NEXT: ret +entry: + %tobool = icmp eq i64 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ] + ret i64 %cond +} + +define i32 @test2c(i32 %A) { +; CHECK-LABEL: @test2c( +; CHECK: call i32 @llvm.ctlz.i32(i32 %A, i1 true) +; CHECK: phi i32 [ %0, %cond.true ], [ 31, %entry ] +; CHECK-NEXT: ret +entry: + %tobool = icmp eq i32 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ] + ret i32 %cond +} + + +define signext i16 @test3c(i16 signext %A) { +; CHECK-LABEL: @test3c( +; CHECK: call i16 @llvm.ctlz.i16(i16 %A, i1 true) +; CHECK: phi i16 [ %0, %cond.true ], [ 15, %entry ] +; CHECK-NEXT: ret +entry: + %tobool = icmp eq i16 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ] + ret i16 %cond +} + + +define i64 @test1d(i64 %A) { +; CHECK-LABEL: @test1d( +; CHECK: call i64 @llvm.cttz.i64(i64 %A, i1 true) +; CHECK: phi i64 [ %0, %cond.true ], [ 63, %entry ] +; CHECK-NEXT: ret +entry: + %tobool = icmp eq i64 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ] + ret i64 %cond +} + + +define i32 @test2d(i32 %A) { +; CHECK-LABEL: @test2d( +; CHECK: call i32 @llvm.cttz.i32(i32 %A, i1 true) +; CHECK: phi i32 [ %0, %cond.true ], [ 31, %entry ] +; CHECK-NEXT: ret +entry: + %tobool = icmp eq i32 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ] + ret i32 %cond +} + + +define signext i16 @test3d(i16 signext %A) { +; CHECK-LABEL: @test3d( +; CHECK: call i16 @llvm.cttz.i16(i16 %A, i1 true) +; CHECK: phi i16 [ %0, %cond.true ], [ 15, %entry ] +; CHECK-NEXT: ret +entry: + %tobool = icmp eq i16 %A, 0 + br i1 %tobool, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true) + br label %cond.end + +cond.end: ; preds = %entry, %cond.true + %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ] + ret i16 %cond +} + + +declare i64 @llvm.ctlz.i64(i64, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare i16 @llvm.ctlz.i16(i16, i1) +declare i64 @llvm.cttz.i64(i64, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i16 @llvm.cttz.i16(i16, i1)