[CodeGenPrepare] Speculate cttz/ctlz across a "free" zext/trunc.

The cttz/ctlz speculation previously required the 'then' block to hold a
single instruction (the intrinsic call). With this change one extra
instruction is tolerated: a zext or trunc whose operand is that call, and
only when TargetLowering reports the conversion as free (isZExtFree /
isTruncateFree). A second such cast makes the transform bail out.

Consequences visible in the hunks below:
  * The phi's incoming value for the 'then' block may now be either the
    intrinsic call (II) or the matched cast (CI).
  * The 'size-of' check takes its bit width from II's type instead of from
    the phi's incoming value, whose type can differ once a cast is involved.
  * The replacement intrinsic call is built with the IRBuilder positioned at
    II instead of at BrInst.

Review notes:
  * A zext/trunc of the call that the target does NOT report as free is
    neither accepted nor rejected inside the new block; control falls
    through to the cttz/ctlz match that follows. The remainder of that
    match is outside these hunks -- confirm that it bails out.
  * The template arguments on isa/cast/dyn_cast/m_Intrinsic are inferred
    from the declarations and comments in the hunks (CastInst *CI,
    ConstantInt *CInt, "DbgInfoIntrinsic calls", "intrinsic cttz/ctlz"),
    and the leading indentation of every line is restored by convention;
    verify both against the committed revision before applying.

Index: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
@@ -4008,15 +4008,41 @@
   // See if ThenBB contains only one instruction (excluding the
   // terminator and DbgInfoIntrinsic calls).
   IntrinsicInst *II = nullptr;
+  CastInst *CI = nullptr;
   for (BasicBlock::iterator I = ThenBB->begin(),
        E = std::prev(ThenBB->end()); I != E; ++I) {
     // Skip debug info.
     if (isa<DbgInfoIntrinsic>(I))
       continue;
 
-    if (II)
-      // Avoid speculating more than one instruction.
-      return false;
+    // Check if this is a zero extension or a truncate of a previously
+    // matched call to intrinsic cttz/ctlz.
+    if (II) {
+      // Early exit if we already found a "free" zero extend/truncate.
+      if (CI)
+        return false;
+
+      Type *SrcTy = II->getType();
+      Type *DestTy = I->getType();
+      Value *V;
+
+      if (match(cast<Instruction>(I), m_ZExt(m_Value(V))) && V == II) {
+        // Speculate this zero extend only if it is "free" for the target.
+        if (TLI.isZExtFree(SrcTy, DestTy)) {
+          CI = cast<CastInst>(I);
+          continue;
+        }
+      } else if (match(cast<Instruction>(I), m_Trunc(m_Value(V))) && V == II) {
+        // Speculate this truncate only if it is "free" for the target.
+        if (TLI.isTruncateFree(SrcTy, DestTy)) {
+          CI = cast<CastInst>(I);
+          continue;
+        }
+      } else {
+        // Avoid speculating more than one instruction.
+        return false;
+      }
+    }
 
     // See if this is a call to intrinsic cttz/ctlz.
     if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) {
@@ -4041,11 +4067,14 @@
   Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
   Value *OrigV = PN->getIncomingValueForBlock(EntryBB);
 
-  if (!OrigV || ThenV != II)
+  if (!OrigV)
     return false;
 
+  if (ThenV != II && (!CI || ThenV != CI))
+    return false;
+
   if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) {
-    unsigned BitWidth = ThenV->getType()->getIntegerBitWidth();
+    unsigned BitWidth = II->getType()->getIntegerBitWidth();
 
     // Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz
     // intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits
@@ -4070,7 +4099,7 @@
                       ConstantInt::getFalse(II->getContext()) };
     Module *M = EntryBB->getParent()->getParent();
     Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty);
-    IRBuilder<> Builder(BrInst);
+    IRBuilder<> Builder(II);
     Instruction *NewI = Builder.CreateCall(IF, Args);
 
     // Replace the old call to cttz/ctlz.
Index: llvm/trunk/test/CodeGen/X86/cttz-ctlz.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/cttz-ctlz.ll
+++ llvm/trunk/test/CodeGen/X86/cttz-ctlz.ll
@@ -241,6 +241,178 @@
   ret i16 %cond
 }
 
+; The following tests verify that calls to cttz/ctlz are speculated even if
+; basic block %cond.true has an extra zero extend/truncate which is "free"
+; for the target.
+
+define i64 @test1e(i32 %x) { ; cttz.i32 result widened by zext to i64
+; ALL-LABEL: @test1e(
+; LZCNT: icmp eq i32 %x, 0
+; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %phitmp2 = zext i32 %0 to i64
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ] ; 32 = bit width of the i32 intrinsic operand, not of the i64 phi
+  ret i64 %cond
+}
+
+define i32 @test2e(i64 %x) { ; cttz.i64 result narrowed by trunc to i32
+; ALL-LABEL: @test2e(
+; LZCNT: icmp eq i64 %x, 0
+; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i32
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ] ; 64 = bit width of the i64 intrinsic operand, not of the i32 phi
+  ret i32 %cond
+}
+
+define i64 @test3e(i32 %x) { ; ctlz.i32 result widened by zext to i64
+; ALL-LABEL: @test3e(
+; BMI: icmp eq i32 %x, 0
+; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %phitmp2 = zext i32 %0 to i64
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ] ; 32 = bit width of the i32 intrinsic operand, not of the i64 phi
+  ret i64 %cond
+}
+
+define i32 @test4e(i64 %x) { ; ctlz.i64 result narrowed by trunc to i32
+; ALL-LABEL: @test4e(
+; BMI: icmp eq i64 %x, 0
+; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i32
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ] ; 64 = bit width of the i64 intrinsic operand, not of the i32 phi
+  ret i32 %cond
+}
+
+define i16 @test5e(i64 %x) { ; ctlz.i64 result narrowed by trunc to i16
+; ALL-LABEL: @test5e(
+; BMI: icmp eq i64 %x, 0
+; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ] ; 64 = bit width of the i64 intrinsic operand, not of the i16 phi
+  ret i16 %cond
+}
+
+define i16 @test6e(i32 %x) { ; ctlz.i32 result narrowed by trunc to i16
+; ALL-LABEL: @test6e(
+; BMI: icmp eq i32 %x, 0
+; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %cast = trunc i32 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ] ; 32 = bit width of the i32 intrinsic operand, not of the i16 phi
+  ret i16 %cond
+}
+
+define i16 @test7e(i64 %x) { ; cttz.i64 result narrowed by trunc to i16
+; ALL-LABEL: @test7e(
+; LZCNT: icmp eq i64 %x, 0
+; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ] ; 64 = bit width of the i64 intrinsic operand, not of the i16 phi
+  ret i16 %cond
+}
+
+define i16 @test8e(i32 %x) { ; cttz.i32 result narrowed by trunc to i16
+; ALL-LABEL: @test8e(
+; LZCNT: icmp eq i32 %x, 0
+; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %cast = trunc i32 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ] ; 32 = bit width of the i32 intrinsic operand, not of the i16 phi
+  ret i16 %cond
+}
+
 declare i64 @llvm.ctlz.i64(i64, i1)
 declare i32 @llvm.ctlz.i32(i32, i1)