Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -194,6 +194,11 @@ int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const; + /// \brief Return true if the target has a way to compute cttz/ctlz that is + /// defined when the argument is zero. + bool hasZeroDefinedCtlz() const; + bool hasZeroDefinedCttz() const; + /// \brief Estimate the cost of a given IR user when lowered. /// /// This can estimate the cost of either a ConstantExpr or Instruction when @@ -717,6 +722,8 @@ ArrayRef ParamTys) = 0; virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) = 0; + virtual bool hasZeroDefinedCtlz() const = 0; + virtual bool hasZeroDefinedCttz() const = 0; virtual int getUserCost(const User *U) = 0; virtual bool hasBranchDivergence() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; @@ -874,6 +881,12 @@ ArrayRef Arguments) override { return Impl.getIntrinsicCost(IID, RetTy, Arguments); } + bool hasZeroDefinedCtlz() const override { + return Impl.hasZeroDefinedCtlz(); + } + bool hasZeroDefinedCttz() const override { + return Impl.hasZeroDefinedCttz(); + } int getUserCost(const User *U) override { return Impl.getUserCost(U); } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } bool isSourceOfDivergence(const Value *V) override { Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -167,6 +167,10 @@ } } + bool hasZeroDefinedCtlz() const { return false; } + + bool hasZeroDefinedCttz() const { return false; } + bool hasBranchDivergence() { return false; } bool isSourceOfDivergence(const Value *V) { return false; } Index: lib/Analysis/TargetTransformInfo.cpp 
=================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -83,6 +83,14 @@ return Cost; } +bool TargetTransformInfo::hasZeroDefinedCtlz() const { + return TTIImpl->hasZeroDefinedCtlz(); +} + +bool TargetTransformInfo::hasZeroDefinedCttz() const { + return TTIImpl->hasZeroDefinedCttz(); +} + int TargetTransformInfo::getUserCost(const User *U) const { int Cost = TTIImpl->getUserCost(U); assert(Cost >= 0 && "TTI should not produce negative costs!"); Index: lib/Target/X86/X86TargetTransformInfo.h =================================================================== --- lib/Target/X86/X86TargetTransformInfo.h +++ lib/Target/X86/X86TargetTransformInfo.h @@ -46,6 +46,8 @@ /// \name Scalar TTI Implementations /// @{ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); + bool hasZeroDefinedCtlz() const; + bool hasZeroDefinedCttz() const; /// @} Index: lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- lib/Target/X86/X86TargetTransformInfo.cpp +++ lib/Target/X86/X86TargetTransformInfo.cpp @@ -66,6 +66,14 @@ return ST->hasPOPCNT() ? 
TTI::PSK_FastHardware : TTI::PSK_Software; } +bool X86TTIImpl::hasZeroDefinedCtlz() const { + return ST->hasLZCNT(); +} + +bool X86TTIImpl::hasZeroDefinedCttz() const { + return ST->hasBMI(); +} + unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) { if (Vector && !ST->hasSSE1()) return 0; Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -15,10 +15,13 @@ #ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H #define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H +#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" + #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -27,7 +30,6 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" -#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #define DEBUG_TYPE "instcombine" @@ -186,6 +188,7 @@ // Optional analyses. When non-null, these can both be used to do better // combining and will be updated to reflect any changes. 
LoopInfo *LI; + TargetTransformInfo *TTI; bool MadeIRChange; @@ -193,10 +196,11 @@ InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, - DominatorTree &DT, const DataLayout &DL, LoopInfo *LI) + DominatorTree &DT, const DataLayout &DL, + TargetTransformInfo *TTI, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), LI(LI), MadeIRChange(false) {} + DL(DL), LI(LI), TTI(TTI), MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. /// @@ -213,6 +217,8 @@ TargetLibraryInfo &getTargetLibraryInfo() const { return TLI; } + TargetTransformInfo *getTargetTransformInfo() const { return TTI; } + // Visitation implementation - Implement instruction combining for different // instruction types. The semantics are as follows: // Return Value: Index: lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSelect.cpp +++ lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -363,8 +363,13 @@ /// /// into: /// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) -static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, - InstCombiner::BuilderTy *Builder) { +static Value *foldSelectCttzCtlz(SelectInst &SI, ICmpInst *ICI, + Value *TrueVal, Value *FalseVal, + InstCombiner::BuilderTy *Builder, + TargetTransformInfo *TTI) { + if (!TTI) + return nullptr; + ICmpInst::Predicate Pred = ICI->getPredicate(); Value *CmpLHS = ICI->getOperand(0); Value *CmpRHS = ICI->getOperand(1); @@ -390,18 +395,35 @@ if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits))) return nullptr; + if (!match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) && + !match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) + return nullptr; + + IntrinsicInst 
*II = cast<IntrinsicInst>(Count); + // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the // input to the cttz/ctlz is used as LHS for the compare instruction. - if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) || - match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) { - IntrinsicInst *II = cast<IntrinsicInst>(Count); - IRBuilder<> Builder(II); + if ((II->getIntrinsicID() == Intrinsic::ctlz && TTI->hasZeroDefinedCtlz()) || + (II->getIntrinsicID() == Intrinsic::cttz && TTI->hasZeroDefinedCttz())) { // Explicitly clear the 'undef_on_zero' flag. IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone()); - Type *Ty = NewI->getArgOperand(1)->getType(); - NewI->setArgOperand(1, Constant::getNullValue(Ty)); - Builder.Insert(NewI); - return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType()); + NewI->setArgOperand(1, Builder->getFalse()); + Builder->Insert(NewI); + return Builder->CreateZExtOrTrunc(NewI, ValueOnZero->getType()); + } + + // If the select filters out zero and the target doesn't have a zero-defined + // cttz/ctlz, we can still convert to use the zero-undefined version. + if (match(II->getArgOperand(1), m_Zero())) { + // Explicitly set the 'undef_on_zero' flag. + IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone()); + NewI->setArgOperand(1, Builder->getTrue()); + Builder->Insert(NewI); + auto *OldV = SI.getOperand((Pred == ICmpInst::ICMP_NE) ? 1 : 2); + auto *NewV = Builder->CreateZExtOrTrunc(NewI, ValueOnZero->getType(), + OldV->getName()); + SI.setOperand((Pred == ICmpInst::ICMP_NE) ? 1 : 2, NewV); + return &SI; } return nullptr; @@ -650,8 +672,9 @@ if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); - if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder)) - return replaceInstUsesWith(SI, V); + if (Value *V = foldSelectCttzCtlz(SI, ICI, TrueVal, FalseVal, Builder, + getTargetTransformInfo())) + return (&SI == V) ? &SI : replaceInstUsesWith(SI, V); return Changed ? 
&SI : nullptr; } Index: lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- lib/Transforms/InstCombine/InstructionCombining.cpp +++ lib/Transforms/InstCombine/InstructionCombining.cpp @@ -50,6 +50,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" @@ -3113,7 +3114,7 @@ static bool combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, - TargetLibraryInfo &TLI, DominatorTree &DT, + TargetLibraryInfo &TLI, TargetTransformInfo *TTI, DominatorTree &DT, bool ExpensiveCombines = true, LoopInfo *LI = nullptr) { auto &DL = F.getParent()->getDataLayout(); @@ -3145,7 +3146,7 @@ bool Changed = prepareICWorklistFromFunction(F, DL, &TLI, Worklist); InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines, - AA, AC, TLI, DT, DL, LI); + AA, AC, TLI, DT, DL, TTI, LI); Changed |= IC.run(); if (!Changed) @@ -3162,10 +3163,11 @@ auto &TLI = AM.getResult(F); auto *LI = AM.getCachedResult(F); + auto *TTI = AM.getCachedResult(F); // FIXME: The AliasAnalysis is not yet supported in the new pass manager - if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT, - ExpensiveCombines, LI)) + if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, TTI, + DT, ExpensiveCombines, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3202,9 +3204,11 @@ // Optional analyses. auto *LIWP = getAnalysisIfAvailable(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + auto *TTIWP = getAnalysisIfAvailable(); + auto *TTI = TTIWP ? 
&TTIWP->getTTI(F) : nullptr; - return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, - ExpensiveCombines, LI); + return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, + DT, ExpensiveCombines, LI); } char InstructionCombiningPass::ID = 0; Index: test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll =================================================================== --- test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll +++ test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll @@ -1,13 +1,20 @@ -; RUN: opt -instcombine -S < %s | FileCheck %s +; RUN: opt -S -instcombine < %s | FileCheck %s --check-prefix=UNDEF +; RUN: opt -S -instcombine -mattr=+bmi < %s | FileCheck %s --check-prefix=TZDEF +; RUN: opt -S -instcombine -mattr=+lzcnt < %s | FileCheck %s --check-prefix=LZDEF + +target triple = "x86_64-unknown-unknown" ; This test is to verify that the instruction combiner is able to fold ; a cttz/ctlz followed by a icmp + select into a single cttz/ctlz with ; the 'is_zero_undef' flag cleared. 
define i16 @test1(i16 %x) { -; CHECK-LABEL: @test1( -; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) -; CHECK-NEXT: ret i16 [[VAR]] +; UNDEF-LABEL: @test1( +; UNDEF: tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + +; LZDEF-LABEL: @test1( +; LZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; LZDEF-NEXT: ret i16 [[VAR]] entry: %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) %tobool = icmp ne i16 %x, 0 @@ -16,9 +23,12 @@ } define i32 @test2(i32 %x) { -; CHECK-LABEL: @test2( -; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) -; CHECK-NEXT: ret i32 [[VAR]] +; UNDEF-LABEL: @test2( +; UNDEF: tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + +; LZDEF-LABEL: @test2( +; LZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; LZDEF-NEXT: ret i32 [[VAR]] entry: %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) %tobool = icmp ne i32 %x, 0 @@ -27,9 +37,12 @@ } define i64 @test3(i64 %x) { -; CHECK-LABEL: @test3( -; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) -; CHECK-NEXT: ret i64 [[VAR]] +; UNDEF-LABEL: @test3( +; UNDEF: tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + +; LZDEF-LABEL: @test3( +; LZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) +; LZDEF-NEXT: ret i64 [[VAR]] entry: %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) %tobool = icmp ne i64 %x, 0 @@ -38,9 +51,12 @@ } define i16 @test4(i16 %x) { -; CHECK-LABEL: @test4( -; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) -; CHECK-NEXT: ret i16 [[VAR]] +; UNDEF-LABEL: @test4( +; UNDEF: tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + +; LZDEF-LABEL: @test4( +; LZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; LZDEF-NEXT: ret i16 [[VAR]] entry: %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) %tobool = icmp eq i16 %x, 0 @@ -49,9 +65,12 @@ } define i32 @test5(i32 %x) { -; CHECK-LABEL: 
@test5( -; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) -; CHECK-NEXT: ret i32 [[VAR]] +; UNDEF-LABEL: @test5( +; UNDEF: tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + +; LZDEF-LABEL: @test5( +; LZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; LZDEF-NEXT: ret i32 [[VAR]] entry: %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) %tobool = icmp eq i32 %x, 0 @@ -60,9 +79,12 @@ } define i64 @test6(i64 %x) { -; CHECK-LABEL: @test6( -; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) -; CHECK-NEXT: ret i64 [[VAR]] +; UNDEF-LABEL: @test6( +; UNDEF: tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + +; LZDEF-LABEL: @test6( +; LZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) +; LZDEF-NEXT: ret i64 [[VAR]] entry: %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) %tobool = icmp eq i64 %x, 0 @@ -71,9 +93,12 @@ } define i16 @test1b(i16 %x) { -; CHECK-LABEL: @test1b( -; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) -; CHECK-NEXT: ret i16 [[VAR]] +; UNDEF-LABEL: @test1b( +; UNDEF: tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + +; TZDEF-LABEL: @test1b( +; TZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; TZDEF-NEXT: ret i16 [[VAR]] entry: %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) %tobool = icmp ne i16 %x, 0 @@ -82,9 +107,12 @@ } define i32 @test2b(i32 %x) { -; CHECK-LABEL: @test2b( -; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) -; CHECK-NEXT: ret i32 [[VAR]] +; UNDEF-LABEL: @test2b( +; UNDEF: tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + +; TZDEF-LABEL: @test2b( +; TZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; TZDEF-NEXT: ret i32 [[VAR]] entry: %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) %tobool = icmp ne i32 %x, 0 @@ -93,9 +121,12 @@ } define i64 @test3b(i64 %x) { -; CHECK-LABEL: @test3b( -; CHECK: 
[[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) -; CHECK-NEXT: ret i64 [[VAR]] +; UNDEF-LABEL: @test3b( +; UNDEF: tail call i64 @llvm.cttz.i64(i64 %x, i1 true) + +; TZDEF-LABEL: @test3b( +; TZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; TZDEF-NEXT: ret i64 [[VAR]] entry: %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) %tobool = icmp ne i64 %x, 0 @@ -104,9 +135,12 @@ } define i16 @test4b(i16 %x) { -; CHECK-LABEL: @test4b( -; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) -; CHECK-NEXT: ret i16 [[VAR]] +; UNDEF-LABEL: @test4b( +; UNDEF: tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + +; TZDEF-LABEL: @test4b( +; TZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; TZDEF-NEXT: ret i16 [[VAR]] entry: %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) %tobool = icmp eq i16 %x, 0 @@ -115,9 +149,12 @@ } define i32 @test5b(i32 %x) { -; CHECK-LABEL: @test5b( -; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) -; CHECK-NEXT: ret i32 [[VAR]] +; UNDEF-LABEL: @test5b( +; UNDEF: tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + +; TZDEF-LABEL: @test5b( +; TZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; TZDEF-NEXT: ret i32 [[VAR]] entry: %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) %tobool = icmp eq i32 %x, 0 @@ -126,9 +163,12 @@ } define i64 @test6b(i64 %x) { -; CHECK-LABEL: @test6b( -; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) -; CHECK-NEXT: ret i64 [[VAR]] +; UNDEF-LABEL: @test6b( +; UNDEF: tail call i64 @llvm.cttz.i64(i64 %x, i1 true) + +; TZDEF-LABEL: @test6b( +; TZDEF: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; TZDEF-NEXT: ret i64 [[VAR]] entry: %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) %tobool = icmp eq i64 %x, 0 @@ -137,10 +177,13 @@ } define i32 @test1c(i16 %x) { -; CHECK-LABEL: @test1c( -; CHECK: 
[[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i32 -; CHECK-NEXT: ret i32 [[VAR2]] +; UNDEF-LABEL: @test1c( +; UNDEF: tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + +; TZDEF-LABEL: @test1c( +; TZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; TZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i32 +; TZDEF-NEXT: ret i32 [[VAR2]] entry: %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) %cast2 = zext i16 %0 to i32 @@ -150,10 +193,13 @@ } define i64 @test2c(i16 %x) { -; CHECK-LABEL: @test2c( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64 -; CHECK-NEXT: ret i64 [[VAR2]] +; UNDEF-LABEL: @test2c( +; UNDEF: tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + +; TZDEF-LABEL: @test2c( +; TZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; TZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64 +; TZDEF-NEXT: ret i64 [[VAR2]] entry: %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) %conv = zext i16 %0 to i64 @@ -163,10 +209,13 @@ } define i64 @test3c(i32 %x) { -; CHECK-LABEL: @test3c( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64 -; CHECK-NEXT: ret i64 [[VAR2]] +; UNDEF-LABEL: @test3c( +; UNDEF: tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + +; TZDEF-LABEL: @test3c( +; TZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; TZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64 +; TZDEF-NEXT: ret i64 [[VAR2]] entry: %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) %conv = zext i32 %0 to i64 @@ -176,10 +225,13 @@ } define i32 @test4c(i16 %x) { -; CHECK-LABEL: @test4c( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = 
zext i16 [[VAR1]] to i32 -; CHECK-NEXT: ret i32 [[VAR2]] +; UNDEF-LABEL: @test4c( +; UNDEF: tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + +; LZDEF-LABEL: @test4c( +; LZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; LZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i32 +; LZDEF-NEXT: ret i32 [[VAR2]] entry: %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) %cast = zext i16 %0 to i32 @@ -189,10 +241,13 @@ } define i64 @test5c(i16 %x) { -; CHECK-LABEL: @test5c( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64 -; CHECK-NEXT: ret i64 [[VAR2]] +; UNDEF-LABEL: @test5c( +; UNDEF: tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + +; LZDEF-LABEL: @test5c( +; LZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; LZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64 +; LZDEF-NEXT: ret i64 [[VAR2]] entry: %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) %cast = zext i16 %0 to i64 @@ -202,10 +257,13 @@ } define i64 @test6c(i32 %x) { -; CHECK-LABEL: @test6c( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64 -; CHECK-NEXT: ret i64 [[VAR2]] +; UNDEF-LABEL: @test6c( +; UNDEF: tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + +; LZDEF-LABEL: @test6c( +; LZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; LZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64 +; LZDEF-NEXT: ret i64 [[VAR2]] entry: %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) %cast = zext i32 %0 to i64 @@ -215,10 +273,13 @@ } define i16 @test1d(i64 %x) { -; CHECK-LABEL: @test1d( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16 -; CHECK-NEXT: ret i16 [[VAR2]] +; UNDEF-LABEL: @test1d( +; UNDEF: tail call i64 
@llvm.cttz.i64(i64 %x, i1 true) + +; TZDEF-LABEL: @test1d( +; TZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; TZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16 +; TZDEF-NEXT: ret i16 [[VAR2]] entry: %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) %conv = trunc i64 %0 to i16 @@ -228,10 +289,13 @@ } define i32 @test2d(i64 %x) { -; CHECK-LABEL: @test2d( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32 -; CHECK-NEXT: ret i32 [[VAR2]] +; UNDEF-LABEL: @test2d( +; UNDEF: tail call i64 @llvm.cttz.i64(i64 %x, i1 true) + +; TZDEF-LABEL: @test2d( +; TZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; TZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32 +; TZDEF-NEXT: ret i32 [[VAR2]] entry: %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) %cast = trunc i64 %0 to i32 @@ -241,10 +305,13 @@ } define i16 @test3d(i32 %x) { -; CHECK-LABEL: @test3d( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16 -; CHECK-NEXT: ret i16 [[VAR2]] +; UNDEF-LABEL: @test3d( +; UNDEF: tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + +; TZDEF-LABEL: @test3d( +; TZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; TZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16 +; TZDEF-NEXT: ret i16 [[VAR2]] entry: %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) %cast = trunc i32 %0 to i16 @@ -254,10 +321,13 @@ } define i16 @test4d(i64 %x) { -; CHECK-LABEL: @test4d( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16 -; CHECK-NEXT: ret i16 [[VAR2]] +; UNDEF-LABEL: @test4d( +; UNDEF: tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + +; LZDEF-LABEL: @test4d( +; LZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call 
i64 @llvm.ctlz.i64(i64 %x, i1 false) +; LZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16 +; LZDEF-NEXT: ret i16 [[VAR2]] entry: %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) %cast = trunc i64 %0 to i16 @@ -267,10 +337,13 @@ } define i32 @test5d(i64 %x) { -; CHECK-LABEL: @test5d( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32 -; CHECK-NEXT: ret i32 [[VAR2]] +; UNDEF-LABEL: @test5d( +; UNDEF: tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + +; LZDEF-LABEL: @test5d( +; LZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) +; LZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32 +; LZDEF-NEXT: ret i32 [[VAR2]] entry: %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) %cast = trunc i64 %0 to i32 @@ -280,10 +353,13 @@ } define i16 @test6d(i32 %x) { -; CHECK-LABEL: @test6d( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16 -; CHECK-NEXT: ret i16 [[VAR2]] +; UNDEF-LABEL: @test6d( +; UNDEF: tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + +; LZDEF-LABEL: @test6d( +; LZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; LZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16 +; LZDEF-NEXT: ret i16 [[VAR2]] entry: %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) %cast = trunc i32 %0 to i16 @@ -293,10 +369,13 @@ } define i64 @select_bug1(i32 %x) { -; CHECK-LABEL: @select_bug1( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64 -; CHECK-NEXT: ret i64 [[VAR2]] +; UNDEF-LABEL: @select_bug1( +; UNDEF: tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + +; TZDEF-LABEL: @select_bug1( +; TZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; TZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext 
i32 [[VAR1]] to i64 +; TZDEF-NEXT: ret i64 [[VAR2]] entry: %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) %conv = zext i32 %0 to i64 @@ -306,10 +385,13 @@ } define i16 @select_bug2(i32 %x) { -; CHECK-LABEL: @select_bug2( -; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) -; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16 -; CHECK-NEXT: ret i16 [[VAR2]] +; UNDEF-LABEL: @select_bug2( +; UNDEF: tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + +; TZDEF-LABEL: @select_bug2( +; TZDEF: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; TZDEF-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16 +; TZDEF-NEXT: ret i16 [[VAR2]] entry: %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) %conv = trunc i32 %0 to i16 @@ -318,7 +400,6 @@ ret i16 %cond } - declare i16 @llvm.ctlz.i16(i16, i1) declare i32 @llvm.ctlz.i32(i32, i1) declare i64 @llvm.ctlz.i64(i64, i1)