Index: lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSelect.cpp +++ lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -437,6 +437,66 @@ return Builder->CreateOr(V, Y); } +/// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single +/// call to cttz/ctlz with flag 'is_zero_undef' cleared. +/// +/// For example, we can fold the following code sequence: +/// \code +/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) +/// %1 = icmp ne i32 %x, 0 +/// %2 = select i1 %1, i32 %0, i32 32 +/// \code +/// +/// into: +/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, + InstCombiner::BuilderTy *Builder) { + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); + + // Check if the condition value compares a value for equality against zero. + if (!ICI->isEquality() || !match(CmpRHS, m_Zero())) + return nullptr; + + Value *Count = FalseVal; + Value *ValueOnZero = TrueVal; + if (Pred == ICmpInst::ICMP_NE) + std::swap(Count, ValueOnZero); + + // Skip zero extend/truncate. + Value *V = nullptr; + if (match(Count, m_ZExt(m_Value(V))) || + match(Count, m_Trunc(m_Value(V)))) + Count = V; + + // Check if the value propagated on zero is a constant number equal to the + // sizeof in bits of 'Count'. + unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits(); + if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits))) + return nullptr; + + // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the + // input to the cttz/ctlz is used as LHS for the compare instruction. + if (match(Count, m_Intrinsic(m_Specific(CmpLHS))) || + match(Count, m_Intrinsic(m_Specific(CmpLHS)))) { + IntrinsicInst *II = cast(Count); + IRBuilder<> Builder(II); + if (cast(II->getArgOperand(1))->isOne()) { + // Explicitly clear the 'undef_on_zero' flag. + IntrinsicInst *NewI = cast(II->clone()); + Type *Ty = NewI->getArgOperand(1)->getType(); + NewI->setArgOperand(1, Constant::getNullValue(Ty)); + Builder.Insert(NewI); + Count = NewI; + } + + return Builder.CreateZExtOrTrunc(Count, ValueOnZero->getType()); + } + + return nullptr; +} + /// visitSelectInstWithICmp - Visit a SelectInst that has an /// ICmpInst as its first operand. /// @@ -665,6 +725,9 @@ if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder)) return ReplaceInstUsesWith(SI, V); + if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder)) + return ReplaceInstUsesWith(SI, V); + return Changed ? &SI : nullptr; } Index: test/Transforms/InstCombine/intrinsics.ll =================================================================== --- test/Transforms/InstCombine/intrinsics.ll +++ test/Transforms/InstCombine/intrinsics.ll @@ -349,7 +349,8 @@ ret i32 %s ; CHECK-LABEL: @ctlz_select( -; CHECK: select i1 %tobool, i32 %ctlz, i32 32 +; CHECK-NEXT: call i32 @llvm.ctlz.i32(i32 %Value, i1 false) +; CHECK-NEXT: ret i32 } define i32 @cttz_select(i32 %Value) nounwind { @@ -359,5 +360,6 @@ ret i32 %s ; CHECK-LABEL: @cttz_select( -; CHECK: select i1 %tobool, i32 %cttz, i32 32 +; CHECK-NEXT: call i32 @llvm.cttz.i32(i32 %Value, i1 false) +; CHECK-NEXT: ret i32 } Index: test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll =================================================================== --- test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll +++ test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll @@ -0,0 +1,300 @@ +; RUN: opt -instcombine -S < %s | FileCheck %s + +; This test is to verify that the instruction combiner is able to fold +; a cttz/ctlz followed by a icmp + select into a single cttz/ctlz with +; the 'is_zero_undef' flag cleared. + +define i16 @test1(i16 %x) { +; CHECK-LABEL: @test1( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; CHECK-NEXT: ret i16 [[VAR]] +entry: + %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i16 %0, i16 16 + ret i16 %cond +} + +define i32 @test2(i32 %x) { +; CHECK-LABEL: @test2( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; CHECK-NEXT: ret i32 [[VAR]] +entry: + %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i32 %0, i32 32 + ret i32 %cond +} + +define i64 @test3(i64 %x) { +; CHECK-LABEL: @test3( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) +; CHECK-NEXT: ret i64 [[VAR]] +entry: + %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i64 %0, i64 64 + ret i64 %cond +} + +define i16 @test4(i16 %x) { +; CHECK-LABEL: @test4( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; CHECK-NEXT: ret i16 [[VAR]] +entry: + %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + %tobool = icmp eq i16 %x, 0 + %cond = select i1 %tobool, i16 16, i16 %0 + ret i16 %cond +} + +define i32 @test5(i32 %x) { +; CHECK-LABEL: @test5( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; CHECK-NEXT: ret i32 [[VAR]] +entry: + %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + %tobool = icmp eq i32 %x, 0 + %cond = select i1 %tobool, i32 32, i32 %0 + ret i32 %cond +} + +define i64 @test6(i64 %x) { +; CHECK-LABEL: @test6( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) +; CHECK-NEXT: ret i64 [[VAR]] +entry: + %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + %tobool = icmp eq i64 %x, 0 + %cond = select i1 %tobool, i64 64, i64 %0 + ret i64 %cond +} + +define i16 @test1b(i16 %x) { +; CHECK-LABEL: @test1b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; CHECK-NEXT: ret i16 [[VAR]] +entry: + %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i16 %0, i16 16 + ret i16 %cond +} + +define i32 @test2b(i32 %x) { +; CHECK-LABEL: @test2b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; CHECK-NEXT: ret i32 [[VAR]] +entry: + %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i32 %0, i32 32 + ret i32 %cond +} + +define i64 @test3b(i64 %x) { +; CHECK-LABEL: @test3b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; CHECK-NEXT: ret i64 [[VAR]] +entry: + %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i64 %0, i64 64 + ret i64 %cond +} + +define i16 @test4b(i16 %x) { +; CHECK-LABEL: @test4b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; CHECK-NEXT: ret i16 [[VAR]] +entry: + %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + %tobool = icmp eq i16 %x, 0 + %cond = select i1 %tobool, i16 16, i16 %0 + ret i16 %cond +} + +define i32 @test5b(i32 %x) { +; CHECK-LABEL: @test5b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; CHECK-NEXT: ret i32 [[VAR]] +entry: + %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + %tobool = icmp eq i32 %x, 0 + %cond = select i1 %tobool, i32 32, i32 %0 + ret i32 %cond +} + +define i64 @test6b(i64 %x) { +; CHECK-LABEL: @test6b( +; CHECK: [[VAR:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; CHECK-NEXT: ret i64 [[VAR]] +entry: + %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) + %tobool = icmp eq i64 %x, 0 + %cond = select i1 %tobool, i64 64, i64 %0 + ret i64 %cond +} + +define i32 @test1c(i16 %x) { +; CHECK-LABEL: @test1c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i32 +; CHECK-NEXT: ret i32 [[VAR2]] +entry: + %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + %cast2 = zext i16 %0 to i32 + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i32 %cast2, i32 16 + ret i32 %cond +} + +define i64 @test2c(i16 %x) { +; CHECK-LABEL: @test2c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64 +; CHECK-NEXT: ret i64 [[VAR2]] +entry: + %0 = tail call i16 @llvm.cttz.i16(i16 %x, i1 true) + %conv = zext i16 %0 to i64 + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i64 %conv, i64 16 + ret i64 %cond +} + +define i64 @test3c(i32 %x) { +; CHECK-LABEL: @test3c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64 +; CHECK-NEXT: ret i64 [[VAR2]] +entry: + %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + %conv = zext i32 %0 to i64 + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i64 %conv, i64 32 + ret i64 %cond +} + +define i32 @test4c(i16 %x) { +; CHECK-LABEL: @test4c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i32 +; CHECK-NEXT: ret i32 [[VAR2]] +entry: + %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + %cast = zext i16 %0 to i32 + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i32 %cast, i32 16 + ret i32 %cond +} + +define i64 @test5c(i16 %x) { +; CHECK-LABEL: @test5c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i16 [[VAR1]] to i64 +; CHECK-NEXT: ret i64 [[VAR2]] +entry: + %0 = tail call i16 @llvm.ctlz.i16(i16 %x, i1 true) + %cast = zext i16 %0 to i64 + %tobool = icmp ne i16 %x, 0 + %cond = select i1 %tobool, i64 %cast, i64 16 + ret i64 %cond +} + +define i64 @test6c(i32 %x) { +; CHECK-LABEL: @test6c( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = zext i32 [[VAR1]] to i64 +; CHECK-NEXT: ret i64 [[VAR2]] +entry: + %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + %cast = zext i32 %0 to i64 + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i64 %cast, i64 32 + ret i64 %cond +} + +define i16 @test1d(i64 %x) { +; CHECK-LABEL: @test1d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16 +; CHECK-NEXT: ret i16 [[VAR2]] +entry: + %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) + %conv = trunc i64 %0 to i16 + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i16 %conv, i16 64 + ret i16 %cond +} + +define i32 @test2d(i64 %x) { +; CHECK-LABEL: @test2d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32 +; CHECK-NEXT: ret i32 [[VAR2]] +entry: + %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) + %cast = trunc i64 %0 to i32 + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i32 %cast, i32 64 + ret i32 %cond +} + +define i16 @test3d(i32 %x) { +; CHECK-LABEL: @test3d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16 +; CHECK-NEXT: ret i16 [[VAR2]] +entry: + %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) + %cast = trunc i32 %0 to i16 + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i16 %cast, i16 32 + ret i16 %cond +} + +define i16 @test4d(i64 %x) { +; CHECK-LABEL: @test4d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i16 +; CHECK-NEXT: ret i16 [[VAR2]] +entry: + %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + %cast = trunc i64 %0 to i16 + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i16 %cast, i16 64 + ret i16 %cond +} + +define i32 @test5d(i64 %x) { +; CHECK-LABEL: @test5d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i64 [[VAR1]] to i32 +; CHECK-NEXT: ret i32 [[VAR2]] +entry: + %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) + %cast = trunc i64 %0 to i32 + %tobool = icmp ne i64 %x, 0 + %cond = select i1 %tobool, i32 %cast, i32 64 + ret i32 %cond +} + +define i16 @test6d(i32 %x) { +; CHECK-LABEL: @test6d( +; CHECK: [[VAR1:%[a-zA-Z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) +; CHECK-NEXT: [[VAR2:%[a-zA-Z0-9]+]] = trunc i32 [[VAR1]] to i16 +; CHECK-NEXT: ret i16 [[VAR2]] +entry: + %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) + %cast = trunc i32 %0 to i16 + %tobool = icmp ne i32 %x, 0 + %cond = select i1 %tobool, i16 %cast, i16 32 + ret i16 %cond +} + +declare i16 @llvm.ctlz.i16(i16, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare i64 @llvm.ctlz.i64(i64, i1) +declare i16 @llvm.cttz.i16(i16, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i64 @llvm.cttz.i64(i64, i1)