diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1153,12 +1153,14 @@ // FIXME: If necessary, this should go in target-specific overrides. if (VF == 1 && RetVF == 1 && getTLI()->isCheapToSpeculateCttz()) return TargetTransformInfo::TCC_Basic; + return TargetTransformInfo::TCC_Expensive; break; case Intrinsic::ctlz: // FIXME: If necessary, this should go in target-specific overrides. if (VF == 1 && RetVF == 1 && getTLI()->isCheapToSpeculateCtlz()) return TargetTransformInfo::TCC_Basic; + return TargetTransformInfo::TCC_Expensive; break; case Intrinsic::memcpy: diff --git a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll --- a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll @@ -407,22 +407,64 @@ } define i16 @test9_loop(i32 %x, i16* %ptr) { -; ALL-LABEL: @test9_loop( -; ALL-NEXT: entry: -; ALL-NEXT: br label [[LOOP_HEADER:%.*]] -; ALL: loop.header: -; ALL-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_HEADER]] ] -; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; ALL-NEXT: [[XOR:%.*]] = xor i32 [[X]], -1 -; ALL-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[XOR]], i1 true) -; ALL-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; ALL-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]] -; ALL-NEXT: store i16 [[COND]], i16* [[PTR:%.*]], align 2 -; ALL-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; ALL-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 100 -; ALL-NEXT: br i1 [[EC]], label [[LOOP_EXIT:%.*]], label [[LOOP_HEADER]] -; ALL: loop.exit: -; ALL-NEXT: ret i16 [[COND]] +; BMI-LABEL: @test9_loop( +; BMI-NEXT: entry: +; BMI-NEXT: br label [[LOOP_HEADER:%.*]] +; BMI: loop.header: +; BMI-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_HEADER]] ] +; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 +; BMI-NEXT: [[XOR:%.*]] = xor i32 [[X]], -1 +; BMI-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[XOR]], i1 true) +; BMI-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 +; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]] +; BMI-NEXT: store i16 [[COND]], i16* [[PTR:%.*]], align 2 +; BMI-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; BMI-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 100 +; BMI-NEXT: br i1 [[EC]], label [[LOOP_EXIT:%.*]], label [[LOOP_HEADER]] +; BMI: loop.exit: +; BMI-NEXT: ret i16 [[COND]] +; +; LZCNT-LABEL: @test9_loop( +; LZCNT-NEXT: entry: +; LZCNT-NEXT: br label [[LOOP_HEADER:%.*]] +; LZCNT: loop.header: +; LZCNT-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[COND_END:%.*]] ] +; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 +; LZCNT-NEXT: br i1 [[TOBOOL]], label [[COND_END]], label [[COND_TRUE:%.*]] +; LZCNT: cond.true: +; LZCNT-NEXT: [[XOR:%.*]] = xor i32 [[X]], -1 +; LZCNT-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[XOR]], i1 true) +; LZCNT-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 +; LZCNT-NEXT: br label [[COND_END]] +; LZCNT: cond.end: +; LZCNT-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[LOOP_HEADER]] ] +; LZCNT-NEXT: store i16 [[COND]], i16* [[PTR:%.*]], align 2 +; LZCNT-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; LZCNT-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 100 +; LZCNT-NEXT: br i1 [[EC]], label [[LOOP_EXIT:%.*]], label [[LOOP_HEADER]] +; LZCNT: loop.exit: +; LZCNT-NEXT: ret i16 [[COND]] +; +; GENERIC-LABEL: @test9_loop( +; GENERIC-NEXT: entry: +; GENERIC-NEXT: br label [[LOOP_HEADER:%.*]] +; GENERIC: loop.header: +; GENERIC-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[COND_END:%.*]] ] +; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 +; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END]], label [[COND_TRUE:%.*]] +; GENERIC: cond.true: +; GENERIC-NEXT: [[XOR:%.*]] = xor i32 [[X]], -1 +; GENERIC-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[XOR]], i1 true) +; GENERIC-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 +; GENERIC-NEXT: br label [[COND_END]] +; GENERIC: cond.end: +; GENERIC-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[LOOP_HEADER]] ] +; GENERIC-NEXT: store i16 [[COND]], i16* [[PTR:%.*]], align 2 +; GENERIC-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; GENERIC-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 100 +; GENERIC-NEXT: br i1 [[EC]], label [[LOOP_EXIT:%.*]], label [[LOOP_HEADER]] +; GENERIC: loop.exit: +; GENERIC-NEXT: ret i16 [[COND]] ; entry: br label %loop.header