Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -404,7 +404,7 @@ // FIXME: Add version of these floating point intrinsics which allow non-default // rounding modes and FP exception handling. -let IntrProperties = [IntrNoMem] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_fma : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; @@ -440,10 +440,12 @@ } def int_minnum : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, Commutative] + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, Commutative] >; def int_maxnum : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, Commutative] + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, Commutative] >; // NOTE: these are internal interfaces. @@ -455,7 +457,7 @@ // Internal interface for object size checking def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i1_ty, llvm_i1_ty], - [IntrNoMem]>, + [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__builtin_object_size">; //===--------------- Constrained Floating Point Intrinsics ----------------===// @@ -500,7 +502,7 @@ // // None of these intrinsics accesses memory at all. -let IntrProperties = [IntrNoMem] in { +let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; @@ -511,10 +513,11 @@ //===------------------------ Debugger Intrinsics -------------------------===// // -// None of these intrinsics accesses memory at all...but that doesn't mean the -// optimizers can change them aggressively. Special handling needed in a few -// places. 
-let IntrProperties = [IntrNoMem] in { +// None of these intrinsics accesses memory at all...but that doesn't +// mean the optimizers can change them aggressively. Special handling +// needed in a few places. These synthetic intrinsics have no +// side-effects and just mark information about their operands. +let IntrProperties = [IntrNoMem, IntrSpeculatable] in { def int_dbg_declare : Intrinsic<[], [llvm_metadata_ty, llvm_metadata_ty, @@ -592,24 +595,24 @@ // Expose the carry flag from add operations on two integrals. def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; + [IntrNoMem, IntrSpeculatable]>; //===------------------------- Memory Use Markers -------------------------===// // @@ -633,7 +636,7 @@ // it can be CSE only if memory didn't change between 2 barriers call, // which is valid. // The argument also can't be marked with 'returned' attribute, because -// it would remove barrier. +// it would remove barrier. 
def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -3321,67 +3321,10 @@ case Instruction::Call: { auto *CI = cast<const CallInst>(Inst); const Function *Callee = CI->getCalledFunction(); - if (Callee && Callee->isSpeculatable()) - return true; - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { - switch (II->getIntrinsicID()) { - // These synthetic intrinsics have no side-effects and just mark - // information about their operands. - // FIXME: There are other no-op synthetic instructions that potentially - // should be considered at least *safe* to speculate... - // FIXME: The speculatable attribute should be added to all these - // intrinsics and this case statement should be removed. - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - return true; - case Intrinsic::bitreverse: - case Intrinsic::bswap: - case Intrinsic::ctlz: - case Intrinsic::ctpop: - case Intrinsic::cttz: - case Intrinsic::objectsize: - case Intrinsic::sadd_with_overflow: - case Intrinsic::smul_with_overflow: - case Intrinsic::ssub_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::umul_with_overflow: - case Intrinsic::usub_with_overflow: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions except for setting errno. - case Intrinsic::sqrt: - case Intrinsic::fma: - case Intrinsic::fmuladd: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions, and the corresponding libm functions never set errno. 
- case Intrinsic::trunc: - case Intrinsic::copysign: - case Intrinsic::fabs: - case Intrinsic::minnum: - case Intrinsic::maxnum: - return true; - // These intrinsics are defined to have the same behavior as libm - // functions, which never overflow when operating on the IEEE754 types - // that we support, and never set errno otherwise. - case Intrinsic::ceil: - case Intrinsic::floor: - case Intrinsic::nearbyint: - case Intrinsic::rint: - case Intrinsic::round: - return true; - // These intrinsics do not correspond to any libm function, and - // do not set errno. - case Intrinsic::powi: - return true; - // TODO: are convert_{from,to}_fp16 safe? - // TODO: can we list target-specific intrinsics here? - default: break; - } - } - return false; // The called function could have undefined behavior or - // side-effects, even if marked readnone nounwind. + // The called function could have undefined behavior or side-effects, even + // if marked readnone nounwind. + return Callee && Callee->isSpeculatable(); } case Instruction::VAArg: case Instruction::Alloca: Index: lib/Transforms/Utils/BuildLibCalls.cpp =================================================================== --- lib/Transforms/Utils/BuildLibCalls.cpp +++ lib/Transforms/Utils/BuildLibCalls.cpp @@ -889,7 +889,13 @@ Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType()); CallInst *CI = B.CreateCall(Callee, Op, Name); - CI->setAttributes(Attrs); + + // The incoming attribute set may have come from a speculatable intrinsic, but + // is being replaced with a library call which is not allowed to be + // speculatable. 
+ CI->setAttributes(Attrs.removeAttribute(B.getContext(), + AttributeList::FunctionIndex, + Attribute::Speculatable)); if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); Index: test/Feature/intrinsics.ll =================================================================== --- test/Feature/intrinsics.ll +++ test/Feature/intrinsics.ll @@ -69,5 +69,5 @@ ret void } -; CHECK: attributes #0 = { nounwind readnone } +; CHECK: attributes #0 = { nounwind readnone speculatable } ; CHECK: attributes #1 = { noreturn nounwind } Index: test/Transforms/BBVectorize/simple-int.ll =================================================================== --- test/Transforms/BBVectorize/simple-int.ll +++ test/Transforms/BBVectorize/simple-int.ll @@ -503,4 +503,4 @@ ; CHECK: declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0 ; CHECK: declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0 ; CHECK: declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0 -; CHECK: attributes #0 = { nounwind readnone } +; CHECK: attributes #0 = { nounwind readnone speculatable } Index: test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll =================================================================== --- test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll +++ test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll @@ -39,7 +39,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone ; CHECK: attributes #0 = { nounwind ssp } -; CHECK: attributes #1 = { nounwind readnone } +; CHECK: attributes #1 = { nounwind readnone speculatable } ; CHECK: attributes #2 = { noinline nounwind ssp } ; CHECK: attributes [[NUW]] = { nounwind } Index: test/Transforms/InstCombine/amdgcn-intrinsics.ll =================================================================== --- test/Transforms/InstCombine/amdgcn-intrinsics.ll +++ test/Transforms/InstCombine/amdgcn-intrinsics.ll @@ -1259,7 +1259,7 @@ } ; CHECK-LABEL: @icmp_constant_inputs_true( -; CHECK: %result = call i64 
@llvm.read_register.i64(metadata !0) #4 +; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5 define i64 @icmp_constant_inputs_true() { %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34) ret i64 %result @@ -1524,7 +1524,7 @@ } ; CHECK-LABEL: @fcmp_constant_inputs_true( -; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4 +; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5 define i64 @fcmp_constant_inputs_true() { %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4) ret i64 %result @@ -1537,4 +1537,4 @@ ret i64 %result } -; CHECK: attributes #4 = { convergent } +; CHECK: attributes #5 = { convergent } Index: test/Transforms/InstCombine/intrinsics.ll =================================================================== --- test/Transforms/InstCombine/intrinsics.ll +++ test/Transforms/InstCombine/intrinsics.ll @@ -284,7 +284,7 @@ define i1 @cttz_knownbits(i32 %arg) { ; CHECK-LABEL: @cttz_knownbits( ; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG:%.*]], 4 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) #0 +; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.cttz.i32(i32 [[OR]], i1 true) ; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[CNT]], 4 ; CHECK-NEXT: ret i1 [[RES]] ; @@ -307,7 +307,7 @@ define i1 @ctlz_knownbits(i8 %arg) { ; CHECK-LABEL: @ctlz_knownbits( ; CHECK-NEXT: [[OR:%.*]] = or i8 [[ARG:%.*]], 32 -; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) #0 +; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctlz.i8(i8 [[OR]], i1 true) ; CHECK-NEXT: [[RES:%.*]] = icmp eq i8 [[CNT]], 4 ; CHECK-NEXT: ret i1 [[RES]] ; Index: test/Transforms/InstCombine/pow-sqrt.ll =================================================================== --- test/Transforms/InstCombine/pow-sqrt.ll +++ test/Transforms/InstCombine/pow-sqrt.ll @@ -6,7 +6,7 @@ } ; CHECK-LABEL: define double @pow_half( -; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x) +; CHECK-NEXT: %sqrt = call fast double 
@sqrt(double %x) #1 ; CHECK-NEXT: ret double %sqrt define double @pow_neghalf(double %x) { @@ -15,8 +15,11 @@ } ; CHECK-LABEL: define double @pow_neghalf( -; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x) #0 +; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x) #1 ; CHECK-NEXT: %sqrtrecip = fdiv fast double 1.000000e+00, %sqrt ; CHECK-NEXT: ret double %sqrtrecip -declare double @llvm.pow.f64(double, double) +declare double @llvm.pow.f64(double, double) #0 + +attributes #0 = { nounwind readnone speculatable } +attributes #1 = { nounwind readnone } Index: test/Transforms/InstCombine/sub-xor.ll =================================================================== --- test/Transforms/InstCombine/sub-xor.ll +++ test/Transforms/InstCombine/sub-xor.ll @@ -27,7 +27,7 @@ define i32 @test2(i32 %x) nounwind { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[COUNT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) #0 +; CHECK-NEXT: [[COUNT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) ; CHECK-NEXT: [[SUB:%.*]] = xor i32 [[COUNT]], 31 ; CHECK-NEXT: ret i32 [[SUB]] ; Index: test/Transforms/ObjCARC/basic.ll =================================================================== --- test/Transforms/ObjCARC/basic.ll +++ test/Transforms/ObjCARC/basic.ll @@ -3049,6 +3049,6 @@ !4 = !DIFile(filename: "path/to/file", directory: "/path/to/dir") !5 = !{i32 2, !"Debug Info Version", i32 3} -; CHECK: attributes #0 = { nounwind readnone } +; CHECK: attributes #0 = { nounwind readnone speculatable } ; CHECK: attributes [[NUW]] = { nounwind } ; CHECK: ![[RELEASE]] = !{} Index: test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll =================================================================== --- test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll +++ test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll @@ -105,7 +105,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone ; CHECK: attributes #0 = 
{ ssp uwtable } -; CHECK: attributes #1 = { nounwind readnone } +; CHECK: attributes #1 = { nounwind readnone speculatable } ; CHECK: attributes #2 = { nonlazybind } ; CHECK: attributes #3 = { noinline ssp uwtable } ; CHECK: attributes [[NUW]] = { nounwind } Index: test/Transforms/SLPVectorizer/X86/call.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/call.ll +++ test/Transforms/SLPVectorizer/X86/call.ll @@ -147,5 +147,5 @@ ; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) [[ATTR0]] ; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) [[ATTR0]] -; CHECK: attributes [[ATTR0]] = { nounwind readnone } +; CHECK: attributes [[ATTR0]] = { nounwind readnone speculatable }