Index: llvm/include/llvm/IR/CallSite.h =================================================================== --- llvm/include/llvm/IR/CallSite.h +++ llvm/include/llvm/IR/CallSite.h @@ -146,6 +146,13 @@ return static_cast(0); } + /// Return if this call is to an intrinsic. + bool isIntrinsic() const { + if (auto *F = getCalledFunction()) + return F->isIntrinsic(); + return false; + } + /// Determine whether the passed iterator points to the callee operand's Use. bool isCallee(Value::const_user_iterator UI) const { return isCallee(&UI.getUse()); Index: llvm/lib/Transforms/Scalar/ConstantHoisting.cpp =================================================================== --- llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -491,7 +491,7 @@ // take constant variables is lower than `TargetTransformInfo::TCC_Basic`. // So it's safe for us to collect constant candidates from all // IntrinsicInsts. - if (canReplaceOperandWithVariable(Inst, Idx) || isa(Inst)) { + if (canReplaceOperandWithVariable(Inst, Idx)) { collectConstantCandidates(ConstCandMap, Inst, Idx); } } // end of for all operands Index: llvm/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/lib/Transforms/Utils/Local.cpp +++ llvm/lib/Transforms/Utils/Local.cpp @@ -2935,21 +2935,39 @@ default: return true; case Instruction::Call: - case Instruction::Invoke: + case Instruction::Invoke: { + ImmutableCallSite CS(I); + // Can't handle inline asm. Skip it. - if (isa(ImmutableCallSite(I).getCalledValue())) - return false; - // Many arithmetic intrinsics have no issue taking a - // variable, however it's hard to distingish these from - // specials such as @llvm.frameaddress that require a constant. - if (isa(I)) + if (CS.isInlineAsm()) return false; // Constant bundle operands may need to retain their constant-ness for // correctness. - if (ImmutableCallSite(I).isBundleOperand(OpIdx)) + if (CS.isBundleOperand(OpIdx)) return false; - return true; + + if (OpIdx < CS.getNumArgOperands()) { + // Some variadic intrinsics require constants in the variadic arguments, + // which currently aren't markable as immarg. + if (CS.isIntrinsic() && OpIdx >= CS.getFunctionType()->getNumParams()) { + // This is known to be OK for stackmap. + return CS.getIntrinsicID() == Intrinsic::experimental_stackmap; + } + + // gcroot is a special case, since it requires a constant argument which + // isn't also required to be a simple ConstantInt. + if (CS.getIntrinsicID() == Intrinsic::gcroot) + return false; + + // Some intrinsic operands are required to be immediates. + return !CS.paramHasAttr(OpIdx, Attribute::ImmArg); + } + + // It is never allowed to replace the call argument to an intrinsic, but it + // may be possible for a call. + return !CS.isIntrinsic(); + } case Instruction::ShuffleVector: // Shufflevector masks are constant. return OpIdx != 2; Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1445,6 +1445,13 @@ return false; } +// TODO: Refine this. This should avoid cases like turning constant memcpy sizes +// into variables. +static bool replacingOperandWithVariableIsCheap(const Instruction *I, + int OpIdx) { + return !isa(I); +} + // All instructions in Insts belong to different blocks that all unconditionally // branch to a common successor. Analyze each instruction and return true if it // would be possible to sink them into their successor, creating one common @@ -1522,7 +1529,8 @@ return false; for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) { - if (I0->getOperand(OI)->getType()->isTokenTy()) + Value *Op = I0->getOperand(OI); + if (Op->getType()->isTokenTy()) // Don't touch any operand of token type. return false; @@ -1531,7 +1539,8 @@ return I->getOperand(OI) == I0->getOperand(OI); }; if (!all_of(Insts, SameAsI0)) { - if (!canReplaceOperandWithVariable(I0, OI)) + if ((isa(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) || + !canReplaceOperandWithVariable(I0, OI)) // We can't create a PHI from this GEP. return false; // Don't create indirect calls! The called value is the final operand. Index: llvm/test/Transforms/GVNSink/indirect-call.ll =================================================================== --- llvm/test/Transforms/GVNSink/indirect-call.ll +++ llvm/test/Transforms/GVNSink/indirect-call.ll @@ -68,3 +68,27 @@ %tobool4 = icmp ne i8 %obeys.0, 0 ret i1 %tobool4 } + +; Make sure no indirect call is introduced from direct calls +declare i8 @ext2(i1) +define zeroext i1 @test4(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + %cmp = icmp uge i32 %blksA, %nblks + br i1 %flag, label %if.then, label %if.else + +; CHECK-LABEL: test4 +; CHECK: call i8 @ext( +; CHECK: call i8 @ext2( +if.then: + %frombool1 = call i8 @ext(i1 %cmp) + br label %if.end + +if.else: + %frombool3 = call i8 @ext2(i1 %cmp) + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} Index: llvm/test/Transforms/SimplifyCFG/sink-common-code.ll =================================================================== --- llvm/test/Transforms/SimplifyCFG/sink-common-code.ll +++ llvm/test/Transforms/SimplifyCFG/sink-common-code.ll @@ -291,12 +291,12 @@ if.then: %dummy = add i32 %w, 5 - %sv1 = call i32 @llvm.ctlz.i32(i32 %x) + %sv1 = call i32 @llvm.ctlz.i32(i32 %x, i1 false) br label %if.end if.else: %dummy1 = add i32 %w, 6 - %sv2 = call i32 @llvm.cttz.i32(i32 %x) + %sv2 = call i32 @llvm.cttz.i32(i32 %x, i1 false) br label %if.end if.end: @@ -304,8 +304,8 @@ ret i32 1 } -declare i32 @llvm.ctlz.i32(i32 %x) readnone -declare i32 @llvm.cttz.i32(i32 %x) readnone +declare i32 @llvm.ctlz.i32(i32 %x, i1 immarg) readnone +declare i32 @llvm.cttz.i32(i32 %x, i1 immarg) readnone ; CHECK-LABEL: test12 ; CHECK: call i32 @llvm.ctlz @@ -769,6 +769,120 @@ ; CHECK-NOT: exact ; CHECK: } + +; FIXME: Should turn into select +; CHECK-LABEL: @allow_intrinsic_remove_constant( +; CHECK: %sv1 = call float @llvm.fma.f32(float %dummy, float 2.000000e+00, float 1.000000e+00) +; CHECK: %sv2 = call float @llvm.fma.f32(float 2.000000e+00, float %dummy1, float 1.000000e+00) +define float @allow_intrinsic_remove_constant(i1 zeroext %flag, float %w, float %x, float %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = fadd float %w, 4.0 + %sv1 = call float @llvm.fma.f32(float %dummy, float 2.0, float 1.0) + br label %if.end + +if.else: + %dummy1 = fadd float %w, 8.0 + %sv2 = call float @llvm.fma.f32(float 2.0, float %dummy1, float 1.0) + br label %if.end + +if.end: + %p = phi float [ %sv1, %if.then ], [ %sv2, %if.else ] + ret float %p +} + +declare float @llvm.fma.f32(float, float, float) + +; CHECK-LABEL: @no_remove_constant_immarg( +; CHECK: call i32 @llvm.ctlz.i32(i32 %x, i1 true) +; CHECK: call i32 @llvm.ctlz.i32(i32 %x, i1 false) +define i32 @no_remove_constant_immarg(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %w, 5 + %sv1 = call i32 @llvm.ctlz.i32(i32 %x, i1 true) + br label %if.end + +if.else: + %dummy1 = add i32 %w, 6 + %sv2 = call i32 @llvm.ctlz.i32(i32 %x, i1 false) + br label %if.end + +if.end: + %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ] + ret i32 1 +} + +declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) + +; Make sure a memcpy size isn't replaced with a variable +; CHECK-LABEL: @no_replace_memcpy_size( +; CHECK: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) +; CHECK: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 4096, i1 false) +define void @no_replace_memcpy_size(i1 zeroext %flag, i8 addrspace(1)* %dst, i8 addrspace(1)* %src) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) + br label %if.end + +if.else: + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 4096, i1 false) + br label %if.end + +if.end: + ret void +} + +declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) + +; Make sure a memmove size isn't replaced with a variable +; CHECK-LABEL: @no_replace_memmove_size( +; CHECK: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) +; CHECK: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 4096, i1 false) +define void @no_replace_memmove_size(i1 zeroext %flag, i8 addrspace(1)* %dst, i8 addrspace(1)* %src) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) + br label %if.end + +if.else: + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 4096, i1 false) + br label %if.end + +if.end: + ret void +} + +declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) + +; Make sure a memset size isn't replaced with a variable +; CHECK-LABEL: @no_replace_memset_size( +; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 0, i64 1024, i1 false) +; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 0, i64 4096, i1 false) +define void @no_replace_memset_size(i1 zeroext %flag, i8 addrspace(1)* %dst) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 0, i64 1024, i1 false) + br label %if.end + +if.else: + call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 0, i64 4096, i1 false) + br label %if.end + +if.end: + ret void +} + ; Check that simplifycfg doesn't sink and merge inline-asm instructions. define i32 @test_inline_asm1(i32 %c, i32 %r6) { @@ -913,7 +1027,6 @@ declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) - ; CHECK: ![[$TBAA]] = !{![[TYPE:[0-9]]], ![[TYPE]], i64 0} ; CHECK: ![[TYPE]] = !{!"float", ![[TEXT:[0-9]]]} ; CHECK: ![[TEXT]] = !{!"an example type tree"} Index: llvm/unittests/Transforms/Utils/LocalTest.cpp =================================================================== --- llvm/unittests/Transforms/Utils/LocalTest.cpp +++ llvm/unittests/Transforms/Utils/LocalTest.cpp @@ -1001,3 +1001,63 @@ // %test.bb is expected to be simplified by FoldCondBranchOnPHI. EXPECT_TRUE(simplifyCFG(TestBB, TTI, Options)); } + +TEST(Local, CanReplaceOperandWithVariable) { + LLVMContext Ctx; + Module M("test_module", Ctx); + IRBuilder<> B(Ctx); + + FunctionType *FnType = + FunctionType::get(Type::getVoidTy(Ctx), {}, false); + + FunctionType *VarArgFnType = + FunctionType::get(Type::getVoidTy(Ctx), {B.getInt32Ty()}, true); + + Function *TestBody = Function::Create(FnType, GlobalValue::ExternalLinkage, + 0, "", &M); + + BasicBlock *BB0 = BasicBlock::Create(Ctx, "", TestBody); + B.SetInsertPoint(BB0); + + Value *Intrin = M.getOrInsertFunction("llvm.foo", FnType).getCallee(); + Value *Func = M.getOrInsertFunction("foo", FnType).getCallee(); + Value *VarArgFunc + = M.getOrInsertFunction("foo.vararg", VarArgFnType).getCallee(); + Value *VarArgIntrin + = M.getOrInsertFunction("llvm.foo.vararg", VarArgFnType).getCallee(); + + auto *CallToIntrin = B.CreateCall(Intrin); + auto *CallToFunc = B.CreateCall(Func); + + // Test if it's valid to replace the callee operand. + EXPECT_FALSE(canReplaceOperandWithVariable(CallToIntrin, 0)); + EXPECT_TRUE(canReplaceOperandWithVariable(CallToFunc, 0)); + + // That it's invalid to replace an argument in the variadic argument list for + // an intrinsic, but OK for a normal function. + auto *CallToVarArgFunc = B.CreateCall( + VarArgFunc, {B.getInt32(0), B.getInt32(1), B.getInt32(2)}); + EXPECT_TRUE(canReplaceOperandWithVariable(CallToVarArgFunc, 0)); + EXPECT_TRUE(canReplaceOperandWithVariable(CallToVarArgFunc, 1)); + EXPECT_TRUE(canReplaceOperandWithVariable(CallToVarArgFunc, 2)); + EXPECT_TRUE(canReplaceOperandWithVariable(CallToVarArgFunc, 3)); + + auto *CallToVarArgIntrin = B.CreateCall( + VarArgIntrin, {B.getInt32(0), B.getInt32(1), B.getInt32(2)}); + EXPECT_TRUE(canReplaceOperandWithVariable(CallToVarArgIntrin, 0)); + EXPECT_FALSE(canReplaceOperandWithVariable(CallToVarArgIntrin, 1)); + EXPECT_FALSE(canReplaceOperandWithVariable(CallToVarArgIntrin, 2)); + EXPECT_FALSE(canReplaceOperandWithVariable(CallToVarArgIntrin, 3)); + + // Test that it's invalid to replace gcroot operands, even though it can't use + // immarg. + Type *PtrPtr = B.getInt8Ty()->getPointerTo(0); + Value *Alloca = B.CreateAlloca(PtrPtr, (unsigned)0); + CallInst *GCRoot = B.CreateIntrinsic(Intrinsic::gcroot, {}, + {Alloca, Constant::getNullValue(PtrPtr)}); + EXPECT_TRUE(canReplaceOperandWithVariable(GCRoot, 0)); // Alloca + EXPECT_FALSE(canReplaceOperandWithVariable(GCRoot, 1)); + EXPECT_FALSE(canReplaceOperandWithVariable(GCRoot, 2)); + + BB0->dropAllReferences(); +}