Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -3849,6 +3849,14 @@ } Address addr = base.getAddress(); + if (auto *ClassDef = dyn_cast(rec)) { + if (CGM.getCodeGenOpts().StrictVTablePointers && + ClassDef->isDynamicClass()) { + auto *stripped = Builder.CreateStripInvariantGroup(addr.getPointer()); + addr = Address(stripped, addr.getAlignment()); + } + } + unsigned RecordCVR = base.getVRQualifiers(); if (rec->isUnion()) { // For unions, there is no pointer adjustment. Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -1618,6 +1618,13 @@ CE->getLocStart()); } + const CXXRecordDecl *SourceClassDecl = + E->getType().getTypePtr()->getPointeeCXXRecordDecl(); + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers && SourceClassDecl && + SourceClassDecl->isDynamicClass()) { + Src = Builder.CreateStripInvariantGroup(Src); + } + return Builder.CreateBitCast(Src, DstTy); } case CK_AddressSpaceConversion: { @@ -1754,12 +1761,25 @@ llvm::Value* IntResult = Builder.CreateIntCast(Src, MiddleTy, InputSigned, "conv"); - return Builder.CreateIntToPtr(IntResult, DestLLVMTy); + auto *IntToPtr = Builder.CreateIntToPtr(IntResult, DestLLVMTy); + + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers) + return Builder.CreateLaunderInvariantGroup(IntToPtr); + + return IntToPtr; } - case CK_PointerToIntegral: + case CK_PointerToIntegral: { assert(!DestTy->isBooleanType() && "bool should use PointerToBool"); - return Builder.CreatePtrToInt(Visit(E), ConvertType(DestTy)); + auto *PtrExpr = Visit(E); + const CXXRecordDecl *ClassDecl = + E->getType().getTypePtr()->getPointeeCXXRecordDecl(); + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers && ClassDecl && + ClassDecl->isDynamicClass()) { + PtrExpr = 
Builder.CreateStripInvariantGroup(PtrExpr); + } + return Builder.CreatePtrToInt(PtrExpr, ConvertType(DestTy)); + } case CK_ToVoid: { CGF.EmitIgnoredExpr(E); return nullptr; @@ -3238,6 +3258,22 @@ Result = Builder.CreateICmp(SICmpOpc, LHS, RHS, "cmp"); } else { // Unsigned integers and pointers. + + if (CGF.CGM.getCodeGenOpts().StrictVTablePointers && + !isa(LHS) && + !isa(RHS)) { + // Based on comparisons of pointers to dynamic objects, the optimizer + // can replace one pointer with another, which might be incorrect in + // presence of invariant groups. Comparison with null is safe. + + if (auto *RD = LHSTy->getPointeeCXXRecordDecl()) + if (!RD->isCompleteDefinition() || RD->isDynamicClass()) + LHS = Builder.CreateStripInvariantGroup(LHS); + if (auto *RD = RHSTy->getPointeeCXXRecordDecl()) + if (!RD->isCompleteDefinition() || RD->isDynamicClass()) + RHS = Builder.CreateStripInvariantGroup(RHS); + } + Result = Builder.CreateICmp(UICmpOpc, LHS, RHS, "cmp"); } Index: clang/test/CodeGenCXX/strict-vtable-pointers.cpp =================================================================== --- clang/test/CodeGenCXX/strict-vtable-pointers.cpp +++ clang/test/CodeGenCXX/strict-vtable-pointers.cpp @@ -5,7 +5,8 @@ // RUN: FileCheck --check-prefix=CHECK-LINK-REQ %s < %t.ll typedef __typeof__(sizeof(0)) size_t; -void *operator new(size_t, void*) throw(); +void *operator new(size_t, void *) throw(); +using uintptr_t = unsigned long long; struct NotTrivialDtor { ~NotTrivialDtor(); @@ -17,7 +18,7 @@ }; struct DynamicDerived : DynamicBase1 { - void foo(); + void foo() override; }; struct DynamicBase2 { @@ -28,8 +29,8 @@ }; struct DynamicDerivedMultiple : DynamicBase1, DynamicBase2 { - virtual void foo(); - virtual void bar(); + void foo() override; + void bar() override; }; struct StaticBase { @@ -47,9 +48,8 @@ struct DynamicFromVirtualStatic2 : virtual StaticBase { }; -struct DynamicFrom2Virtuals : - DynamicFromVirtualStatic1, - DynamicFromVirtualStatic2 { +struct DynamicFrom2Virtuals 
: DynamicFromVirtualStatic1, + DynamicFromVirtualStatic2 { }; // CHECK-NEW-LABEL: define void @_Z12LocalObjectsv() @@ -89,7 +89,6 @@ // CHECK-CTORS: call i8* @llvm.launder.invariant.group.p0i8( // CHECK-CTORS-LABEL: {{^}}} - // CHECK-NEW-LABEL: define void @_Z9Pointers1v() // CHECK-NEW-NOT: @llvm.launder.invariant.group.p0i8( // CHECK-NEW-LABEL: call void @_ZN12DynamicBase1C1Ev( @@ -134,7 +133,6 @@ // CHECK-CTORS-NOT: call i8* @llvm.launder.invariant.group.p0i8( // CHECK-CTORS-LABEL: {{^}}} - struct DynamicDerived; // CHECK-CTORS-LABEL: define linkonce_odr void @_ZN14DynamicDerivedC2Ev( @@ -164,14 +162,12 @@ // CHECK-CTORS: call void @_ZN12DynamicBase2C2Ev( // CHECK-CTORS-NOT: @llvm.launder.invariant.group.p0i8 - // CHECK-CTORS: %[[THIS10:.*]] = bitcast %struct.DynamicDerivedMultiple* %[[THIS0]] to i32 (...)*** // CHECK-CTORS: store {{.*}} @_ZTV22DynamicDerivedMultiple, i32 0, inrange i32 0, i32 2) {{.*}} %[[THIS10]] // CHECK-CTORS: %[[THIS11:.*]] = bitcast %struct.DynamicDerivedMultiple* %[[THIS0]] to i8* // CHECK-CTORS: %[[THIS_ADD:.*]] = getelementptr inbounds i8, i8* %[[THIS11]], i64 16 // CHECK-CTORS: %[[THIS12:.*]] = bitcast i8* %[[THIS_ADD]] to i32 (...)*** - // CHECK-CTORS: store {{.*}} @_ZTV22DynamicDerivedMultiple, i32 0, inrange i32 1, i32 2) {{.*}} %[[THIS12]] // CHECK-CTORS-LABEL: {{^}}} @@ -182,9 +178,10 @@ struct A { virtual void foo(); + int m; }; struct B : A { - virtual void foo(); + void foo() override; }; union U { @@ -209,7 +206,7 @@ // CHECK-NEW: call i8* @llvm.launder.invariant.group.p0i8(i8* // CHECK-NEW: call void @_Z2g2P1A(%struct.A* g2(&u->b); - // CHECK-NEW: call void @_Z9changeToAP1U(%union.U* + // CHECK-NEW: call void @_Z9changeToAP1U(%union.U* changeToA(u); // CHECK-NEW: call i8* @llvm.launder.invariant.group.p0i8(i8* // call void @_Z2g2P1A(%struct.A* %a) @@ -294,12 +291,166 @@ take(u.v3); } +// CHECK-NEW-LABEL: define void @_Z7comparev() +void compare() { + A *a = new A; + a->foo(); + // CHECK-NEW: call i8* 
@llvm.launder.invariant.group.p0i8(i8* + A *b = new (a) B; + + // CHECK-NEW: %[[a:.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* + // CHECK-NEW: %[[a2:.*]] = bitcast i8* %[[a]] to %struct.A* + // CHECK-NEW: %[[b:.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* + // CHECK-NEW: %[[b2:.*]] = bitcast i8* %[[b]] to %struct.A* + // CHECK-NEW: %cmp = icmp eq %struct.A* %[[a2]], %[[b2]] + if (a == b) + b->foo(); +} + +// CHECK-NEW-LABEL: compare2 +bool compare2(A *a, A *a2) { + // CHECK-NEW: %[[a:.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* + // CHECK-NEW: %[[a2:.*]] = bitcast i8* %[[a]] to %struct.A* + // CHECK-NEW: %[[b:.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* + // CHECK-NEW: %[[b2:.*]] = bitcast i8* %[[b]] to %struct.A* + // CHECK-NEW: %cmp = icmp ult %struct.A* %[[a2]], %[[b2]] + return a < a2; +} +// CHECK-NEW-LABEL: compareIntPointers +bool compareIntPointers(int *a, int *b) { + // CHECK-NEW-NOT: call i8* @llvm.strip.invariant.group + return a == b; +} + +struct HoldingOtherVirtuals { + B b; +}; + +// There is no need to add barriers for comparison of pointers to classes +// that are not dynamic. 
+// CHECK-NEW-LABEL: compare5 +bool compare5(HoldingOtherVirtuals *a, HoldingOtherVirtuals *b) { + // CHECK-NEW-NOT: call i8* @llvm.strip.invariant.group + return a == b; +} +// CHECK-NEW-LABEL: compareNull +bool compareNull(A *a) { + // CHECK-NEW-NOT: call i8* @llvm.strip.invariant.group + + if (a != nullptr) + return false; + if (!a) + return false; + return a == nullptr; +} + +struct X; +// We have to also introduce the barriers if comparing pointers to incomplete +// objects +// CHECK-NEW-LABEL: define zeroext i1 @_Z8compare4P1XS0_ +bool compare4(X *x, X *x2) { + // CHECK-NEW: %[[x:.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* + // CHECK-NEW: %[[xp:.*]] = bitcast i8* %[[x]] to %struct.X* + // CHECK-NEW: %[[x2:.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* + // CHECK-NEW: %[[x2p:.*]] = bitcast i8* %[[x2]] to %struct.X* + // CHECK-NEW: %cmp = icmp eq %struct.X* %[[xp]], %[[x2p]] + return x == x2; +} + +// CHECK-NEW-LABEL: define void @_Z7member1P20HoldingOtherVirtuals( +void member1(HoldingOtherVirtuals *p) { + + // CHECK-NEW-NOT: call i8* @llvm.strip.invariant.group.p0i8( + (void)p->b; +} + +// CHECK-NEW-LABEL: member2 +void member2(A *a) { + // CHECK-NEW: call i8* @llvm.strip.invariant.group.p0i8 + (void)a->m; +} + +// Check if from comparison of addresses of member we can't infer the equality +// of ap and bp. 
+// CHECK-NEW-LABEL: @_Z18testCompareMembersv( +void testCompareMembers() { + // CHECK-NEW: [[AP:%.*]] = alloca %struct.A* + // CHECK-NEW: [[APM:%.*]] = alloca i32* + // CHECK-NEW: [[BP:%.*]] = alloca %struct.B* + // CHECK-NEW: [[BPM:%.*]] = alloca i32* + + A *ap = new A; + // CHECK-NEW: call void %{{.*}}(%struct.A* %{{.*}}) + ap->foo(); + // CHECK-NEW: [[TMP7:%.*]] = load %struct.A*, %struct.A** [[AP]] + // CHECK-NEW: [[TMP8:%.*]] = bitcast %struct.A* [[TMP7]] to i8* + // CHECK-NEW: [[TMP9:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* [[TMP8]]) + // CHECK-NEW: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.A* + // CHECK-NEW: [[M:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], %struct.A* [[TMP10]], i32 0, i32 1 + // CHECK-NEW: store i32* [[M]], i32** [[APM]] + int *const apm = &ap->m; + + B *bp = new (ap) B; + + // CHECK-NEW: [[TMP20:%.*]] = load %struct.B*, %struct.B** [[BP]] + // CHECK-NEW: [[TMP21:%.*]] = bitcast %struct.B* [[TMP20]] to %struct.A* + // CHECK-NEW: [[TMP22:%.*]] = bitcast %struct.A* [[TMP21]] to i8* + // CHECK-NEW: [[TMP23:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* [[TMP22]]) + // CHECK-NEW: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to %struct.A* + // CHECK-NEW: [[M4:%.*]] = getelementptr inbounds [[STRUCT_A]], %struct.A* [[TMP24]], i32 0, i32 1 + // CHECK-NEW: store i32* [[M4]], i32** [[BPM]] + int *const bpm = &bp->m; + + // CHECK-NEW: [[TMP25:%.*]] = load i32*, i32** [[APM]] + // CHECK-NEW: [[TMP26:%.*]] = load i32*, i32** [[BPM]] + // CHECK-NEW-NOT: strip.invariant.group + // CHECK-NEW-NOT: launder.invariant.group + // CHECK-NEW: [[CMP:%.*]] = icmp eq i32* [[TMP25]], [[TMP26]] + if (apm == bpm) { + bp->foo(); + } +} + +// CHECK-NEW-LABEL: define void @_Z9testCast1v() +void testCast1() { + A *a = new A; + + // Here we get rid of dynamic info + // CHECK-NEW: call i8* @llvm.strip.invariant.group + auto *v = (void *)a; + + // CHECK-NEW: call i8* @llvm.strip.invariant.group + auto i2 = (uintptr_t)a; + (void)i2; + + // 
CHECK-NEW-NOT: @llvm.strip.invariant.group + // CHECK-NEW-NOT: @llvm.launder.invariant.group + + // The information is already stripped + auto i = (uintptr_t)v; +} + +// CHECK-NEW-LABEL: define void @_Z9testCast2y( +void testCast2(uintptr_t i) { + // CHECK-NEW-NOT: @llvm.strip.invariant.group + // CHECK-NEW: @llvm.launder.invariant.group + A *a3 = (A *)i; + (void)a3; + + // CHECK-NEW: @llvm.launder.invariant.group + auto *v2 = (void *)i; + // CHECK-NEW-NOT: @llvm.launder.invariant.group + A *a2 = (A *)v2; + (void)a2; + + // CHECK-NEW-LABEL: ret void +} + /** DTORS **/ // CHECK-DTORS-LABEL: define linkonce_odr void @_ZN10StaticBaseD2Ev( // CHECK-DTORS-NOT: call i8* @llvm.launder.invariant.group.p0i8( // CHECK-DTORS-LABEL: {{^}}} - // CHECK-DTORS-LABEL: define linkonce_odr void @_ZN25DynamicFromVirtualStatic2D2Ev( // CHECK-DTORS-NOT: invariant.barrier // CHECK-DTORS-LABEL: {{^}}} @@ -308,7 +459,6 @@ // CHECK-DTORS-NOT: call i8* @llvm.launder.invariant.group.p0i8( // CHECK-DTORS-LABEL: {{^}}} - // CHECK-DTORS-LABEL: define linkonce_odr void @_ZN22DynamicDerivedMultipleD2Ev( // CHECK-DTORS-LABEL: define linkonce_odr void @_ZN12DynamicBase2D2Ev( @@ -323,10 +473,8 @@ // CHECK-DTORS-NOT: call i8* @llvm.launder.invariant.group.p0i8( // CHECK-DTORS-LABEL: {{^}}} - // CHECK-LINK-REQ: !llvm.module.flags = !{![[FIRST:[0-9]+]], ![[SEC:[0-9]+]]{{.*}}} // CHECK-LINK-REQ: ![[FIRST]] = !{i32 1, !"StrictVTablePointers", i32 1} // CHECK-LINK-REQ: ![[SEC]] = !{i32 3, !"StrictVTablePointersRequirement", ![[META:.*]]} // CHECK-LINK-REQ: ![[META]] = !{!"StrictVTablePointers", i32 1} - Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -12924,16 +12924,16 @@ """"""""" The '``llvm.launder.invariant.group``' intrinsic can be used when an invariant -established by invariant.group metadata no longer holds, to obtain a new pointer -value that does not carry the invariant information. 
It is an experimental +established by ``invariant.group`` metadata no longer holds, to obtain a new pointer +value that carries fresh invariant group information. It is an experimental intrinsic, which means that its semantics might change in the future. Arguments: """""""""" -The ``llvm.launder.invariant.group`` takes only one argument, which is -the pointer to the memory for which the ``invariant.group`` no longer holds. +The ``llvm.launder.invariant.group`` takes only one argument, which is a pointer +to the memory. Semantics: """""""""" @@ -12942,6 +12942,43 @@ for the purposes of ``load``/``store`` ``invariant.group`` metadata. It does not read any accessible memory and the execution can be speculated. +'``llvm.strip.invariant.group``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. The memory object can belong to any address +space. The returned pointer must belong to the same address space as the +argument. + +:: + + declare i8* @llvm.strip.invariant.group.p0i8(i8* <ptr>) + +Overview: +""""""""" + +The '``llvm.strip.invariant.group``' intrinsic can be used when an invariant +established by ``invariant.group`` metadata no longer holds, to obtain a new pointer +value that does not carry the invariant information. It is an experimental +intrinsic, which means that its semantics might change in the future. + + +Arguments: +"""""""""" + +The ``llvm.strip.invariant.group`` takes only one argument, which is a pointer +to the memory. + +Semantics: +"""""""""" + +Returns another pointer that aliases its argument but which has no associated +``invariant.group`` metadata. +It does not read any memory and can be speculated. + + + .. 
_constrainedfp: Constrained Floating-Point Intrinsics Index: llvm/include/llvm/IR/IRBuilder.h =================================================================== --- llvm/include/llvm/IR/IRBuilder.h +++ llvm/include/llvm/IR/IRBuilder.h @@ -1968,6 +1968,7 @@ Value *CreateLaunderInvariantGroup(Value *Ptr) { assert(isa(Ptr->getType()) && "launder.invariant.group only applies to pointers."); + // FIXME: we could potentially avoid casts to/from i8*. auto *PtrType = Ptr->getType(); auto *Int8PtrTy = getInt8PtrTy(PtrType->getPointerAddressSpace()); if (PtrType != Int8PtrTy) @@ -1988,6 +1989,34 @@ return Fn; } + /// \brief Create a strip.invariant.group intrinsic call. If Ptr type is + /// different from pointer to i8, it's casted to pointer to i8 in the same + /// address space before call and casted back to Ptr type after call. + Value *CreateStripInvariantGroup(Value *Ptr) { + assert(isa(Ptr->getType()) && + "strip.invariant.group only applies to pointers."); + + // FIXME: we could potentially avoid casts to/from i8*. + auto *PtrType = Ptr->getType(); + auto *Int8PtrTy = getInt8PtrTy(PtrType->getPointerAddressSpace()); + if (PtrType != Int8PtrTy) + Ptr = CreateBitCast(Ptr, Int8PtrTy); + Module *M = BB->getParent()->getParent(); + Function *FnStripInvariantGroup = Intrinsic::getDeclaration( + M, Intrinsic::strip_invariant_group, {Int8PtrTy}); + + assert(FnStripInvariantGroup->getReturnType() == Int8PtrTy && + FnStripInvariantGroup->getFunctionType()->getParamType(0) == + Int8PtrTy && + "StripInvariantGroup should take and return the same type"); + + CallInst *Fn = CreateCall(FnStripInvariantGroup, {Ptr}); + + if (PtrType != Int8PtrTy) + return CreateBitCast(Fn, PtrType); + return Fn; + } + /// Return a vector value that contains \arg V broadcasted to \p /// NumElts elements. 
Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") { Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -728,6 +728,11 @@ [LLVMMatchType<0>], [IntrInaccessibleMemOnly, IntrSpeculatable]>; + +def int_strip_invariant_group : Intrinsic<[llvm_anyptr_ty], + [LLVMMatchType<0>], + [IntrSpeculatable, IntrNoMem]>; + //===------------------------ Stackmap Intrinsics -------------------------===// // def int_experimental_stackmap : Intrinsic<[], Index: llvm/lib/Analysis/CaptureTracking.cpp =================================================================== --- llvm/lib/Analysis/CaptureTracking.cpp +++ llvm/lib/Analysis/CaptureTracking.cpp @@ -247,7 +247,7 @@ if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) break; - // launder.invariant.group only captures pointer by returning it, + // {launder,strip}.invariant.group only captures pointer by returning it, // so the pointer wasn't captured if returned pointer is not captured. // This intrinsic is not marked as nocapture, because it would require // to mark the argument as returned, which would make the launder useless. @@ -255,7 +255,8 @@ // marked with nocapture do not capture. This means that places like // GetUnderlyingObject in ValueTracking or DecomposeGEPExpression // in BasicAA also assume this aliasing property of the launder. 
- if (CS.getIntrinsicID() == Intrinsic::launder_invariant_group) { + if (CS.getIntrinsicID() == Intrinsic::launder_invariant_group || + CS.getIntrinsicID() == Intrinsic::strip_invariant_group) { AddUses(I); break; } Index: llvm/lib/Analysis/ConstantFolding.cpp =================================================================== --- llvm/lib/Analysis/ConstantFolding.cpp +++ llvm/lib/Analysis/ConstantFolding.cpp @@ -1393,6 +1393,7 @@ case Intrinsic::fmuladd: case Intrinsic::copysign: case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: case Intrinsic::round: case Intrinsic::masked_load: case Intrinsic::sadd_with_overflow: @@ -1596,14 +1597,16 @@ return Constant::getNullValue(Ty); if (IntrinsicID == Intrinsic::bswap || IntrinsicID == Intrinsic::bitreverse || - IntrinsicID == Intrinsic::launder_invariant_group) + IntrinsicID == Intrinsic::launder_invariant_group || + IntrinsicID == Intrinsic::strip_invariant_group) return Operands[0]; } if (isa(Operands[0]) && Operands[0]->getType()->getPointerAddressSpace() == 0) { - // launder(null) == null iff in addrspace 0 - if (IntrinsicID == Intrinsic::launder_invariant_group) + // launder(null) == null == strip(null) iff in addrspace 0 + if (IntrinsicID == Intrinsic::launder_invariant_group || + IntrinsicID == Intrinsic::strip_invariant_group) return Operands[0]; return nullptr; } Index: llvm/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/lib/Analysis/ValueTracking.cpp +++ llvm/lib/Analysis/ValueTracking.cpp @@ -3390,7 +3390,8 @@ if (const Value *RV = CS.getReturnedArgOperand()) return RV; // This can be used only as a aliasing property. 
- if (CS.getIntrinsicID() == Intrinsic::launder_invariant_group) + if (CS.getIntrinsicID() == Intrinsic::launder_invariant_group || + CS.getIntrinsicID() == Intrinsic::strip_invariant_group) return CS.getArgOperand(0); return nullptr; } Index: llvm/lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- llvm/lib/CodeGen/CodeGenPrepare.cpp +++ llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1694,6 +1694,7 @@ return true; } case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: II->replaceAllUsesWith(II->getArgOperand(0)); II->eraseFromParent(); return true; Index: llvm/lib/CodeGen/SelectionDAG/FastISel.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1446,6 +1446,7 @@ return true; } case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: case Intrinsic::expect: { unsigned ResultReg = getRegForValue(II->getArgOperand(0)); if (!ResultReg) Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5750,6 +5750,7 @@ case Intrinsic::annotation: case Intrinsic::ptr_annotation: case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return nullptr; Index: llvm/lib/IR/Value.cpp =================================================================== --- llvm/lib/IR/Value.cpp +++ llvm/lib/IR/Value.cpp @@ -521,7 +521,8 @@ // but it can't be marked with returned attribute, that's why it needs // special case. 
if (StripKind == PSK_ZeroIndicesAndAliasesAndInvariantGroups && - CS.getIntrinsicID() == Intrinsic::launder_invariant_group) { + (CS.getIntrinsicID() == Intrinsic::launder_invariant_group || + CS.getIntrinsicID() == Intrinsic::strip_invariant_group)) { V = CS.getArgOperand(0); continue; } Index: llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -457,6 +457,7 @@ case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: case Intrinsic::objectsize: return true; default: @@ -883,6 +884,7 @@ case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: Intr->eraseFromParent(); // FIXME: I think the invariant marker should still theoretically apply, // but the intrinsics need to be changed to accept pointers with any Index: llvm/test/Analysis/ValueTracking/invariant.group.ll =================================================================== --- llvm/test/Analysis/ValueTracking/invariant.group.ll +++ llvm/test/Analysis/ValueTracking/invariant.group.ll @@ -1,7 +1,7 @@ ; RUN: opt -S -instsimplify -instcombine < %s | FileCheck %s -; CHECK-LABEL: define void @checkNonnull() -define void @checkNonnull() { +; CHECK-LABEL: define void @checkNonnullLaunder() +define void @checkNonnullLaunder() { ; CHECK: %p = call i8* @llvm.launder.invariant.group.p0i8(i8* nonnull %0) ; CHECK: %p2 = call i8* @llvm.launder.invariant.group.p0i8(i8* nonnull %p) ; CHECK: call void @use(i8* nonnull %p2) @@ -15,5 +15,22 @@ ret void } +; CHECK-LABEL: define void @checkNonnullStrip() +define void @checkNonnullStrip() { +; CHECK: %p = call i8* @llvm.strip.invariant.group.p0i8(i8* nonnull %0) +; CHECK: %p2 = call i8* @llvm.strip.invariant.group.p0i8(i8* nonnull %p) +; 
CHECK: call void @use(i8* nonnull %p2) +entry: + %0 = alloca i8, align 8 + + %p = call i8* @llvm.strip.invariant.group.p0i8(i8* %0) + %p2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %p) + call void @use(i8* %p2) + + ret void +} + declare i8* @llvm.launder.invariant.group.p0i8(i8*) +declare i8* @llvm.strip.invariant.group.p0i8(i8*) + declare void @use(i8*) Index: llvm/test/CodeGen/Generic/intrinsics.ll =================================================================== --- llvm/test/CodeGen/Generic/intrinsics.ll +++ llvm/test/CodeGen/Generic/intrinsics.ll @@ -41,11 +41,19 @@ declare i8* @llvm.launder.invariant.group(i8*) -define i8* @barrier(i8* %p) { +define i8* @launder(i8* %p) { %q = call i8* @llvm.launder.invariant.group(i8* %p) ret i8* %q } +declare i8* @llvm.strip.invariant.group(i8*) + +define i8* @strip(i8* %p) { + %q = call i8* @llvm.strip.invariant.group(i8* %p) + ret i8* %q +} + + ; sideeffect declare void @llvm.sideeffect() Index: llvm/test/Other/invariant.group.ll =================================================================== --- llvm/test/Other/invariant.group.ll +++ llvm/test/Other/invariant.group.ll @@ -77,8 +77,14 @@ define void @dontProveEquality(i8* %a) { %b = call i8* @llvm.launder.invariant.group.p0i8(i8* %a) %r = icmp eq i8* %b, %a -;CHECK: call void @useBool(i1 %r) +; CHECK: call void @useBool(i1 %r) call void @useBool(i1 %r) + + %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a) + %r2 = icmp eq i8* %b2, %a +; CHECK: call void @useBool(i1 %r2) + call void @useBool(i1 %r2) + ret void } @@ -90,5 +96,9 @@ ; CHECK-NEXT: declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8* @llvm.launder.invariant.group.p0i8(i8*) -!0 = !{} +; CHECK: Function Attrs: nounwind readnone speculatable{{$}} +; CHECK-NEXT: declare i8* @llvm.strip.invariant.group.p0i8(i8*) +declare i8* @llvm.strip.invariant.group.p0i8(i8*) + +!0 = !{} \ No newline at end of file Index: llvm/test/Transforms/CodeGenPrepare/invariant.group.ll 
=================================================================== --- llvm/test/Transforms/CodeGenPrepare/invariant.group.ll +++ llvm/test/Transforms/CodeGenPrepare/invariant.group.ll @@ -7,8 +7,8 @@ enter: ; CHECK-NOT: !invariant.group ; CHECK-NOT: @llvm.launder.invariant.group.p0i8( - ; CHECK: %val = load i8, i8* @tmp, !tbaa - %val = load i8, i8* @tmp, !invariant.group !0, !tbaa !{!1, !1, i64 0} + ; CHECK: %val = load i8, i8* @tmp{{$}} + %val = load i8, i8* @tmp, !invariant.group !0 %ptr = call i8* @llvm.launder.invariant.group.p0i8(i8* @tmp) ; CHECK: store i8 42, i8* @tmp{{$}} @@ -18,7 +18,23 @@ } ; CHECK-LABEL: } -declare i8* @llvm.launder.invariant.group.p0i8(i8*) +; CHECK-LABEL: define void @foo2() { +define void @foo2() { +enter: + ; CHECK-NOT: !invariant.group + ; CHECK-NOT: @llvm.strip.invariant.group.p0i8( + ; CHECK: %val = load i8, i8* @tmp{{$}} + %val = load i8, i8* @tmp, !invariant.group !0 + %ptr = call i8* @llvm.strip.invariant.group.p0i8(i8* @tmp) + + ; CHECK: store i8 42, i8* @tmp{{$}} + store i8 42, i8* %ptr, !invariant.group !0 + ret void +} +; CHECK-LABEL: } + + +declare i8* @llvm.launder.invariant.group.p0i8(i8*) +declare i8* @llvm.strip.invariant.group.p0i8(i8*) !0 = !{} -!1 = !{!"x", !0} Index: llvm/test/Transforms/DeadStoreElimination/launder.invariant.group.ll =================================================================== --- llvm/test/Transforms/DeadStoreElimination/launder.invariant.group.ll +++ llvm/test/Transforms/DeadStoreElimination/launder.invariant.group.ll @@ -27,4 +27,39 @@ ret void } +; CHECK-LABEL: void @skip3Barriers(i8* %ptr) +define void @skip3Barriers(i8* %ptr) { +; CHECK-NOT: store i8 42 + store i8 42, i8* %ptr +; CHECK: %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr) + %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr) +; CHECK-NOT: store i8 43 + store i8 43, i8* %ptr2 + %ptr3 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr2) + %ptr4 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr3) + 
+; CHECK: store i8 44 + store i8 44, i8* %ptr4 + ret void +} + +; CHECK-LABEL: void @skip4Barriers(i8* %ptr) +define void @skip4Barriers(i8* %ptr) { +; CHECK-NOT: store i8 42 + store i8 42, i8* %ptr +; CHECK: %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr) + %ptr2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr) +; CHECK-NOT: store i8 43 + store i8 43, i8* %ptr2 + %ptr3 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr2) + %ptr4 = call i8* @llvm.strip.invariant.group.p0i8(i8* %ptr3) + %ptr5 = call i8* @llvm.launder.invariant.group.p0i8(i8* %ptr3) + +; CHECK: store i8 44 + store i8 44, i8* %ptr5 + ret void +} + + declare i8* @llvm.launder.invariant.group.p0i8(i8*) +declare i8* @llvm.strip.invariant.group.p0i8(i8*) \ No newline at end of file Index: llvm/test/Transforms/FunctionAttrs/nocapture.ll =================================================================== --- llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -237,4 +237,21 @@ ret void } +; CHECK: @nocaptureStrip(i8* nocapture %p) +define void @nocaptureStrip(i8* %p) { +entry: + %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %p) + store i8 42, i8* %b + ret void +} + +@g3 = global i8* null +; CHECK: define void @captureStrip(i8* %p) +define void @captureStrip(i8* %p) { + %b = call i8* @llvm.strip.invariant.group.p0i8(i8* %p) + store i8* %b, i8** @g3 + ret void +} + declare i8* @llvm.launder.invariant.group.p0i8(i8*) +declare i8* @llvm.strip.invariant.group.p0i8(i8*) Index: llvm/test/Transforms/GVN/invariant.group.ll =================================================================== --- llvm/test/Transforms/GVN/invariant.group.ll +++ llvm/test/Transforms/GVN/invariant.group.ll @@ -51,6 +51,18 @@ ret i8 %b } +; CHECK-LABEL: define i1 @proveEqualityForStrip( +define i1 @proveEqualityForStrip(i8* %a) { +; FIXME: The first call could be also removed by GVN. Right now +; DCE removes it. The second call is CSE'd with the first one. 
+; CHECK: %b1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a) + %b1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a) +; CHECK-NOT: llvm.strip.invariant.group + %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a) + %r = icmp eq i8* %b1, %b2 +; CHECK: ret i1 true + ret i1 %r +} ; CHECK-LABEL: define i8 @unoptimizable1() { define i8 @unoptimizable1() { entry: @@ -437,10 +449,10 @@ declare void @fooBit(i1*, i1) declare i8* @llvm.launder.invariant.group.p0i8(i8*) +declare i8* @llvm.strip.invariant.group.p0i8(i8*) + -; Function Attrs: nounwind -declare void @llvm.assume(i1 %cmp.vtables) #0 +declare void @llvm.assume(i1 %cmp.vtables) -attributes #0 = { nounwind } !0 = !{} \ No newline at end of file Index: llvm/test/Transforms/GlobalOpt/invariant.group.ll =================================================================== --- llvm/test/Transforms/GlobalOpt/invariant.group.ll +++ llvm/test/Transforms/GlobalOpt/invariant.group.ll @@ -27,15 +27,15 @@ define void @_optimizable() { enter: %valptr = alloca i32 - + %val = call i32 @TheAnswerToLifeTheUniverseAndEverything() store i32 %val, i32* @tmp store i32 %val, i32* %valptr - + %0 = bitcast i32* %valptr to i8* %barr = call i8* @llvm.launder.invariant.group(i8* %0) %1 = bitcast i8* %barr to i32* - + %val2 = load i32, i32* %1 store i32 %val2, i32* @tmp2 ret void @@ -43,30 +43,30 @@ ; We can't step through launder.invariant.group here, because that would change ; this load in @usage_of_globals() -; val = load i32, i32* %ptrVal, !invariant.group !0 -; into +; val = load i32, i32* %ptrVal, !invariant.group !0 +; into ; %val = load i32, i32* @tmp3, !invariant.group !0 -; and then we could assume that %val and %val2 to be the same, which coud be +; and then we could assume that %val and %val2 to be the same, which coud be ; false, because @changeTmp3ValAndCallBarrierInside() may change the value ; of @tmp3. 
define void @_not_optimizable() { enter: store i32 13, i32* @tmp3, !invariant.group !0 - + %0 = bitcast i32* @tmp3 to i8* %barr = call i8* @llvm.launder.invariant.group(i8* %0) %1 = bitcast i8* %barr to i32* - + store i32* %1, i32** @ptrToTmp3 store i32 42, i32* %1, !invariant.group !0 - + ret void } define void @usage_of_globals() { entry: %ptrVal = load i32*, i32** @ptrToTmp3 %val = load i32, i32* %ptrVal, !invariant.group !0 - + call void @changeTmp3ValAndCallBarrierInside() %val2 = load i32, i32* @tmp3, !invariant.group !0 ret void; Index: llvm/test/Transforms/InstCombine/invariant.group.ll =================================================================== --- llvm/test/Transforms/InstCombine/invariant.group.ll +++ llvm/test/Transforms/InstCombine/invariant.group.ll @@ -1,5 +1,6 @@ ; RUN: opt -instcombine -S < %s | FileCheck %s + ; CHECK-LABEL: define i8* @simplifyNullLaunder() define i8* @simplifyNullLaunder() { ; CHECK-NEXT: ret i8* null @@ -29,6 +30,39 @@ ret i8 addrspace(42)* %b2 } - declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8 addrspace(42)* @llvm.launder.invariant.group.p42i8(i8 addrspace(42)*) + + +; CHECK-LABEL: define i8* @simplifyNullStrip() +define i8* @simplifyNullStrip() { +; CHECK-NEXT: ret i8* null + %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* null) + ret i8* %b2 +} + +; CHECK-LABEL: define i8 addrspace(42)* @dontsimplifyNullStripForDifferentAddrspace() +define i8 addrspace(42)* @dontsimplifyNullStripForDifferentAddrspace() { +; CHECK: %b2 = call i8 addrspace(42)* @llvm.strip.invariant.group.p42i8(i8 addrspace(42)* null) +; CHECK: ret i8 addrspace(42)* %b2 + %b2 = call i8 addrspace(42)* @llvm.strip.invariant.group.p42i8(i8 addrspace(42)* null) + ret i8 addrspace(42)* %b2 +} + +; CHECK-LABEL: define i8* @simplifyUndefStrip() +define i8* @simplifyUndefStrip() { +; CHECK-NEXT: ret i8* undef + %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* undef) + ret i8* %b2 +} + +; CHECK-LABEL: define i8 addrspace(42)* 
@simplifyUndefStrip2() +define i8 addrspace(42)* @simplifyUndefStrip2() { +; CHECK-NEXT: ret i8 addrspace(42)* undef + %b2 = call i8 addrspace(42)* @llvm.strip.invariant.group.p42i8(i8 addrspace(42)* undef) + ret i8 addrspace(42)* %b2 +} + +declare i8* @llvm.strip.invariant.group.p0i8(i8*) +declare i8 addrspace(42)* @llvm.strip.invariant.group.p42i8(i8 addrspace(42)*) + Index: llvm/test/Transforms/NewGVN/invariant.group.ll =================================================================== --- llvm/test/Transforms/NewGVN/invariant.group.ll +++ llvm/test/Transforms/NewGVN/invariant.group.ll @@ -52,6 +52,19 @@ ret i8 %b } +; CHECK-LABEL: define i1 @proveEqualityForStrip( +define i1 @proveEqualityForStrip(i8* %a) { +; FIXME: The first call could be also removed by GVN. Right now +; DCE removes it. The second call is CSE'd with the first one. +; CHECK: %b1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a) + %b1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a) +; CHECK-NOT: llvm.strip.invariant.group + %b2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a) + %r = icmp eq i8* %b1, %b2 +; CHECK: ret i1 true + ret i1 %r +} + ; CHECK-LABEL: define i8 @unoptimizable1() { define i8 @unoptimizable1() { entry: