Index: include/llvm/IR/Instructions.h
===================================================================
--- include/llvm/IR/Instructions.h
+++ include/llvm/IR/Instructions.h
@@ -4897,7 +4897,7 @@
   void setPreservedInvariantInfo(PreservedInvariantInfo &Preserved,
                                  BasicBlock::iterator &ScanBackwardFrom,
-                                 Value *Query, Value *QueryObj, BasicBlock *BB);
+                                 Value *Query, Value* QueryObj, BasicBlock *BB);
 
 } // End llvm namespace
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -523,7 +523,7 @@
                                     [llvm_i64_ty, llvm_ptr_ty],
                                     [IntrReadWriteArgMem, NoCapture<1>]>;
 def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
-                                    [llvm_i64_ty, llvm_ptr_ty],
+                                    [llvm_i64_ty, llvm_ptr_ty, llvm_vararg_ty],
                                     [IntrReadWriteArgMem, NoCapture<1>]>;
 def int_invariant_end   : Intrinsic<[],
                                     [llvm_descriptor_ty, llvm_i64_ty,
Index: lib/Analysis/BasicAliasAnalysis.cpp
===================================================================
--- lib/Analysis/BasicAliasAnalysis.cpp
+++ lib/Analysis/BasicAliasAnalysis.cpp
@@ -467,6 +467,60 @@
   return V;
 }
 
+/// Checks that a given invariant_start call writeonce-covers a given
+/// memory location pointer.
+static bool checkCoverage(IntrinsicInst *II,
+                          const llvm::Value *LocPtr, const DataLayout &DL) {
+  if (!II) return false;
+
+  assert(II->getIntrinsicID() == Intrinsic::invariant_start &&
+         "Given intrinsic instruction must be invariant_start");
+
+  // If the instruction has no offset, then LocPtr is pointing to constant
+  // (writeonce) memory, and thus is either a global variable, an alloca
+  // instruction, or a GEP instruction accessing either one of those.
+  if (II->getNumArgOperands() <= 2)
+    return true;
+
+  unsigned GEPOffsetBits = 0;
+  bool GEPOffsetChanged = false;
+  APInt GEPOffset;
+
+  // Otherwise, LocPtr itself is not pointing to constant memory, but
+  // sub-locations at offsets are constant. In addition, LocPtr must be
+  // a GEP instruction.
+  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LocPtr)) {
+    GEPOffsetBits = DL.getPointerTypeSizeInBits(GEP->getType());
+    GEPOffset = APInt(GEPOffsetBits, 0);
+    GEPOffsetChanged = GEP->accumulateConstantOffset(DL, GEPOffset);
+  } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(LocPtr)) {
+    GEPOffsetBits = DL.getPointerTypeSizeInBits(GEP->getType());
+    GEPOffset = APInt(GEPOffsetBits, 0);
+    GEPOffsetChanged = GEP->accumulateConstantOffset(DL, GEPOffset);
+  }
+  if (GEPOffsetChanged) {
+    GEPOffset *= APInt(GEPOffsetBits, 8);
+    for (unsigned i = 2; i < II->getNumArgOperands(); i += 2) {
+      llvm::ConstantInt *FESize =
+          dyn_cast_or_null<llvm::ConstantInt>(II->getArgOperand(i));
+      llvm::ConstantInt *FEOffset =
+          dyn_cast_or_null<llvm::ConstantInt>(II->getArgOperand(i+1));
+      assert(FEOffset && FESize &&
+             "Frontend must store these offsets (and corresponding sizes) "
+             "in bits as ConstantInts.");
+      const APInt &Offset = FEOffset->getValue();
+
+      if (Offset.ule(GEPOffset) &&
+          GEPOffset.ule(Offset + FESize->getZExtValue()))
+        return true;
+    }
+  }
+
+  // If the GEP offset has not changed, then LocPtr must be pointing to
+  // the beginning of allocated memory, which is not writeonce.
+  return false;
+}
+
 /// Returns whether the given pointer value points to memory that is local to
 /// the function, with global constants being considered local to all
 /// functions.
@@ -478,7 +532,9 @@
   SmallVector<const Value *, 16> Worklist;
   Worklist.push_back(Loc.Ptr);
   do {
-    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
+    //const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
+    auto LocPtr = Worklist.pop_back_val();
+    const Value *V = GetUnderlyingObject(LocPtr, DL);
     if (!Visited.insert(V).second) {
       Visited.clear();
       return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
@@ -489,7 +545,7 @@
       continue;
 
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
-      if (AI->getInvariantStartInstruction())
+      if (checkCoverage(AI->getInvariantStartInstruction(), LocPtr, DL))
        continue;
 
     // A global constant counts as local memory for our purposes.
@@ -497,7 +553,8 @@
     // Note: this doesn't require GV to be "ODR" because it isn't legal for a
     // global to be marked constant in some modules and non-constant in
    // others. GV may even be a declaration, not a definition.
-    if (!GV->isConstant() && !GV->getInvariantStartInstruction()) {
+    if (!GV->isConstant() &&
+        !checkCoverage(GV->getInvariantStartInstruction(), LocPtr, DL)) {
       Visited.clear();
       return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
     }
Index: lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- lib/Analysis/MemoryDependenceAnalysis.cpp
+++ lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -449,12 +449,11 @@
   while (ScanIt != BB->begin()) {
     Instruction *Inst = --ScanIt;
 
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
       // Debug intrinsics don't (and can't) cause dependencies.
       if (isa<DbgInfoIntrinsic>(II)) continue;
 
       // Same for invariant intrinsics.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
       if (II->getIntrinsicID() == Intrinsic::invariant_start) {
         if (II == Preserved.II)
           // We did not skip any instruction earlier. So, we must express that
@@ -640,6 +639,7 @@
       if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
         return MemDepResult::getDef(Inst);
 
+      if (isInvariantLoad) continue;
       // Be conservative if the accessed pointer may alias the allocation.
Index: test/Feature/memorymarkers.ll
===================================================================
--- test/Feature/memorymarkers.ll
+++ test/Feature/memorymarkers.ll
@@ -6,7 +6,7 @@
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
 
-declare {}* @llvm.invariant.start(i64, i8* nocapture) readonly nounwind
+declare {}* @llvm.invariant.start(i64, i8* nocapture, ...) readonly nounwind
 declare void @llvm.invariant.end({}*, i64, i8* nocapture) nounwind
 
 define i32 @_Z4foo2v() nounwind {
@@ -24,7 +24,7 @@
   store i32 5, i32* %1, align 4
 
   ;; Constructor has finished here.
-  %inv = call {}* @llvm.invariant.start(i64 8, i8* %y)
+  %inv = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 8, i8* %y)
   call void @_Z3barRKi(i32* %0) nounwind
 
   %2 = load i32, i32* %0, align 8
Index: test/Transforms/GlobalOpt/invariant-nodatalayout.ll
===================================================================
--- test/Transforms/GlobalOpt/invariant-nodatalayout.ll
+++ test/Transforms/GlobalOpt/invariant-nodatalayout.ll
@@ -1,14 +1,14 @@
 ; RUN: opt -globalopt -S -o - < %s | FileCheck %s
 ; The check here is that it doesn't crash.
 
-declare {}* @llvm.invariant.start(i64 %size, i8* nocapture %ptr)
+declare {}* @llvm.invariant.start(i64 %size, i8* nocapture %ptr, ...)
 
 @object1 = global { i32, i32 } zeroinitializer
 ; CHECK: @object1 = global { i32, i32 } zeroinitializer
 
 define void @ctor1() {
   %ptr = bitcast {i32, i32}* @object1 to i8*
-  call {}* @llvm.invariant.start(i64 4, i8* %ptr)
+  call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %ptr)
   ret void
 }
Index: test/Transforms/GlobalOpt/invariant.ll
===================================================================
--- test/Transforms/GlobalOpt/invariant.ll
+++ test/Transforms/GlobalOpt/invariant.ll
@@ -3,10 +3,10 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-declare {}* @llvm.invariant.start(i64 %size, i8* nocapture %ptr)
+declare {}* @llvm.invariant.start(i64 %size, i8* nocapture %ptr, ...)
 
 define void @test1(i8* %ptr) {
-  call {}* @llvm.invariant.start(i64 4, i8* %ptr)
+  call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %ptr)
   ret void
 }
 
@@ -25,7 +25,7 @@
 define void @ctor2() {
   store i32 -1, i32* @object2
   %A = bitcast i32* @object2 to i8*
-  %B = call {}* @llvm.invariant.start(i64 4, i8* %A)
+  %B = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %A)
   %C = bitcast {}* %B to i8*
   ret void
 }
@@ -36,7 +36,7 @@
 define void @ctor3() {
   store i32 -1, i32* @object3
   %A = bitcast i32* @object3 to i8*
-  call {}* @llvm.invariant.start(i64 3, i8* %A)
+  call {}* (i64, i8*, ...) @llvm.invariant.start(i64 3, i8* %A)
   ret void
 }
 
@@ -46,7 +46,7 @@
 define void @ctor4() {
   store i32 -1, i32* @object4
   %A = bitcast i32* @object4 to i8*
-  call {}* @llvm.invariant.start(i64 -1, i8* %A)
+  call {}* (i64, i8*, ...) @llvm.invariant.start(i64 -1, i8* %A)
   ret void
 }
 
Index: test/Transforms/InstCombine/invariant.ll
===================================================================
--- test/Transforms/InstCombine/invariant.ll
+++ test/Transforms/InstCombine/invariant.ll
@@ -3,13 +3,13 @@
 
 declare void @g(i8*)
 
-declare {}* @llvm.invariant.start(i64, i8* nocapture) nounwind readonly
+declare {}* @llvm.invariant.start(i64, i8* nocapture, ...) nounwind readonly
 
 define i8 @f() {
   %a = alloca i8                                  ; <i8*> [#uses=4]
   store i8 0, i8* %a
-  %i = call {}* @llvm.invariant.start(i64 1, i8* %a) ; <{}*> [#uses=0]
-  ; CHECK: call {}* @llvm.invariant.start
+  %i = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 1, i8* %a) ; <{}*> [#uses=0]
+  ; CHECK: call {}* (i64, i8*, ...) @llvm.invariant.start
   call void @g(i8* %a)
   %r = load i8, i8* %a                            ; <i8> [#uses=1]
   ret i8 %r
Index: test/Transforms/LoadElim/global-local-vars.ll
===================================================================
--- test/Transforms/LoadElim/global-local-vars.ll
+++ test/Transforms/LoadElim/global-local-vars.ll
@@ -47,7 +47,7 @@
   %1 = bitcast %struct.A* %j to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.A* @_ZL1i to i8*), i64 4, i32 4, i1 false)
   %2 = bitcast %struct.A* %j to i8*
-  %3 = call {}* @llvm.invariant.start(i64 4, i8* %2)
+  %3 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %2)
   ; CHECK-NOT: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   %4 = bitcast %struct.A* %agg.tmp to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast (%struct.A* @_ZL1i to i8*), i64 4, i32 4, i1 false)
@@ -245,7 +245,7 @@
   ; CHECK-4-5A1-5B: load i32, i32*
   ; CHECK-5A2-5B: load i32, i32*
   ; CHECK-ALL: load i32, i32*
-  %3 = call {}* @llvm.invariant.start(i64 4, i8* %2)
+  %3 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %2)
   ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   %4 = bitcast %struct.A* %agg.tmp to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* bitcast (%struct.A* @_ZL1i to i8*), i64 4, i32 4, i1 false)
@@ -294,7 +294,7 @@
   %call = call i32 @_Z3onev()
   call void @_ZN1AC1Ei(%struct.A* @_ZL1i, i32 %call)
   ; CHECKINL: store i32 {{.*}}, i32*
-  %0 = call {}* @llvm.invariant.start(i64 4, i8* bitcast (%struct.A* @_ZL1i to i8*))
+  %0 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* bitcast (%struct.A* @_ZL1i to i8*))
   ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   ret void
 }
@@ -304,7 +304,7 @@
 declare void @_Z3fooPK1A(%struct.A*)
 declare void @_Z4foo2PK1AS1_(%struct.A*, %struct.A*)
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
-declare {}* @llvm.invariant.start(i64, i8* nocapture)
+declare {}* @llvm.invariant.start(i64, i8* nocapture, ...)
 declare void @llvm.invariant.end({}*, i64, i8* nocapture)
 declare void @llvm.lifetime.start(i64, i8* nocapture)
 declare void @llvm.lifetime.end(i64, i8* nocapture)
Index: test/Transforms/LoadElim/global-vars.ll
===================================================================
--- test/Transforms/LoadElim/global-vars.ll
+++ test/Transforms/LoadElim/global-vars.ll
@@ -27,8 +27,11 @@
 ; RUN: opt < %s -globalopt -available-load-scan-limit=6 -instcombine -functionattrs -tailcallelim -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-4-5A-5B1 --check-prefix=CHECKLOAD-5B2
 
 %struct.A = type { i32 }
+%struct.B = type { i32, %struct.P }
+%struct.P = type { i32 }
 
 @_ZL1i = internal global %struct.A zeroinitializer
+@i = global %struct.B zeroinitializer
 @k = global %struct.A zeroinitializer
 @_ZL1j = internal global %struct.A zeroinitializer
 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_global, i8* null }]
@@ -259,30 +262,70 @@
   ret void
 }
 
+;; Example 1 with member objects.
+;; void ex1m() {
+;;   A i(one());
+;;   bar(i.b);   // First load.
+;;   foo(&i.b);
+;;   bar(i.b);   // No load.
+define void @_Z4ex1mv() {
+entry:
+  %agg.tmp = alloca %struct.P
+  %agg.tmp1 = alloca %struct.P
+  %0 = bitcast %struct.P* %agg.tmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.P* getelementptr inbounds (%struct.B, %struct.B* @i, i32 0, i32 1) to i8*), i64 4, i32 4, i1 false)
+  %coerce.dive = getelementptr inbounds %struct.P, %struct.P* %agg.tmp, i32 0, i32 0
+  %1 = load i32, i32* %coerce.dive
+  ; CHECK: load i32, i32*
+  call void @_Z3bar1P(i32 %1)
+  call void @_Z3fooPK1P(%struct.P* getelementptr inbounds (%struct.B, %struct.B* @i, i32 0, i32 1))
+  %2 = bitcast %struct.P* %agg.tmp1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* bitcast (%struct.P* getelementptr inbounds (%struct.B, %struct.B* @i, i32 0, i32 1) to i8*), i64 4, i32 4, i1 false)
+  %coerce.dive2 = getelementptr inbounds %struct.P, %struct.P* %agg.tmp1, i32 0, i32 0
+  %3 = load i32, i32* %coerce.dive2
+  ; CHECK-NOT: load i32, i32*
+  call void @_Z3bar1P(i32 %3)
+  ret void
+}
+
+
 define internal void @__cxx_global_var_init() {
 entry:
   %call = call i32 @_Z3onev()
   call void @_ZN1AC1Ei(%struct.A* @_ZL1i, i32 %call)
   ; CHECKINL: store i32 {{.*}}, i32*
-  %0 = call {}* @llvm.invariant.start(i64 4, i8* bitcast (%struct.A* @_ZL1i to i8*))
+  %0 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* bitcast (%struct.A* @_ZL1i to i8*))
   ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   ret void
 }
 
+define internal void @__cxx_global_var_init.3() {
+entry:
+  %call = call i32 @_Z3onev()
+  call void @_ZN1BC1Ei(%struct.B* @i, i32 %call)
+  %0 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 8, i8* bitcast (%struct.B* @i to i8*), i64 8, i64 32)
+  ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* {{.*}},
+  ret void
+}
+
 define internal void @__cxx_global_var_init.2() {
 entry:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.A* @_ZL1j to i8*), i8* bitcast (%struct.A* @_ZL1i to i8*), i64 4, i32 4, i1 false)
-  %0 = call {}* @llvm.invariant.start(i64 4, i8* bitcast (%struct.A* @_ZL1j to i8*))
+  %0 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* bitcast (%struct.A* @_ZL1j to i8*))
   ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   ret void
 }
+
 declare i32 @_Z3onev()
 declare void @_Z3bar1A(i32)
 declare void @_Z3fooPK1A(%struct.A*)
 declare void @_Z4foo2PK1AS1_(%struct.A*, %struct.A*)
+declare void @_Z3bar1P(i32)
+declare void @_Z3fooPK1P(%struct.P*)
+declare void @_ZN1BC1Ei(%struct.B*, i32)
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
-declare {}* @llvm.invariant.start(i64, i8* nocapture)
+declare {}* @llvm.invariant.start(i64, i8* nocapture, ...)
 declare void @llvm.invariant.end({}*, i64, i8* nocapture)
 declare void @llvm.lifetime.start(i64, i8* nocapture)
 declare void @llvm.lifetime.end(i64, i8* nocapture)
@@ -290,6 +333,7 @@
 define internal void @_GLOBAL__sub_I_global() {
 entry:
   call void @__cxx_global_var_init()
+  call void @__cxx_global_var_init.3()
   call void @__cxx_global_var_init.2()
   call void @__cxx_global_var_init.1()
   ret void
Index: test/Transforms/LoadElim/local-vars.ll
===================================================================
--- test/Transforms/LoadElim/local-vars.ll
+++ test/Transforms/LoadElim/local-vars.ll
@@ -24,6 +24,8 @@
 
 %struct.A = type { i32 }
+%struct.B = type { i32, %struct.P }
+%struct.P = type { i32 }
 
 ;; Example 1: Duplicate loads.
 ;; void ex1() {
@@ -43,7 +45,7 @@
   call void @_ZN1AC2Ei(%struct.A* %i, i32 %call)
   ; CHECKINL: store i32 {{.*}}, i32*
   %1 = bitcast %struct.A* %i to i8*
-  %2 = call {}* @llvm.invariant.start(i64 4, i8* %1)
+  %2 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %1)
   ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   %3 = bitcast %struct.A* %agg.tmp to i8*
   %4 = bitcast %struct.A* %i to i8*
@@ -88,7 +90,7 @@
   call void @_ZN1AC2Ei(%struct.A* %i, i32 %call)
   ; CHECKINL: store i32 {{.*}}, i32*
   %1 = bitcast %struct.A* %i to i8*
-  %2 = call {}* @llvm.invariant.start(i64 4, i8* %1)
+  %2 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %1)
   ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   %3 = bitcast %struct.A* %j to i8*
   call void @llvm.lifetime.start(i64 4, i8* %3)
   %4 = bitcast %struct.A* %j to i8*
   %5 = bitcast %struct.A* %i to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* %5, i64 4, i32 4, i1 false)
   %6 = bitcast %struct.A* %j to i8*
-  %7 = call {}* @llvm.invariant.start(i64 4, i8* %6)
+  %7 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %6)
   ; CHECK-NOT: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   %8 = bitcast %struct.A* %agg.tmp to i8*
   %9 = bitcast %struct.A* %i to i8*
@@ -145,7 +147,7 @@
   call void @_ZN1AC2Ei(%struct.A* %i, i32 %call)
   ; CHECKINL: store i32 {{.*}}, i32*
   %1 = bitcast %struct.A* %i to i8*
-  %2 = call {}* @llvm.invariant.start(i64 4, i8* %1)
+  %2 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %1)
   ; CHECKNOINL: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   ; CHECKINV3: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   ; CHECKINV-NOT: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
@@ -205,7 +207,7 @@
   call void @_ZN1AC2Ei(%struct.A* %i, i32 %call)
   ; CHECKINL: store i32 {{.*}}, i32*
   %1 = bitcast %struct.A* %i to i8*
-  %2 = call {}* @llvm.invariant.start(i64 4, i8* %1)
+  %2 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %1)
   ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   %3 = bitcast %struct.A* %k to i8*
   call void @llvm.lifetime.start(i64 4, i8* %3)
@@ -273,7 +275,7 @@
   call void @_ZN1AC2Ei(%struct.A* %i, i32 %call)
   ; CHECKINL: store i32 {{.*}}, i32*
   %1 = bitcast %struct.A* %i to i8*
-  %2 = call {}* @llvm.invariant.start(i64 4, i8* %1)
+  %2 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %1)
   ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   %3 = bitcast %struct.A* %k to i8*
   call void @llvm.lifetime.start(i64 4, i8* %3)
@@ -351,7 +353,7 @@
   call void @_ZN1AC2Ei(%struct.A* %i, i32 %call)
   ; CHECKINL: store i32 {{.*}}, i32*
   %1 = bitcast %struct.A* %i to i8*
-  %2 = call {}* @llvm.invariant.start(i64 4, i8* %1)
+  %2 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %1)
   ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   %3 = bitcast %struct.A* %j to i8*
   call void @llvm.lifetime.start(i64 4, i8* %3)
   %4 = bitcast %struct.A* %j to i8*
   %5 = bitcast %struct.A* %i to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* %5, i64 4, i32 4, i1 false)
   %6 = bitcast %struct.A* %j to i8*
-  %7 = call {}* @llvm.invariant.start(i64 4, i8* %6)
+  %7 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 4, i8* %6)
   ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
   %8 = bitcast %struct.A* %agg.tmp to i8*
   %9 = bitcast %struct.A* %i to i8*
@@ -409,12 +411,59 @@
   ret void
 }
 
+;; Example 1 with member objects.
+;; void ex1m() {
+;;   A i(one());
+;;   bar(i.b);   // First load.
+;;   foo(&i.b);
+;;   bar(i.b);   // No load.
+define void @_Z4ex1mv() {
+entry:
+  %i = alloca %struct.B
+  %agg.tmp = alloca %struct.P
+  %agg.tmp2 = alloca %struct.P
+  %0 = bitcast %struct.B* %i to i8*
+  call void @llvm.lifetime.start(i64 8, i8* %0)
+  %call = call i32 @_Z3onev()
+  call void @_ZN1BC1Ei(%struct.B* %i, i32 %call)
+  %1 = bitcast %struct.B* %i to i8*
+  %2 = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 8, i8* %1, i64 8, i64 32)
+  ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* {{.*}},
+  %b = getelementptr inbounds %struct.B, %struct.B* %i, i32 0, i32 1
+  %3 = bitcast %struct.P* %agg.tmp to i8*
+  %4 = bitcast %struct.P* %b to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %4, i64 4, i32 4, i1 false)
+  %coerce.dive = getelementptr inbounds %struct.P, %struct.P* %agg.tmp, i32 0, i32 0
+  %5 = load i32, i32* %coerce.dive
+  ; CHECKNOINL: load i32, i32*
+  ; CHECKINL: load i32, i32*
+  call void @_Z3bar1P(i32 %5)
+  %b1 = getelementptr inbounds %struct.B, %struct.B* %i, i32 0, i32 1
+  call void @_Z3fooPK1P(%struct.P* %b1)
+  %b3 = getelementptr inbounds %struct.B, %struct.B* %i, i32 0, i32 1
+  %6 = bitcast %struct.P* %agg.tmp2 to i8*
+  %7 = bitcast %struct.P* %b3 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 4, i32 4, i1 false)
+  %coerce.dive4 = getelementptr inbounds %struct.P, %struct.P* %agg.tmp2, i32 0, i32 0
+  %8 = load i32, i32* %coerce.dive4
+  ; CHECK-NOT: load i32, i32*
+  call void @_Z3bar1P(i32 %8)
+  call void @llvm.invariant.end({}* %2, i64 8, i8* %1)
+  ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8*
+  %9 = bitcast %struct.B* %i to i8*
+  call void @llvm.lifetime.end(i64 8, i8* %9)
+  ret void
+}
+
 declare i32 @_Z3onev()
 declare void @_Z3bar1A(i32)
 declare void @_Z3fooPK1A(%struct.A*)
 declare void @_Z4foo2PK1AS1_(%struct.A*, %struct.A*)
+declare void @_Z3bar1P(i32)
+declare void @_Z3fooPK1P(%struct.P*)
+declare void @_ZN1BC1Ei(%struct.B*, i32)
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
-declare {}* @llvm.invariant.start(i64, i8* nocapture)
+declare {}* @llvm.invariant.start(i64, i8* nocapture, ...)
 declare void @llvm.invariant.end({}*, i64, i8* nocapture)
 declare void @llvm.lifetime.start(i64, i8* nocapture)
 declare void @llvm.lifetime.end(i64, i8* nocapture)
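Usage note (reviewer sketch, not part of the patch): with the variadic signature introduced above, the operands after the usual size and pointer are read pairwise by checkCoverage() as a size followed by an offset, both expected to be ConstantInts expressed in bits; a GEP whose accumulated constant offset (converted to bits) satisfies offset <= GEPOffset <= offset + size is then treated as writeonce-covered. The standalone IR below mirrors the @__cxx_global_var_init.3 and @_Z4ex1mv tests; the module, function name, and struct layout are illustrative assumptions only, not part of the change.

; Hypothetical example, assuming the %struct.B / %struct.P layout from the tests,
; where the writeonce member %struct.P sits at byte offset 4 (bit 32) of %struct.B.
%struct.P = type { i32 }
%struct.B = type { i32, %struct.P }

declare {}* @llvm.invariant.start(i64, i8* nocapture, ...)

define void @mark_member_writeonce(%struct.B* %obj) {
entry:
  %raw = bitcast %struct.B* %obj to i8*
  ; The trailing pair (i64 8, i64 32) is the size/offset descriptor in bits,
  ; exactly as emitted in the @__cxx_global_var_init.3 test; only that
  ; sub-range of the 8-byte object is treated as writeonce by checkCoverage().
  %inv = call {}* (i64, i8*, ...) @llvm.invariant.start(i64 8, i8* %raw, i64 8, i64 32)
  ret void
}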