Index: llvm/trunk/lib/Analysis/InlineCost.cpp
===================================================================
--- llvm/trunk/lib/Analysis/InlineCost.cpp
+++ llvm/trunk/lib/Analysis/InlineCost.cpp
@@ -371,7 +371,7 @@
 /// Returns false if unable to compute the offset for any reason. Respects any
 /// simplified values known during the analysis of this callsite.
 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
-  unsigned IntPtrWidth = DL.getPointerSizeInBits();
+  unsigned IntPtrWidth = DL.getPointerTypeSizeInBits(GEP.getType());
   assert(IntPtrWidth == Offset.getBitWidth());
 
   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
@@ -450,8 +450,12 @@
   // SROA if it *might* be used in an inappropriate manner.
 
   // Phi nodes are always zero-cost.
-
-  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits());
+  // FIXME: Pointer sizes may differ between different address spaces, so do we
+  // need to use the correct address space in the call to getPointerSizeInBits
+  // here? Or could we skip the getPointerSizeInBits call completely? As far as
+  // I can see, ZeroOffset is used as a dummy value, so we can probably use any
+  // bit width for it.
+  APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
   bool CheckSROA = I.getType()->isPointerTy();
 
   // Track the constant or pointer with constant offset we've seen so far.
@@ -641,7 +645,8 @@
   // Track base/offset pairs when converted to a plain integer provided the
   // integer is large enough to represent the pointer.
   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
-  if (IntegerSize >= DL.getPointerSizeInBits()) {
+  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
+  if (IntegerSize >= DL.getPointerSizeInBits(AS)) {
     std::pair<Value *, APInt> BaseAndOffset =
         ConstantOffsetPtrs.lookup(I.getOperand(0));
     if (BaseAndOffset.first)
@@ -674,7 +679,7 @@
   // modifications provided the integer is not too large.
   Value *Op = I.getOperand(0);
   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
-  if (IntegerSize <= DL.getPointerSizeInBits()) {
+  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
     if (BaseAndOffset.first)
       ConstantOffsetPtrs[&I] = BaseAndOffset;
@@ -1608,7 +1613,8 @@
   if (!V->getType()->isPointerTy())
     return nullptr;
 
-  unsigned IntPtrWidth = DL.getPointerSizeInBits();
+  unsigned AS = V->getType()->getPointerAddressSpace();
+  unsigned IntPtrWidth = DL.getPointerSizeInBits(AS);
   APInt Offset = APInt::getNullValue(IntPtrWidth);
 
   // Even though we don't look through PHI nodes, we could be called on an
@@ -1632,7 +1638,7 @@
     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
   } while (Visited.insert(V).second);
 
-  Type *IntPtrTy = DL.getIntPtrType(V->getContext());
+  Type *IntPtrTy = DL.getIntPtrType(V->getContext(), AS);
   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
 }
 
@@ -1904,7 +1910,8 @@
     // size of the byval type by the target's pointer size.
     PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
    unsigned TypeSize = DL.getTypeSizeInBits(PTy->getElementType());
-    unsigned PointerSize = DL.getPointerSizeInBits();
+    unsigned AS = PTy->getAddressSpace();
+    unsigned PointerSize = DL.getPointerSizeInBits(AS);
     // Ceiling division.
     unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
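
Every InlineCost.cpp hunk above makes the same move: ask DataLayout for the
pointer width of the address space actually in use instead of assuming the
default address space 0. As a minimal standalone sketch (not part of the
patch; it assumes only the public DataLayout constructor and
getPointerSizeInBits, which take a layout string and an address space
number), here is how the answers differ under the datalayout string that
byval.ll adds below:

    // Build a DataLayout from the same string the byval.ll test uses and
    // query the pointer width per address space.
    #include "llvm/IR/DataLayout.h"
    #include <cstdio>

    int main() {
      // p:32:32 -> AS0 pointers are 32 bits; p1:64:64 -> AS1 are 64 bits;
      // p2:16:16 -> AS2 are 16 bits.
      llvm::DataLayout DL("p:32:32-p1:64:64-p2:16:16-n16:32:64");
      std::printf("%u\n", DL.getPointerSizeInBits());  // 32 (defaults to AS0)
      std::printf("%u\n", DL.getPointerSizeInBits(1)); // 64
      std::printf("%u\n", DL.getPointerSizeInBits(2)); // 16
      return 0;
    }

The old calls always returned the first number regardless of which address
space the pointer lived in; that is the mismatch each hunk fixes.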
Index: llvm/trunk/test/Transforms/Inline/byval.ll
===================================================================
--- llvm/trunk/test/Transforms/Inline/byval.ll
+++ llvm/trunk/test/Transforms/Inline/byval.ll
@@ -1,6 +1,8 @@
 ; RUN: opt < %s -inline -S | FileCheck %s
 ; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s
 
+target datalayout = "p:32:32-p1:64:64-p2:16:16-n16:32:64"
+
 ; Inlining a byval struct should cause an explicit copy into an alloca.
 
 %struct.ss = type { i32, i64 }
@@ -128,3 +130,27 @@
 ; CHECK: store i32 0, i32* getelementptr inbounds (%struct.S0, %struct.S0* @b, i64 0, i32 0), align 4
 ; CHECK-NOT: load i32, i32* getelementptr inbounds (%struct.S0, %struct.S0* @b, i64 0, i32 0), align 4
 }
+
+%struct.S1 = type { i32 }
+
+@d = addrspace(1) global %struct.S1 { i32 1 }, align 4
+@c = common addrspace(1) global i32 0, align 4
+
+define internal void @f5_as1(%struct.S1 addrspace(1)* byval nocapture readonly align 4 %p) {
+entry:
+  store i32 0, i32 addrspace(1)* getelementptr inbounds (%struct.S1, %struct.S1 addrspace(1)* @d, i64 0, i32 0), align 4
+  %f2 = getelementptr inbounds %struct.S1, %struct.S1 addrspace(1)* %p, i64 0, i32 0
+  %0 = load i32, i32 addrspace(1)* %f2, align 4
+  store i32 %0, i32 addrspace(1)* @c, align 4
+  ret void
+}
+
+define i32 @test5_as1() {
+entry:
+  tail call void @f5_as1(%struct.S1 addrspace(1)* byval align 4 @d)
+  %0 = load i32, i32 addrspace(1)* @c, align 4
+  ret i32 %0
+; CHECK: @test5_as1()
+; CHECK: store i32 0, i32 addrspace(1)* getelementptr inbounds (%struct.S1, %struct.S1 addrspace(1)* @d, i64 0, i32 0), align 4
+; CHECK-NOT: load i32, i32 addrspace(1)* getelementptr inbounds (%struct.S1, %struct.S1 addrspace(1)* @d, i64 0, i32 0), align 4
+}
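
In the byval accounting at the end of the InlineCost.cpp diff, the analyzer
charges one store per pointer-sized chunk of the copied aggregate, so the
address space now changes the store count. A small illustration of that
ceiling division (numStores is a hypothetical helper written for this note,
not CallAnalyzer's code):

    #include <cstdio>

    // One store per pointer-sized chunk, rounding up: the "Ceiling division"
    // from the hunk above.
    unsigned numStores(unsigned TypeSizeBits, unsigned PointerSizeBits) {
      return (TypeSizeBits + PointerSizeBits - 1) / PointerSizeBits;
    }

    int main() {
      // A 128-bit aggregate copied through 64-bit (AS1) vs. 32-bit (AS0)
      // pointers under byval.ll's datalayout:
      std::printf("%u\n", numStores(128, 64)); // 2
      std::printf("%u\n", numStores(128, 32)); // 4
      // %struct.S1 = { i32 } from test5_as1: one store either way.
      std::printf("%u\n", numStores(32, 64)); // 1
      std::printf("%u\n", numStores(32, 32)); // 1
      return 0;
    }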
Index: llvm/trunk/test/Transforms/Inline/ptr-diff.ll
===================================================================
--- llvm/trunk/test/Transforms/Inline/ptr-diff.ll
+++ llvm/trunk/test/Transforms/Inline/ptr-diff.ll
@@ -30,6 +30,33 @@
   ret i32 %t
 }
 
+define i32 @outer1_as1(i32 addrspace(1)* %ptr) {
+; CHECK-LABEL: @outer1_as1(
+; CHECK-NOT: call
+; CHECK: ret i32
+  %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i32 0
+  %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i32 42
+  %result = call i32 @inner1_as1(i32 addrspace(1)* %ptr1, i32 addrspace(1)* %ptr2)
+  ret i32 %result
+}
+
+; Make sure that the larger pointer size in this address space makes the
+; ptrtoints real truncations, not no-ops, and still does not block inlining.
+define i32 @inner1_as1(i32 addrspace(1)* %begin, i32 addrspace(1)* %end) {
+  %begin.i = ptrtoint i32 addrspace(1)* %begin to i32
+  %end.i = ptrtoint i32 addrspace(1)* %end to i32
+  %distance = sub i32 %end.i, %begin.i
+  %icmp = icmp sle i32 %distance, 42
+  br i1 %icmp, label %then, label %else
+
+then:
+  ret i32 3
+
+else:
+  %t = load i32, i32 addrspace(1)* %begin
+  ret i32 %t
+}
+
 define i32 @outer2(i32* %ptr) {
 ; Test that an inbounds GEP disables this -- it isn't safe in general as
 ; wrapping changes the behavior of lessthan and greaterthan comparisons.
@@ -59,6 +86,30 @@
   ret i32 %t
 }
 
+define i32 @outer3(i16* addrspace(1)* %ptr) {
+; CHECK-LABEL: @outer3(
+; CHECK-NOT: call i32
+; CHECK: ret i32 3
+; CHECK-LABEL: @inner3(
+  %result = call i32 @inner3(i16* addrspace(1)* %ptr)
+  ret i32 %result
+}
+
+define i32 @inner3(i16* addrspace(1)* %ptr) {
+  call void @extern()
+  %ptr.i = ptrtoint i16* addrspace(1)* %ptr to i64
+  %distance = sub i64 %ptr.i, %ptr.i
+  %icmp = icmp eq i64 %distance, 0
+  br i1 %icmp, label %then, label %else
+
+then:
+  ret i32 3
+
+else:
+  ret i32 5
+}
+
+
 ; The inttoptrs are free since it is a smaller integer to a larger
 ; pointer size
 define i32 @inttoptr_free_cost(i32 %a, i32 %b, i32 %c) {
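
The ptr-diff.ll additions exercise the visitPtrToInt change: base/offset
tracking may survive a ptrtoint only when the destination integer is wide
enough to hold a pointer of that address space. A hedged sketch of just that
predicate (canTrackThroughPtrToInt is an invented name, not the actual
CallAnalyzer code, and the layout string is borrowed from byval.ll for
illustration):

    #include "llvm/IR/DataLayout.h"
    #include <cstdio>

    // Mirrors the new check: IntegerSize >= DL.getPointerSizeInBits(AS).
    static bool canTrackThroughPtrToInt(const llvm::DataLayout &DL,
                                        unsigned IntegerSize, unsigned AS) {
      return IntegerSize >= DL.getPointerSizeInBits(AS);
    }

    int main() {
      llvm::DataLayout DL("p:32:32-p1:64:64-p2:16:16-n16:32:64");
      // ptrtoint of an AS0 pointer to i32 is lossless: keep the pair.
      std::printf("%d\n", canTrackThroughPtrToInt(DL, 32, 0)); // 1
      // ptrtoint of an AS1 pointer to i32 truncates 64 bits: drop the pair.
      std::printf("%d\n", canTrackThroughPtrToInt(DL, 32, 1)); // 0
      return 0;
    }

Before the patch, the comparison always used the address space 0 width, so
the truncating ptrtoints in inner1_as1 would have been treated as lossless.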