Index: lib/Transforms/Scalar/SROA.cpp =================================================================== --- lib/Transforms/Scalar/SROA.cpp +++ lib/Transforms/Scalar/SROA.cpp @@ -1324,7 +1324,7 @@ // Don't consider any GEPs through an i8* as natural unless the TargetTy is // an i8. - if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8)) + if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8)) return 0; Type *ElementTy = Ty->getElementType(); @@ -1425,7 +1425,8 @@ if (!OffsetPtr) { if (!Int8Ptr) { - Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(), + unsigned AS = Ptr->getType()->getPointerAddressSpace(); + Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AS), "raw_cast"); Int8PtrOffset = Offset; } @@ -2427,9 +2428,12 @@ assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr); bool IsDest = II.getRawDest() == OldPtr; + Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); + unsigned OtherAS = OtherPtr->getType()->getPointerAddressSpace(); + Type *OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS); // Compute the relative offset within the transfer. - unsigned IntPtrWidth = DL.getPointerSizeInBits(); + unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS); APInt RelOffset(IntPtrWidth, NewBeginOffset - BeginOffset); unsigned Align = II.getAlignment(); @@ -2493,14 +2497,12 @@ // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. - Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); if (AllocaInst *AI = dyn_cast(OtherPtr->stripInBoundsOffsets())) Pass.Worklist.insert(AI); if (EmitMemCpy) { - Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() - : II.getRawDest()->getType(); + Type *OtherPtrTy = OtherPtr->getType(); // Compute the other pointer, folding as much as possible to produce // a single, simple GEP in most cases. @@ -2535,16 +2537,15 @@ IntegerType *SubIntTy = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; - Type *OtherPtrTy = NewAI.getType(); if (VecTy && !IsWholeAlloca) { if (NumElements == 1) OtherPtrTy = VecTy->getElementType(); else OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements); - OtherPtrTy = OtherPtrTy->getPointerTo(); + OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS); } else if (IntTy && !IsWholeAlloca) { - OtherPtrTy = SubIntTy->getPointerTo(); + OtherPtrTy = SubIntTy->getPointerTo(OtherAS); } Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy); Index: test/Transforms/SROA/address-spaces.ll =================================================================== --- /dev/null +++ test/Transforms/SROA/address-spaces.ll @@ -0,0 +1,94 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1) +declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1) + + +; Make sure an illegal bitcast isn't introduced +define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) { +; CHECK-LABEL: @test_address_space_1_1( +; CHECK: load <2 x i64> addrspace(1)* %a, align 2 +; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2 +; CHECK: ret void + %aa = alloca <2 x i64>, align 16 + %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)* + %aaptr = bitcast <2 x i64>* %aa to i8* + call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false) + %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)* + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false) + ret void +} + +define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) { +; CHECK-LABEL: @test_address_space_1_0( +; CHECK: load <2 x i64> addrspace(1)* %a, align 2 +; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2 +; CHECK: ret void + %aa = alloca <2 x i64>, align 16 + %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)* + %aaptr = bitcast <2 x i64>* %aa to i8* + call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false) + %bptr = bitcast i16* %b to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false) + ret void +} + +define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) { +; CHECK-LABEL: @test_address_space_0_1( +; CHECK: load <2 x i64>* %a, align 2 +; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2 +; CHECK: ret void + %aa = alloca <2 x i64>, align 16 + %aptr = bitcast <2 x i64>* %a to i8* + %aaptr = bitcast <2 x i64>* %aa to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false) + %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)* + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false) + ret void +} + +%struct.struct_test_27.0.13 = type { i32, float, i64, i8, [4 x i32] } + +; Function Attrs: nounwind +define void @copy_struct([5 x i64] %in.coerce) { +; CHECK-LABEL: @copy_struct( +; CHECK-NOT: memcpy +for.end: + %in = alloca %struct.struct_test_27.0.13, align 8 + %0 = bitcast %struct.struct_test_27.0.13* %in to [5 x i64]* + store [5 x i64] %in.coerce, [5 x i64]* %0, align 8 + %scevgep9 = getelementptr %struct.struct_test_27.0.13* %in, i32 0, i32 4, i32 0 + %scevgep910 = bitcast i32* %scevgep9 to i8* + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* undef, i8* %scevgep910, i32 16, i32 4, i1 false) + ret void +} + +define void @test_address_space_gep_index(i8 addrspace(1)* %out) { +; CHECK-LABEL: @test_address_space_gep_index( +; CHECK-NOT: alloca + %a = alloca [16 x i8] + %raw1 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 0 + %ptr1 = bitcast i8* %raw1 to double* + %raw2 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 8 + %ptr2 = bitcast i8* %raw2 to double* + + call void @llvm.memcpy.p0i8.p1i8.i32(i8* %raw1, i8 addrspace(1)* %out, i32 16, i32 0, i1 false) +; CHECK: %[[val2:.*]] = load double addrspace(1)* %{{.*}}, align 1 +; CHECK: getelementptr inbounds i8 addrspace(1)* %out, i16 8 +; CHECK: %[[val1:.*]] = load double addrspace(1)* %{{.*}}, align 1 + %val1 = load double* %ptr2, align 1 + %val2 = load double* %ptr1, align 1 + + store double %val1, double* %ptr1, align 1 + store double %val2, double* %ptr2, align 1 + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %out, i8* %raw1, i32 16, i32 0, i1 false) +; CHECK: store double %[[val1]], double addrspace(1)* %{{.*}}, align 1 +; CHECK: getelementptr inbounds i8 addrspace(1)* %out, i16 8 +; CHECK: store double %[[val2]], double addrspace(1)* %{{.*}}, align 1 + ret void +; CHECK: ret void +} + Index: test/Transforms/SROA/basictest.ll =================================================================== --- test/Transforms/SROA/basictest.ll +++ test/Transforms/SROA/basictest.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -sroa -S | FileCheck %s ; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" declare void @llvm.lifetime.start(i64, i8* nocapture) declare void @llvm.lifetime.end(i64, i8* nocapture) @@ -404,6 +404,7 @@ } declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) nounwind declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind @@ -1150,6 +1151,24 @@ ; CHECK: ret } +define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) { +; Make sure this the right address space pointer is used for type check. +; CHECK-LABEL: @PR14105_as1( + +entry: + %a = alloca { [16 x i8] }, align 8 +; CHECK: alloca [16 x i8], align 8 + + %gep = getelementptr inbounds { [16 x i8] } addrspace(1)* %ptr, i64 -1 +; CHECK-NEXT: getelementptr inbounds { [16 x i8] } addrspace(1)* %ptr, i16 -1, i32 0, i64 0 + + %cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)* + %cast2 = bitcast { [16 x i8 ] }* %a to i8* + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %cast1, i8* %cast2, i32 16, i32 8, i1 true) + ret void +; CHECK: ret +} + define void @PR14465() { ; Ensure that we don't crash when analyzing a alloca larger than the maximum ; integer type width (MAX_INT_BITS) supported by llvm (1048576*32 > (1<<23)-1). Index: test/Transforms/SROA/vector-promotion.ll =================================================================== --- test/Transforms/SROA/vector-promotion.ll +++ test/Transforms/SROA/vector-promotion.ll @@ -150,6 +150,53 @@ ; CHECK-NEXT: ret } +declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) nounwind + +; Same as test4 with a different sized address space pointer source. +define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, <4 x i32> addrspace(1)* %z) { +; CHECK-LABEL: @test4_as1( +entry: + %a = alloca [2 x <4 x i32>] +; CHECK-NOT: alloca + + %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0 + store <4 x i32> %x, <4 x i32>* %a.x + %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1 + store <4 x i32> %y, <4 x i32>* %a.y +; CHECK-NOT: store + + %a.y.cast = bitcast <4 x i32>* %a.y to i8* + %z.cast = bitcast <4 x i32> addrspace(1)* %z to i8 addrspace(1)* + call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i32 1, i1 false) +; CHECK-NOT: memcpy + + %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 + %a.tmp1.cast = bitcast i32* %a.tmp1 to i8* + %z.tmp1 = getelementptr inbounds <4 x i32> addrspace(1)* %z, i16 0, i16 2 + %z.tmp1.cast = bitcast i32 addrspace(1)* %z.tmp1 to i8 addrspace(1)* + call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i32 1, i1 false) + %tmp1 = load i32* %a.tmp1 + %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3 + %tmp2 = load i32* %a.tmp2 + %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 + %tmp3 = load i32* %a.tmp3 +; CHECK-NOT: memcpy +; CHECK: %[[load:.*]] = load <4 x i32> addrspace(1)* %z +; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32> addrspace(1)* %z, i64 0, i64 2 +; CHECK-NEXT: %[[element_load:.*]] = load i32 addrspace(1)* %[[gep]] +; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2 +; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2 +; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3 +; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0 + + %tmp4 = add i32 %tmp1, %tmp2 + %tmp5 = add i32 %tmp3, %tmp4 + ret i32 %tmp5 +; CHECK-NEXT: add +; CHECK-NEXT: add +; CHECK-NEXT: ret +} + define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) { ; CHECK-LABEL: @test5( ; The same as the above, but with reversed source and destination for the