SROA: carry pointer address spaces through rewritten pointer types.

The SROA.cpp hunks pass an explicit address space to getInt8PtrTy() and
getPointerTo() where they were previously called with no address-space
argument, taking it from the pointer, load, store, or "other" memcpy operand
being rewritten.

Tests: adds test/Transforms/SROA/address-spaces.ll, and extends
test/Transforms/SROA/alignment.ll with a "p1:16:16:16" datalayout entry,
addrspace(1) memcpy declarations, and @test1_as1.

NOTE(review): line breaks, indentation, and blank context lines were
reconstructed so that each hunk matches its @@ header counts; confirm the
whitespace against the target tree before applying (or apply with
`patch -l`).

Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -1315,7 +1315,7 @@
 
   // Don't consider any GEPs through an i8* as natural unless the TargetTy is
   // an i8.
-  if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))
+  if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
     return 0;
 
   Type *ElementTy = Ty->getElementType();
@@ -1416,7 +1416,8 @@
 
   if (!OffsetPtr) {
     if (!Int8Ptr) {
-      Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
+      unsigned AS = Ptr->getType()->getPointerAddressSpace();
+      Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AS),
                                   "raw_cast");
       Int8PtrOffset = Offset;
     }
@@ -2092,6 +2093,8 @@
 
     uint64_t Size = NewEndOffset - NewBeginOffset;
 
+    unsigned AS = LI.getPointerAddressSpace();
+
     Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
                              : LI.getType();
     bool IsPtrAdjusted = false;
@@ -2105,7 +2108,7 @@
       V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                 LI.isVolatile(), "load");
     } else {
-      Type *LTy = TargetTy->getPointerTo();
+      Type *LTy = TargetTy->getPointerTo(AS);
       V = IRB.CreateAlignedLoad(
           getAdjustedAllocaPtr(IRB, NewBeginOffset, LTy),
           getOffsetTypeAlign(TargetTy, NewBeginOffset - NewAllocaBeginOffset),
@@ -2130,7 +2133,7 @@
       // the computed value, and then replace the placeholder with LI, leaving
       // LI only used for this computation.
       Value *Placeholder
-        = new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
+        = new LoadInst(UndefValue::get(LI.getType()->getPointerTo(AS)));
       V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset,
                         "insert");
       LI.replaceAllUsesWith(V);
@@ -2241,8 +2244,9 @@
       NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
                                      SI.isVolatile());
     } else {
+      unsigned AS = SI.getPointerAddressSpace();
       Value *NewPtr = getAdjustedAllocaPtr(IRB, NewBeginOffset,
-                                           V->getType()->getPointerTo());
+                                           V->getType()->getPointerTo(AS));
       NewSI = IRB.CreateAlignedStore(
           V, NewPtr, getOffsetTypeAlign(
                          V->getType(), NewBeginOffset - NewAllocaBeginOffset),
@@ -2526,16 +2530,17 @@
     IntegerType *SubIntTy
       = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
 
-    Type *OtherPtrTy = NewAI.getType();
+    unsigned OtherAS = OtherPtr->getType()->getPointerAddressSpace();
+    Type *OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS);
     if (VecTy && !IsWholeAlloca) {
       if (NumElements == 1)
         OtherPtrTy = VecTy->getElementType();
       else
         OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
 
-      OtherPtrTy = OtherPtrTy->getPointerTo();
+      OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS);
     } else if (IntTy && !IsWholeAlloca) {
-      OtherPtrTy = SubIntTy->getPointerTo();
+      OtherPtrTy = SubIntTy->getPointerTo(OtherAS);
     }
 
     Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, RelOffset, OtherPtrTy);
Index: test/Transforms/SROA/address-spaces.ll
===================================================================
--- /dev/null
+++ test/Transforms/SROA/address-spaces.ll
@@ -0,0 +1,94 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
+declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1)
+
+
+; Make sure an illegal bitcast isn't introduced
+define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) {
+; CHECK-LABEL: @test_address_space_1_1(
+; CHECK: load <2 x i64> addrspace(1)* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
+; CHECK: ret void
+  %aa = alloca <2 x i64>, align 16
+  %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
+  %aaptr = bitcast <2 x i64>* %aa to i8*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
+  %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  ret void
+}
+
+define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) {
+; CHECK-LABEL: @test_address_space_1_0(
+; CHECK: load <2 x i64> addrspace(1)* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
+; CHECK: ret void
+  %aa = alloca <2 x i64>, align 16
+  %aptr = bitcast <2 x i64> addrspace(1)* %a to i8 addrspace(1)*
+  %aaptr = bitcast <2 x i64>* %aa to i8*
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %aaptr, i8 addrspace(1)* %aptr, i32 16, i32 2, i1 false)
+  %bptr = bitcast i16* %b to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  ret void
+}
+
+define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) {
+; CHECK-LABEL: @test_address_space_0_1(
+; CHECK: load <2 x i64>* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
+; CHECK: ret void
+  %aa = alloca <2 x i64>, align 16
+  %aptr = bitcast <2 x i64>* %a to i8*
+  %aaptr = bitcast <2 x i64>* %aa to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
+  %bptr = bitcast i16 addrspace(1)* %b to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+  ret void
+}
+
+%struct.struct_test_27.0.13 = type { i32, float, i64, i8, [4 x i32] }
+
+; Function Attrs: nounwind
+define void @copy_struct([5 x i64] %in.coerce) {
+; CHECK-LABEL: @copy_struct(
+; CHECK-NOT: memcpy
+for.end:
+  %in = alloca %struct.struct_test_27.0.13, align 8
+  %0 = bitcast %struct.struct_test_27.0.13* %in to [5 x i64]*
+  store [5 x i64] %in.coerce, [5 x i64]* %0, align 8
+  %scevgep9 = getelementptr %struct.struct_test_27.0.13* %in, i32 0, i32 4, i32 0
+  %scevgep910 = bitcast i32* %scevgep9 to i8*
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* undef, i8* %scevgep910, i32 16, i32 4, i1 false)
+  ret void
+}
+
+define void @test_address_space_gep_index(i8 addrspace(1)* %out) {
+; CHECK-LABEL: @test_address_space_gep_index(
+; CHECK-NOT: alloca
+  %a = alloca [16 x i8]
+  %raw1 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 0
+  %ptr1 = bitcast i8* %raw1 to double*
+  %raw2 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 8
+  %ptr2 = bitcast i8* %raw2 to double*
+
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %raw1, i8 addrspace(1)* %out, i32 16, i32 0, i1 false)
+; CHECK: %[[val2:.*]] = load double addrspace(1)* %{{.*}}, align 1
+; CHECK: getelementptr inbounds i8 addrspace(1)* %out, i16 8
+; CHECK: %[[val1:.*]] = load double addrspace(1)* %{{.*}}, align 1
+  %val1 = load double* %ptr2, align 1
+  %val2 = load double* %ptr1, align 1
+
+  store double %val1, double* %ptr1, align 1
+  store double %val2, double* %ptr2, align 1
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %out, i8* %raw1, i32 16, i32 0, i1 false)
+; CHECK: store double %[[val1]], double addrspace(1)* %{{.*}}, align 1
+; CHECK: getelementptr inbounds i8 addrspace(1)* %out, i16 8
+; CHECK: store double %[[val2]], double addrspace(1)* %{{.*}}, align 1
+  ret void
+; CHECK: ret void
+}
+
Index: test/Transforms/SROA/alignment.ll
===================================================================
--- test/Transforms/SROA/alignment.ll
+++ test/Transforms/SROA/alignment.ll
@@ -1,7 +1,9 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*, i8*, i32, i32, i1)
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8*, i8 addrspace(1)*, i32, i32, i1)
 
 define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) {
 ; CHECK-LABEL: @test1(
@@ -28,6 +30,30 @@
   ret void
 }
 
+define void @test1_as1({ i8, i8 } addrspace(1)* %a, { i8, i8 } addrspace(1)* %b) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: %[[gep_a0:.*]] = getelementptr inbounds { i8, i8 } addrspace(1)* %a, i16 0, i32 0
+; CHECK: %[[a0:.*]] = load i8 addrspace(1)* %[[gep_a0]], align 16
+; CHECK: %[[gep_a1:.*]] = getelementptr inbounds { i8, i8 } addrspace(1)* %a, i16 0, i32 1
+; CHECK: %[[a1:.*]] = load i8 addrspace(1)* %[[gep_a1]], align 1
+; CHECK: %[[gep_b0:.*]] = getelementptr inbounds { i8, i8 } addrspace(1)* %b, i16 0, i32 0
+; CHECK: store i8 %[[a0]], i8 addrspace(1)* %[[gep_b0]], align 16
+; CHECK: %[[gep_b1:.*]] = getelementptr inbounds { i8, i8 } addrspace(1)* %b, i16 0, i32 1
+; CHECK: store i8 %[[a1]], i8 addrspace(1)* %[[gep_b1]], align 1
+; CHECK: ret void
+entry:
+  %alloca = alloca { i8, i8 }, align 16
+  %gep_a = getelementptr { i8, i8 } addrspace(1)* %a, i32 0, i32 0
+  %gep_alloca = getelementptr { i8, i8 }* %alloca, i32 0, i32 0
+  %gep_b = getelementptr { i8, i8 } addrspace(1)* %b, i32 0, i32 0
+
+  store i8 420, i8* %gep_alloca, align 16
+
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* %gep_alloca, i8 addrspace(1)* %gep_a, i32 2, i32 16, i1 false)
+  call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %gep_b, i8* %gep_alloca, i32 2, i32 16, i1 false)
+  ret void
+}
+
 define void @test2() {
 ; CHECK-LABEL: @test2(
 ; CHECK: alloca i16