Index: include/llvm/Analysis/PtrUseVisitor.h =================================================================== --- include/llvm/Analysis/PtrUseVisitor.h +++ include/llvm/Analysis/PtrUseVisitor.h @@ -256,6 +256,10 @@ enqueueUsers(BC); } + void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { + enqueueUsers(ASC); + } + void visitPtrToIntInst(PtrToIntInst &I) { PI.setEscaped(&I); } Index: lib/Analysis/PtrUseVisitor.cpp =================================================================== --- lib/Analysis/PtrUseVisitor.cpp +++ lib/Analysis/PtrUseVisitor.cpp @@ -34,5 +34,11 @@ if (!IsOffsetKnown) return false; - return GEPI.accumulateConstantOffset(DL, Offset); + APInt TmpOffset(DL.getIndexTypeSizeInBits(GEPI.getType()), 0); + if (GEPI.accumulateConstantOffset(DL, TmpOffset)) { + Offset += TmpOffset.sextOrTrunc(Offset.getBitWidth()); + return true; + } + + return false; } Index: lib/Transforms/Scalar/SROA.cpp =================================================================== --- lib/Transforms/Scalar/SROA.cpp +++ lib/Transforms/Scalar/SROA.cpp @@ -713,6 +713,13 @@ return Base::visitBitCastInst(BC); } + void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { + if (ASC.use_empty()) + return markAsDead(ASC); + + return Base::visitAddrSpaceCastInst(ASC); + } + void visitGetElementPtrInst(GetElementPtrInst &GEPI) { if (GEPI.use_empty()) return markAsDead(GEPI); @@ -776,7 +783,10 @@ if (!IsOffsetKnown) return PI.setAborted(&LI); - const DataLayout &DL = LI.getModule()->getDataLayout(); + if (LI.isVolatile() && + LI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) + return PI.setAborted(&LI); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } @@ -788,7 +798,10 @@ if (!IsOffsetKnown) return PI.setAborted(&SI); - const DataLayout &DL = SI.getModule()->getDataLayout(); + if (SI.isVolatile() && + SI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) + return PI.setAborted(&SI); + uint64_t Size = 
DL.getTypeStoreSize(ValOp->getType()); // If this memory access can be shown to *statically* extend outside the @@ -823,6 +836,11 @@ if (!IsOffsetKnown) return PI.setAborted(&II); + // Don't replace this with a store with a different address space. TODO: + // Use a store with the casted new alloca? + if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace()) + return PI.setAborted(&II); + insertUse(II, Offset, Length ? Length->getLimitedValue() : AllocSize - Offset.getLimitedValue(), (bool)Length); @@ -842,6 +860,13 @@ if (!IsOffsetKnown) return PI.setAborted(&II); + // Don't replace this with a load/store with a different address space. + // TODO: Use a store with the casted new alloca? + if (II.isVolatile() && + (II.getDestAddressSpace() != DL.getAllocaAddrSpace() || + II.getSourceAddressSpace() != DL.getAllocaAddrSpace())) + return PI.setAborted(&II); + // This side of the transfer is completely out-of-bounds, and so we can // nuke the entire transfer. However, we also need to nuke the other side // if already added to our partitions. @@ -949,7 +974,7 @@ if (!GEP->hasAllZeroIndices()) return GEP; } else if (!isa(I) && !isa(I) && - !isa(I)) { + !isa(I) && !isa(I)) { return I; } @@ -1561,7 +1586,8 @@ Value *Int8Ptr = nullptr; APInt Int8PtrOffset(Offset.getBitWidth(), 0); - Type *TargetTy = PointerTy->getPointerElementType(); + PointerType *TargetPtrTy = cast(PointerTy); + Type *TargetTy = TargetPtrTy->getElementType(); do { // First fold any existing GEPs into the offset. @@ -1630,8 +1656,11 @@ Ptr = OffsetPtr; // On the off chance we were targeting i8*, guard the bitcast here. 
- if (Ptr->getType() != PointerTy) - Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast"); + if (cast(Ptr->getType()) != TargetPtrTy) { + Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, + TargetPtrTy, + NamePrefix + "sroa_cast"); + } return Ptr; } @@ -3082,8 +3111,9 @@ continue; } - assert(isa(I) || isa(I) || - isa(I) || isa(I)); + assert(isa(I) || isa(I) || + isa(I) || isa(I) || + isa(I)); for (User *U : I->users()) if (Visited.insert(cast(U)).second) Uses.push_back(cast(U)); @@ -3384,6 +3414,11 @@ return false; } + bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { + enqueueUsers(ASC); + return false; + } + bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { enqueueUsers(GEPI); return false; Index: test/Transforms/SROA/addrspacecast.ll =================================================================== --- test/Transforms/SROA/addrspacecast.ll +++ test/Transforms/SROA/addrspacecast.ll @@ -10,12 +10,7 @@ define i64 @alloca_addrspacecast_bitcast(i64 %X) { ; CHECK-LABEL: @alloca_addrspacecast_bitcast( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A:%.*]] = alloca [8 x i8] -; CHECK-NEXT: [[A_CAST:%.*]] = addrspacecast [8 x i8]* [[A]] to [8 x i8] addrspace(1)* -; CHECK-NEXT: [[B:%.*]] = bitcast [8 x i8] addrspace(1)* [[A_CAST]] to i64 addrspace(1)* -; CHECK-NEXT: store i64 [[X:%.*]], i64 addrspace(1)* [[B]] -; CHECK-NEXT: [[Z:%.*]] = load i64, i64 addrspace(1)* [[B]] -; CHECK-NEXT: ret i64 [[Z]] +; CHECK-NEXT: ret i64 [[X:%.*]] ; entry: %A = alloca [8 x i8] @@ -29,12 +24,7 @@ define i64 @alloca_bitcast_addrspacecast(i64 %X) { ; CHECK-LABEL: @alloca_bitcast_addrspacecast( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A:%.*]] = alloca [8 x i8] -; CHECK-NEXT: [[A_CAST:%.*]] = bitcast [8 x i8]* [[A]] to i64* -; CHECK-NEXT: [[B:%.*]] = addrspacecast i64* [[A_CAST]] to i64 addrspace(1)* -; CHECK-NEXT: store i64 [[X:%.*]], i64 addrspace(1)* [[B]] -; CHECK-NEXT: [[Z:%.*]] = load i64, i64 addrspace(1)* [[B]] -; CHECK-NEXT: ret i64 [[Z]] +; CHECK-NEXT: ret i64 [[X:%.*]] ; 
entry: %A = alloca [8 x i8] @@ -48,15 +38,7 @@ define i64 @alloca_addrspacecast_gep(i64 %X) { ; CHECK-LABEL: @alloca_addrspacecast_gep( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_AS0:%.*]] = alloca [256 x i8], align 4 -; CHECK-NEXT: [[GEPA_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 32 -; CHECK-NEXT: [[GEPA_AS0_BC:%.*]] = bitcast i8* [[GEPA_AS0]] to i64* -; CHECK-NEXT: store i64 [[X:%.*]], i64* [[GEPA_AS0_BC]], align 4 -; CHECK-NEXT: [[A_AS1:%.*]] = addrspacecast [256 x i8]* [[A_AS0]] to [256 x i8] addrspace(1)* -; CHECK-NEXT: [[GEPA_AS1:%.*]] = getelementptr [256 x i8], [256 x i8] addrspace(1)* [[A_AS1]], i16 0, i16 32 -; CHECK-NEXT: [[GEPA_AS1_BC:%.*]] = bitcast i8 addrspace(1)* [[GEPA_AS1]] to i64 addrspace(1)* -; CHECK-NEXT: [[Z:%.*]] = load i64, i64 addrspace(1)* [[GEPA_AS1_BC]], align 4 -; CHECK-NEXT: ret i64 [[Z]] +; CHECK-NEXT: ret i64 [[X:%.*]] ; entry: %A.as0 = alloca [256 x i8], align 4 @@ -76,13 +58,7 @@ define i64 @alloca_gep_addrspacecast(i64 %X) { ; CHECK-LABEL: @alloca_gep_addrspacecast( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_AS0:%.*]] = alloca [256 x i8], align 4 -; CHECK-NEXT: [[GEPA_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 32 -; CHECK-NEXT: [[GEPA_AS0_BC:%.*]] = bitcast i8* [[GEPA_AS0]] to i64* -; CHECK-NEXT: store i64 [[X:%.*]], i64* [[GEPA_AS0_BC]], align 4 -; CHECK-NEXT: [[GEPA_AS1_BC:%.*]] = addrspacecast i64* [[GEPA_AS0_BC]] to i64 addrspace(1)* -; CHECK-NEXT: [[Z:%.*]] = load i64, i64 addrspace(1)* [[GEPA_AS1_BC]], align 4 -; CHECK-NEXT: ret i64 [[Z]] +; CHECK-NEXT: ret i64 [[X:%.*]] ; entry: %A.as0 = alloca [256 x i8], align 4 @@ -99,16 +75,7 @@ define i64 @alloca_gep_addrspacecast_gep(i64 %X) { ; CHECK-LABEL: @alloca_gep_addrspacecast_gep( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_AS0:%.*]] = alloca [256 x i8], align 4 -; CHECK-NEXT: [[GEPA_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 32 -; CHECK-NEXT: [[GEPA_AS0_BC:%.*]] = bitcast i8* [[GEPA_AS0]] to i64* 
-; CHECK-NEXT: store i64 [[X:%.*]], i64* [[GEPA_AS0_BC]], align 4 -; CHECK-NEXT: [[GEPB_AS0:%.*]] = getelementptr [256 x i8], [256 x i8]* [[A_AS0]], i16 0, i16 16 -; CHECK-NEXT: [[GEPB_AS1:%.*]] = addrspacecast i8* [[GEPB_AS0]] to i8 addrspace(1)* -; CHECK-NEXT: [[GEPC_AS1:%.*]] = getelementptr i8, i8 addrspace(1)* [[GEPB_AS1]], i16 16 -; CHECK-NEXT: [[GEPC_AS1_BC:%.*]] = bitcast i8 addrspace(1)* [[GEPC_AS1]] to i64 addrspace(1)* -; CHECK-NEXT: [[Z:%.*]] = load i64, i64 addrspace(1)* [[GEPC_AS1_BC]], align 4 -; CHECK-NEXT: ret i64 [[Z]] +; CHECK-NEXT: ret i64 [[X:%.*]] ; entry: %A.as0 = alloca [256 x i8], align 4 @@ -287,11 +254,6 @@ define void @select_addrspacecast(i1 %a, i1 %b) { ; CHECK-LABEL: @select_addrspacecast( -; CHECK-NEXT: [[C:%.*]] = alloca i64, align 8 -; CHECK-NEXT: [[P_0_C:%.*]] = select i1 undef, i64* [[C]], i64* [[C]] -; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i64* [[P_0_C]] to i64 addrspace(1)* -; CHECK-NEXT: [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[ASC]], i64 addrspace(1)* [[ASC]] -; CHECK-NEXT: [[COND:%.*]] = load i64, i64 addrspace(1)* [[COND_IN]], align 8 ; CHECK-NEXT: ret void ; %c = alloca i64, align 8 @@ -306,9 +268,8 @@ define void @select_addrspacecast_const_op(i1 %a, i1 %b) { ; CHECK-LABEL: @select_addrspacecast_const_op( ; CHECK-NEXT: [[C:%.*]] = alloca i64, align 8 -; CHECK-NEXT: [[P_0_C:%.*]] = select i1 undef, i64* [[C]], i64* [[C]] -; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i64* [[P_0_C]] to i64 addrspace(1)* -; CHECK-NEXT: [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[ASC]], i64 addrspace(1)* null +; CHECK-NEXT: [[C_0_ASC_SROA_CAST:%.*]] = addrspacecast i64* [[C]] to i64 addrspace(1)* +; CHECK-NEXT: [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[C_0_ASC_SROA_CAST]], i64 addrspace(1)* null ; CHECK-NEXT: [[COND:%.*]] = load i64, i64 addrspace(1)* [[COND_IN]], align 8 ; CHECK-NEXT: ret void ; @@ -325,11 +286,8 @@ define void @select_addrspacecast_gv(i1 %a, i1 %b) { ; CHECK-LABEL: 
@select_addrspacecast_gv( -; CHECK-NEXT: [[C:%.*]] = alloca i64, align 8 -; CHECK-NEXT: [[P_0_C:%.*]] = select i1 undef, i64* [[C]], i64* [[C]] -; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i64* [[P_0_C]] to i64 addrspace(1)* -; CHECK-NEXT: [[COND_IN:%.*]] = select i1 undef, i64 addrspace(1)* [[ASC]], i64 addrspace(1)* @gv -; CHECK-NEXT: [[COND:%.*]] = load i64, i64 addrspace(1)* [[COND_IN]], align 8 +; CHECK-NEXT: [[COND_SROA_SPECULATE_LOAD_FALSE:%.*]] = load i64, i64 addrspace(1)* @gv, align 8 +; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 undef, i64 undef, i64 [[COND_SROA_SPECULATE_LOAD_FALSE]] ; CHECK-NEXT: ret void ; %c = alloca i64, align 8 Index: test/Transforms/SROA/basictest.ll =================================================================== --- test/Transforms/SROA/basictest.ll +++ test/Transforms/SROA/basictest.ll @@ -65,6 +65,67 @@ ret i64 %Z } +define i64 @test2_addrspacecast(i64 %X) { +; CHECK-LABEL: @test2_addrspacecast( +; CHECK-NOT: alloca +; CHECK: ret i64 %X + +entry: + %A = alloca [8 x i8] + %B = addrspacecast [8 x i8]* %A to i64 addrspace(1)* + store i64 %X, i64 addrspace(1)* %B + br label %L2 + +L2: + %Z = load i64, i64 addrspace(1)* %B + ret i64 %Z +} + +define i64 @test2_addrspacecast_gep(i64 %X, i16 %idx) { +; CHECK-LABEL: @test2_addrspacecast_gep( +; CHECK-NOT: alloca +; CHECK: ret i64 %X + +entry: + %A = alloca [256 x i8] + %B = addrspacecast [256 x i8]* %A to i64 addrspace(1)* + %gepA = getelementptr [256 x i8], [256 x i8]* %A, i16 0, i16 32 + %gepB = getelementptr i64, i64 addrspace(1)* %B, i16 4 + store i64 %X, i64 addrspace(1)* %gepB, align 1 + br label %L2 + +L2: + %gepA.bc = bitcast i8* %gepA to i64* + %Z = load i64, i64* %gepA.bc, align 1 + ret i64 %Z +} + +; Avoid crashing when loading/storing at different offsets. 
+define i64 @test2_addrspacecast_gep_offset(i64 %X) { +; CHECK-LABEL: @test2_addrspacecast_gep_offset( +; CHECK: %A.sroa.0 = alloca [10 x i8] +; CHECK: %A.sroa.0.2.gepB.sroa_idx = getelementptr inbounds [10 x i8], [10 x i8]* %A.sroa.0, i16 0, i16 2 +; CHECK-NEXT: %A.sroa.0.2.gepB.sroa_cast = addrspacecast i8* %A.sroa.0.2.gepB.sroa_idx to i64 addrspace(1)* +; CHECK-NEXT: store i64 %X, i64 addrspace(1)* %A.sroa.0.2.gepB.sroa_cast, align 1 +; CHECK: br + +; CHECK: %A.sroa.0.0.gepA.bc.sroa_cast = bitcast [10 x i8]* %A.sroa.0 to i64* +; CHECK: %A.sroa.0.0.A.sroa.0.30.Z = load i64, i64* %A.sroa.0.0.gepA.bc.sroa_cast, align 1 +; CHECK-NEXT: ret +entry: + %A = alloca [256 x i8] + %B = addrspacecast [256 x i8]* %A to i64 addrspace(1)* + %gepA = getelementptr [256 x i8], [256 x i8]* %A, i16 0, i16 30 + %gepB = getelementptr i64, i64 addrspace(1)* %B, i16 4 + store i64 %X, i64 addrspace(1)* %gepB, align 1 + br label %L2 + +L2: + %gepA.bc = bitcast i8* %gepA to i64* + %Z = load i64, i64* %gepA.bc, align 1 + ret i64 %Z +} + define void @test3(i8* %dst, i8* align 8 %src) { ; CHECK-LABEL: @test3( @@ -426,6 +487,25 @@ ret i16 %val } +define i16 @test5_multi_addrspace_access() { +; CHECK-LABEL: @test5_multi_addrspace_access( +; CHECK-NOT: alloca float +; CHECK: %[[cast:.*]] = bitcast float 0.0{{.*}} to i32 +; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16 +; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16 +; CHECK-NEXT: ret i16 %[[trunc]] + +entry: + %a = alloca [4 x i8] + %fptr = bitcast [4 x i8]* %a to float* + %fptr.as1 = addrspacecast float* %fptr to float addrspace(1)* + store float 0.0, float addrspace(1)* %fptr.as1 + %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 2 + %iptr = bitcast i8* %ptr to i16* + %val = load i16, i16* %iptr + ret i16 %val +} + define i32 @test6() { ; CHECK-LABEL: @test6( ; CHECK: alloca i32 @@ -825,6 +905,27 @@ ret i32 undef } +declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) nounwind + 
+define i32 @test19_addrspacecast(%opaque* %x) { +; This input will cause us to try to compute a natural GEP when rewriting +; pointers in such a way that we try to GEP through the opaque type. Previously, +; a check for an unsized type was missing and this crashed. Ensure it behaves +; reasonably now. +; CHECK-LABEL: @test19_addrspacecast( +; CHECK-NOT: alloca +; CHECK: ret i32 undef + +entry: + %a = alloca { i64, i8* } + %cast1 = addrspacecast %opaque* %x to i8 addrspace(1)* + %cast2 = bitcast { i64, i8* }* %a to i8* + call void @llvm.memcpy.p0i8.p1i8.i32(i8* %cast2, i8 addrspace(1)* %cast1, i32 16, i32 1, i1 false) + %gep = getelementptr inbounds { i64, i8* }, { i64, i8* }* %a, i32 0, i32 0 + %val = load i64, i64* %gep + ret i32 undef +} + define i32 @test20() { ; Ensure we can track negative offsets (before the beginning of the alloca) and ; negative relative offsets from offsets starting past the end of the alloca. @@ -1154,14 +1255,15 @@ define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) { ; Make sure this the right address space pointer is used for type check. 
; CHECK-LABEL: @PR14105_as1( +; CHECK: alloca { [16 x i8] }, align 8 +; CHECK-NEXT: %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i64 -1 +; CHECK-NEXT: %cast1 = bitcast { [16 x i8] } addrspace(1)* %gep to i8 addrspace(1)* +; CHECK-NEXT: %cast2 = bitcast { [16 x i8] }* %a to i8* +; CHECK-NEXT: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true) entry: %a = alloca { [16 x i8] }, align 8 -; CHECK: alloca [16 x i8], align 8 - %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i64 -1 -; CHECK-NEXT: getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i16 -1, i32 0, i16 0 - %cast1 = bitcast { [16 x i8 ] } addrspace(1)* %gep to i8 addrspace(1)* %cast2 = bitcast { [16 x i8 ] }* %a to i8* call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true) Index: test/Transforms/SROA/phi-and-select.ll =================================================================== --- test/Transforms/SROA/phi-and-select.ll +++ test/Transforms/SROA/phi-and-select.ll @@ -53,6 +53,56 @@ ret i32 %result } +; If bitcast isn't considered a safe phi/select use, the alloca +; remains as an array. +; FIXME: Why isn't this identical to test2? 
+ +; CHECK-LABEL: @test2_bitcast( +; CHECK: alloca i32 +; CHECK-NEXT: alloca i32 + +; CHECK: %select = select i1 %cond, i32* %a.sroa.3, i32* %a.sroa.0 +; CHECK-NEXT: %select.bc = bitcast i32* %select to float* +; CHECK-NEXT: %result = load float, float* %select.bc, align 4 +define float @test2_bitcast() { +entry: + %a = alloca [2 x i32] + %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0 + %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1 + store i32 0, i32* %a0 + store i32 1, i32* %a1 + %v0 = load i32, i32* %a0 + %v1 = load i32, i32* %a1 + %cond = icmp sle i32 %v0, %v1 + %select = select i1 %cond, i32* %a1, i32* %a0 + %select.bc = bitcast i32* %select to float* + %result = load float, float* %select.bc + ret float %result +} + +; CHECK-LABEL: @test2_addrspacecast( +; CHECK: alloca i32 +; CHECK-NEXT: alloca i32 + +; CHECK: %select = select i1 %cond, i32* %a.sroa.3, i32* %a.sroa.0 +; CHECK-NEXT: %select.asc = addrspacecast i32* %select to i32 addrspace(1)* +; CHECK-NEXT: load i32, i32 addrspace(1)* %select.asc, align 4 +define i32 @test2_addrspacecast() { +entry: + %a = alloca [2 x i32] + %a0 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 0 + %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1 + store i32 0, i32* %a0 + store i32 1, i32* %a1 + %v0 = load i32, i32* %a0 + %v1 = load i32, i32* %a1 + %cond = icmp sle i32 %v0, %v1 + %select = select i1 %cond, i32* %a1, i32* %a0 + %select.asc = addrspacecast i32* %select to i32 addrspace(1)* + %result = load i32, i32 addrspace(1)* %select.asc + ret i32 %result +} + define i32 @test3(i32 %x) { ; CHECK-LABEL: @test3( entry: