Index: lib/Transforms/Scalar/SROA.cpp =================================================================== --- lib/Transforms/Scalar/SROA.cpp +++ lib/Transforms/Scalar/SROA.cpp @@ -783,10 +783,6 @@ if (!IsOffsetKnown) return PI.setAborted(&LI); - if (LI.isVolatile() && - LI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) - return PI.setAborted(&LI); - uint64_t Size = DL.getTypeStoreSize(LI.getType()); return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } @@ -798,10 +794,6 @@ if (!IsOffsetKnown) return PI.setAborted(&SI); - if (SI.isVolatile() && - SI.getPointerAddressSpace() != DL.getAllocaAddrSpace()) - return PI.setAborted(&SI); - uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); // If this memory access can be shown to *statically* extend outside the @@ -836,11 +828,6 @@ if (!IsOffsetKnown) return PI.setAborted(&II); - // Don't replace this with a store with a different address space. TODO: - // Use a store with the casted new alloca? - if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace()) - return PI.setAborted(&II); - insertUse(II, Offset, Length ? Length->getLimitedValue() : AllocSize - Offset.getLimitedValue(), (bool)Length); @@ -860,13 +847,6 @@ if (!IsOffsetKnown) return PI.setAborted(&II); - // Don't replace this with a load/store with a different address space. - // TODO: Use a store with the casted new alloca? - if (II.isVolatile() && - (II.getDestAddressSpace() != DL.getAllocaAddrSpace() || - II.getSourceAddressSpace() != DL.getAllocaAddrSpace())) - return PI.setAborted(&II); - // This side of the transfer is completely out-of-bounds, and so we can // nuke the entire transfer. However, we also need to nuke the other side // if already added to our partitions. @@ -2294,6 +2274,16 @@ // the insertion point is set to point to the user. IRBuilderTy IRB; + // Return the new alloca, addrspacecasted if required to avoid changing the + // addrspace of a volatile access. + Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) { + if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace()) + return &NewAI; + + Type *AccessTy = NewAI.getAllocatedType()->getPointerTo(AddrSpace); + return IRB.CreateAddrSpaceCast(&NewAI, AccessTy); + } + public: AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass, AllocaInst &OldAI, AllocaInst &NewAI, @@ -2492,7 +2482,10 @@ (canConvertValue(DL, NewAllocaTy, TargetTy) || (IsLoadPastEnd && NewAllocaTy->isIntegerTy() && TargetTy->isIntegerTy()))) { - LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, + + Value *NewPtr = getPtrToNewAI(LI.getPointerAddressSpace(), + LI.isVolatile()); + LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr, NewAI.getAlignment(), LI.isVolatile(), LI.getName()); if (AATags) @@ -2675,7 +2668,9 @@ } V = convertValue(DL, IRB, V, NewAllocaTy); - NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), + Value *NewPtr = getPtrToNewAI(SI.getPointerAddressSpace(), + SI.isVolatile()); + NewSI = IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlignment(), SI.isVolatile()); } else { unsigned AS = SI.getPointerAddressSpace(); @@ -2843,7 +2838,8 @@ V = convertValue(DL, IRB, V, AllocaTy); } - StoreInst *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), + Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile()); + StoreInst *New = IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlignment(), II.isVolatile()); if (AATags) New->setAAMetadata(AATags); @@ -2995,16 +2991,25 @@ } OtherPtrTy = OtherTy->getPointerTo(OtherAS); - Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy, + Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy, OtherPtr->getName() + "."); unsigned SrcAlign = OtherAlign; - Value *DstPtr = &NewAI; unsigned DstAlign = SliceAlign; + Value *DstPtr; + Value *SrcPtr; if (!IsDest) { - std::swap(SrcPtr, DstPtr); std::swap(SrcAlign, DstAlign); } + if (IsDest) { + DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile()); + SrcPtr = AdjPtr; + } else { + DstPtr = AdjPtr; + SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile()); + } + + Value *Src; if (VecTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, Index: test/Transforms/SROA/addrspacecast.ll =================================================================== --- test/Transforms/SROA/addrspacecast.ll +++ test/Transforms/SROA/addrspacecast.ll @@ -172,12 +172,11 @@ define i64 @alloca_addrspacecast_bitcast_volatile_store(i64 %X) { ; CHECK-LABEL: @alloca_addrspacecast_bitcast_volatile_store( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A:%.*]] = alloca [8 x i8] -; CHECK-NEXT: [[A_CAST:%.*]] = addrspacecast [8 x i8]* [[A]] to [8 x i8] addrspace(1)* -; CHECK-NEXT: [[B:%.*]] = bitcast [8 x i8] addrspace(1)* [[A_CAST]] to i64 addrspace(1)* -; CHECK-NEXT: store volatile i64 [[X:%.*]], i64 addrspace(1)* [[B]] -; CHECK-NEXT: [[Z:%.*]] = load i64, i64 addrspace(1)* [[B]] -; CHECK-NEXT: ret i64 [[Z]] +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i64 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64* [[A_SROA_0]] to i64 addrspace(1)* +; CHECK-NEXT: store volatile i64 [[X:%.*]], i64 addrspace(1)* [[TMP0]] +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_Z:%.*]] = load i64, i64* [[A_SROA_0]] +; CHECK-NEXT: ret i64 [[A_SROA_0_0_A_SROA_0_0_Z]] ; entry: %A = alloca [8 x i8] @@ -188,16 +187,44 @@ ret i64 %Z } +%struct = type { [256 x i8], i32 } + +define i65 @volatile_store_addrspacecast_slice(i65 %X, i16 %idx) { +; CHECK-LABEL: @volatile_store_addrspacecast_slice( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [9 x i8], align 4 +; CHECK-NEXT: [[A_SROA_1:%.*]] = alloca [9 x i8], align 4 +; CHECK-NEXT: [[A_SROA_1_0_GEPB_SROA_CAST:%.*]] = addrspacecast [9 x i8]* [[A_SROA_1]] to i65 addrspace(1)* +; CHECK-NEXT: store volatile i65 [[X:%.*]], i65 addrspace(1)* [[A_SROA_1_0_GEPB_SROA_CAST]] +; CHECK-NEXT: br label [[L2:%.*]] +; CHECK: L2: +; CHECK-NEXT: [[A_SROA_0_0_GEPA_BC_SROA_CAST:%.*]] = bitcast [9 x i8]* [[A_SROA_0]] to i65* +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_20_Z:%.*]] = load i65, i65* [[A_SROA_0_0_GEPA_BC_SROA_CAST]] +; CHECK-NEXT: ret i65 [[A_SROA_0_0_A_SROA_0_20_Z]] +; +entry: + %A = alloca %struct + %B = addrspacecast %struct* %A to i65 addrspace(1)* + %gepA = getelementptr %struct, %struct* %A, i32 0, i32 0, i16 20 + %gepB = getelementptr i65, i65 addrspace(1)* %B, i16 6 + store volatile i65 %X, i65 addrspace(1)* %gepB, align 1 + br label %L2 + +L2: + %gepA.bc = bitcast i8* %gepA to i65* + %Z = load i65, i65* %gepA.bc, align 1 + ret i65 %Z +} + ; Don't change the address space of a volatile operation define i64 @alloca_addrspacecast_bitcast_volatile_load(i64 %X) { ; CHECK-LABEL: @alloca_addrspacecast_bitcast_volatile_load( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A:%.*]] = alloca [8 x i8] -; CHECK-NEXT: [[A_CAST:%.*]] = addrspacecast [8 x i8]* [[A]] to [8 x i8] addrspace(1)* -; CHECK-NEXT: [[B:%.*]] = bitcast [8 x i8] addrspace(1)* [[A_CAST]] to i64 addrspace(1)* -; CHECK-NEXT: store i64 [[X:%.*]], i64 addrspace(1)* [[B]] -; CHECK-NEXT: [[Z:%.*]] = load volatile i64, i64 addrspace(1)* [[B]] -; CHECK-NEXT: ret i64 [[Z]] +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i64 +; CHECK-NEXT: store i64 [[X:%.*]], i64* [[A_SROA_0]] +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i64* [[A_SROA_0]] to i64 addrspace(1)* +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_Z:%.*]] = load volatile i64, i64 addrspace(1)* [[TMP0]] +; CHECK-NEXT: ret i64 [[A_SROA_0_0_A_SROA_0_0_Z]] ; entry: %A = alloca [8 x i8] @@ -208,19 +235,45 @@ ret i64 %Z } +define i65 @volatile_load_addrspacecast_slice(i65 %X, i16 %idx) { +; CHECK-LABEL: @volatile_load_addrspacecast_slice( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [9 x i8], align 4 +; CHECK-NEXT: [[A_SROA_1:%.*]] = alloca [9 x i8], align 4 +; CHECK-NEXT: [[A_SROA_1_0_GEPB_SROA_CAST:%.*]] = addrspacecast [9 x i8]* [[A_SROA_1]] to i65 addrspace(1)* +; CHECK-NEXT: store i65 [[X:%.*]], i65 addrspace(1)* [[A_SROA_1_0_GEPB_SROA_CAST]] +; CHECK-NEXT: br label [[L2:%.*]] +; CHECK: L2: +; CHECK-NEXT: [[A_SROA_0_0_GEPA_BC_SROA_CAST:%.*]] = bitcast [9 x i8]* [[A_SROA_0]] to i65* +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_20_Z:%.*]] = load volatile i65, i65* [[A_SROA_0_0_GEPA_BC_SROA_CAST]] +; CHECK-NEXT: ret i65 [[A_SROA_0_0_A_SROA_0_20_Z]] +; +entry: + %A = alloca %struct + %B = addrspacecast %struct* %A to i65 addrspace(1)* + %gepA = getelementptr %struct, %struct* %A, i32 0, i32 0, i16 20 + %gepB = getelementptr i65, i65 addrspace(1)* %B, i16 6 + store i65 %X, i65 addrspace(1)* %gepB, align 1 + br label %L2 + +L2: + %gepA.bc = bitcast i8* %gepA to i65* + %Z = load volatile i65, i65* %gepA.bc, align 1 + ret i65 %Z +} + + declare void @llvm.memset.p1i8.i32(i8 addrspace(1)* nocapture, i8, i32, i1) nounwind ; Don't change the address space of a volatile operation define i32 @volatile_memset() { ; CHECK-LABEL: @volatile_memset( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A:%.*]] = alloca [4 x i8] -; CHECK-NEXT: [[PTR:%.*]] = getelementptr [4 x i8], [4 x i8]* [[A]], i32 0, i32 0 -; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i8* [[PTR]] to i8 addrspace(1)* -; CHECK-NEXT: call void @llvm.memset.p1i8.i32(i8 addrspace(1)* [[ASC]], i8 42, i32 4, i1 true) -; CHECK-NEXT: [[IPTR:%.*]] = bitcast i8* [[PTR]] to i32* -; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[IPTR]] -; CHECK-NEXT: ret i32 [[VAL]] +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i32* [[A_SROA_0]] to i32 addrspace(1)* +; CHECK-NEXT: store volatile i32 707406378, i32 addrspace(1)* [[TMP0]] +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_VAL:%.*]] = load i32, i32* [[A_SROA_0]] +; CHECK-NEXT: ret i32 [[A_SROA_0_0_A_SROA_0_0_VAL]] ; entry: %a = alloca [4 x i8] @@ -236,11 +289,15 @@ define void @volatile_memcpy(i8* %src, i8* %dst) { ; CHECK-LABEL: @volatile_memcpy( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A:%.*]] = alloca [4 x i8] -; CHECK-NEXT: [[PTR:%.*]] = getelementptr [4 x i8], [4 x i8]* [[A]], i32 0, i32 0 -; CHECK-NEXT: [[ASC:%.*]] = addrspacecast i8* [[PTR]] to i8 addrspace(1)* -; CHECK-NEXT: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* [[ASC]], i8* [[SRC:%.*]], i32 4, i1 true), !tbaa !0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p1i8.i32(i8* [[DST:%.*]], i8 addrspace(1)* [[ASC]], i32 4, i1 true), !tbaa !3 +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32 +; CHECK-NEXT: [[A_SROA_0_0_SRC_SROA_CAST:%.*]] = bitcast i8* [[SRC:%.*]] to i32* +; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, i32* [[A_SROA_0_0_SRC_SROA_CAST]], align 1, !tbaa !0 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i32* [[A_SROA_0]] to i32 addrspace(1)* +; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_COPYLOAD]], i32 addrspace(1)* [[TMP0]], !tbaa !0 +; CHECK-NEXT: [[A_SROA_0_0_DST_SROA_CAST:%.*]] = bitcast i8* [[DST:%.*]] to i32* +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i32* [[A_SROA_0]] to i32 addrspace(1)* +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1:%.*]] = load volatile i32, i32 addrspace(1)* [[TMP1]], !tbaa !3 +; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1]], i32* [[A_SROA_0_0_DST_SROA_CAST]], align 1, !tbaa !3 ; CHECK-NEXT: ret void ; entry: Index: test/Transforms/SROA/basictest.ll =================================================================== --- test/Transforms/SROA/basictest.ll +++ test/Transforms/SROA/basictest.ll @@ -1255,11 +1255,10 @@ define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) { ; Make sure this the right address space pointer is used for type check. ; CHECK-LABEL: @PR14105_as1( -; CHECK: alloca { [16 x i8] }, align 8 -; CHECK-NEXT: %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i64 -1 -; CHECK-NEXT: %cast1 = bitcast { [16 x i8] } addrspace(1)* %gep to i8 addrspace(1)* -; CHECK-NEXT: %cast2 = bitcast { [16 x i8] }* %a to i8* -; CHECK-NEXT: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true) +; CHECK: [[ALLOCA:%.*]] = alloca [16 x i8], align 8 +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i16 -1 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[ALLOCA]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 [[GEP0]], i8* align 8 [[GEP1]], i32 16, i1 true) entry: %a = alloca { [16 x i8] }, align 8