diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1128,9 +1128,9 @@
 
 /// Walk the range of a partitioning looking for a common type to cover this
 /// sequence of slices.
-static Type *findCommonType(AllocaSlices::const_iterator B,
-                            AllocaSlices::const_iterator E,
-                            uint64_t EndOffset) {
+static std::pair<Type *, IntegerType *>
+findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
+               uint64_t EndOffset) {
   Type *Ty = nullptr;
   bool TyIsCommon = true;
   IntegerType *ITy = nullptr;
@@ -1174,7 +1174,7 @@
     Ty = UserTy;
   }
 
-  return TyIsCommon ? Ty : ITy;
+  return {TyIsCommon ? Ty : nullptr, ITy};
 }
 
 /// PHI instructions that use an alloca and are subsequently loaded can be
@@ -4264,13 +4264,21 @@
   // or an i8 array of an appropriate size.
   Type *SliceTy = nullptr;
   const DataLayout &DL = AI.getModule()->getDataLayout();
-  if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
-    if (DL.getTypeAllocSize(CommonUseTy).getFixedSize() >= P.size())
-      SliceTy = CommonUseTy;
+  std::pair<Type *, IntegerType *> CommonUseTy =
+      findCommonType(P.begin(), P.end(), P.endOffset());
+  // Do all uses operate on the same type?
+  if (CommonUseTy.first)
+    if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size())
+      SliceTy = CommonUseTy.first;
+  // If not, can we find an appropriate subtype in the original allocated type?
   if (!SliceTy)
     if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
                                                  P.beginOffset(), P.size()))
       SliceTy = TypePartitionTy;
+  // If still not, can we use the largest bitwidth integer type used?
+  if (!SliceTy && CommonUseTy.second)
+    if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size())
+      SliceTy = CommonUseTy.second;
   if ((!SliceTy || (SliceTy->isArrayTy() &&
                     SliceTy->getArrayElementType()->isIntegerTy())) &&
       DL.isLegalInteger(P.size() * 8))
diff --git a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
--- a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
+++ b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
@@ -68,13 +68,12 @@
 ; CHECK-LABEL: @_Z3foo1S(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[I2:%.*]] = alloca [[TMP0:%.*]], align 8
-; CHECK-NEXT:    [[I1_SROA_0_0_I5_SROA_CAST:%.*]] = bitcast %0* [[ARG:%.*]] to i64*
-; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[I1_SROA_0_0_I5_SROA_CAST]], align 8
-; CHECK-NEXT:    [[I_SROA_0_0_I6_SROA_CAST:%.*]] = bitcast %0* [[I2]] to i64*
-; CHECK-NEXT:    store i64 [[I1_SROA_0_0_COPYLOAD]], i64* [[I_SROA_0_0_I6_SROA_CAST]], align 8
+; CHECK-NEXT:    [[TMP0]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD15:%.*]] = load i32*, i32** [[TMP0]], align 8
+; CHECK-NEXT:    [[I_SROA_0_0_I6_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I2]], i64 0, i32 0
+; CHECK-NEXT:    store i32* [[I1_SROA_0_0_COPYLOAD15]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8
 ; CHECK-NEXT:    tail call void @_Z7escape01S(%0* nonnull byval(%0) align 8 [[I2]])
-; CHECK-NEXT:    [[TMP0]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT:    ret i32* [[TMP0]]
+; CHECK-NEXT:    ret i32* [[I1_SROA_0_0_COPYLOAD15]]
 ;
 bb:
   %i = alloca %0, align 8
@@ -108,24 +107,21 @@
 define dso_local i32* @_Z3bar1S(%0* byval(%0) align 8 %arg) {
 ; CHECK-LABEL: @_Z3bar1S(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[I1_SROA_0_0_I4_SROA_CAST:%.*]] = bitcast %0* [[ARG:%.*]] to i64*
-; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[I1_SROA_0_0_I4_SROA_CAST]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0
+; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD14:%.*]] = load i32*, i32** [[TMP0]], align 8
 ; CHECK-NEXT:    [[I5:%.*]] = tail call i32 @_Z4condv()
 ; CHECK-NEXT:    [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0
 ; CHECK-NEXT:    br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]]
 ; CHECK:       bb7:
 ; CHECK-NEXT:    tail call void @_Z5sync0v()
-; CHECK-NEXT:    [[TMP0:%.*]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT:    tail call void @_Z7escape0Pi(i32* [[TMP0]])
+; CHECK-NEXT:    tail call void @_Z7escape0Pi(i32* [[I1_SROA_0_0_COPYLOAD14]])
 ; CHECK-NEXT:    br label [[BB13:%.*]]
 ; CHECK:       bb10:
 ; CHECK-NEXT:    tail call void @_Z5sync1v()
-; CHECK-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT:    tail call void @_Z7escape1Pi(i32* [[TMP1]])
+; CHECK-NEXT:    tail call void @_Z7escape1Pi(i32* [[I1_SROA_0_0_COPYLOAD14]])
 ; CHECK-NEXT:    br label [[BB13]]
 ; CHECK:       bb13:
-; CHECK-NEXT:    [[DOTPRE_PHI:%.*]] = phi i32* [ [[TMP1]], [[BB10]] ], [ [[TMP0]], [[BB7]] ]
-; CHECK-NEXT:    ret i32* [[DOTPRE_PHI]]
+; CHECK-NEXT:    ret i32* [[I1_SROA_0_0_COPYLOAD14]]
 ;
 bb:
   %i = alloca %0, align 8
diff --git a/llvm/test/Transforms/SROA/ppcf128-no-fold.ll b/llvm/test/Transforms/SROA/ppcf128-no-fold.ll
--- a/llvm/test/Transforms/SROA/ppcf128-no-fold.ll
+++ b/llvm/test/Transforms/SROA/ppcf128-no-fold.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -sroa -S | FileCheck %s
+; RUN: opt < %s -sroa -S | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -27,8 +27,8 @@
 ; CHECK-LABEL: @foo
 ; CHECK-NOT: i128 4628293042053316608
 ; CHECK-NOT: i128 4653260752096854016
-; CHECK-DAG: i128 bitcast (ppc_fp128 0xM403B0000000000000000000000000000 to i128)
-; CHECK-DAG: i128 bitcast (ppc_fp128 0xM4093B400000000000000000000000000 to i128)
+; CHECK-DAG: bitcast ppc_fp128 0xM403B0000000000000000000000000000 to i128
+; CHECK-DAG: bitcast ppc_fp128 0xM4093B400000000000000000000000000 to i128
 ; CHECK: call void @bar(i8* %v, [2 x i128]
 ; CHECK: ret void
diff --git a/llvm/test/Transforms/SROA/preserve-nonnull.ll b/llvm/test/Transforms/SROA/preserve-nonnull.ll
--- a/llvm/test/Transforms/SROA/preserve-nonnull.ll
+++ b/llvm/test/Transforms/SROA/preserve-nonnull.ll
@@ -51,11 +51,10 @@
 define i8* @propagate_nonnull_to_int() {
 ; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    %[[A:.*]] = alloca i64
-; CHECK-NEXT:    store i64 42, i64* %[[A]]
-; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
-; CHECK-NEXT:    %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
-; CHECK-NEXT:    ret i8* %[[CAST]]
+; CHECK-NEXT:    %[[A:.*]] = alloca i8*
+; CHECK-NEXT:    store i8* inttoptr (i64 42 to i8*), i8** %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i8*, i8** %[[A]]
+; CHECK-NEXT:    ret i8* %[[LOAD]]
 entry:
   %a = alloca [2 x i8*]
   %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
@@ -75,8 +74,7 @@
 define i8* @propagate_nonnull_to_int_and_promote() {
 ; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
-; CHECK-NEXT:    ret i8* %[[PROMOTED_VALUE]]
+; CHECK-NEXT:    ret i8* inttoptr (i64 42 to i8*)
 entry:
   %a = alloca [2 x i8*], align 8
   %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0