diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2045,7 +2045,7 @@ // Rank the remaining candidate vector types. This is easy because we know // they're all integer vectors. We sort by ascending number of elements. - auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) { + auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) { (void)DL; assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() == DL.getTypeSizeInBits(LHSTy).getFixedValue() && @@ -2057,10 +2057,22 @@ return cast(RHSTy)->getNumElements() < cast(LHSTy)->getNumElements(); }; - llvm::sort(CandidateTys, RankVectorTypes); - CandidateTys.erase( - std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes), - CandidateTys.end()); + auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) { + (void)DL; + assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() == + DL.getTypeSizeInBits(LHSTy).getFixedValue() && + "Cannot have vector types of different sizes!"); + assert(RHSTy->getElementType()->isIntegerTy() && + "All non-integer types eliminated!"); + assert(LHSTy->getElementType()->isIntegerTy() && + "All non-integer types eliminated!"); + return cast(RHSTy)->getNumElements() == + cast(LHSTy)->getNumElements(); + }; + llvm::sort(CandidateTys, RankVectorTypesComp); + CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(), + RankVectorTypesEq), + CandidateTys.end()); } else { // The only way to have the same element type in every vector type is to // have the same vector type. Check that and remove all but one. diff --git a/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll b/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll --- a/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll +++ b/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll @@ -15,12 +15,15 @@ ; CHECK-LABEL: @test_zeroinit( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 0 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 1 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 2 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: @@ -67,24 +70,19 @@ ret void } -; Initial SROA pass failed to promote alloca and same alloca type was re-used -; so alloca was not re-added to the worklist after initial SROA pass. This -; caused it to fail to promote unlike the other tests. define amdgpu_kernel void @vector_type_alloca() #0 { ; CHECK-LABEL: @vector_type_alloca( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_BLOCKWISE_COPY:%.*]] = alloca <8 x half>, align 16 -; CHECK-NEXT: store <8 x half> zeroinitializer, ptr [[B_BLOCKWISE_COPY]], align 16 ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> -; CHECK-NEXT: store <8 x half> [[TMP0]], ptr [[B_BLOCKWISE_COPY]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_0_LOAD1:%.*]] = load half, ptr [[B_BLOCKWISE_COPY]], align 16 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_2_PTR2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY]], i64 2 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_2_LOAD2:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_2_PTR2_SROA_IDX]], align 2 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_4_PTR3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY]], i64 4 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_4_LOAD3:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_4_PTR3_SROA_IDX]], align 4 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: @@ -107,12 +105,15 @@ ; CHECK-LABEL: @test_struct_contain_multiple_types1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 0 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 1 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 2 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: @@ -140,12 +141,15 @@ ; CHECK-NEXT: [[DATA1_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i32] [[DATA1]], 2 ; CHECK-NEXT: [[DATA1_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i32] [[DATA1]], 3 ; CHECK-NEXT: [[DATA2:%.*]] = load <4 x float>, ptr undef, align 16 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA2]] to <8 x half> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA2]] to <8 x i16> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5_0_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 0 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5_2_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 1 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5_4_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 2 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5_16_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_5_16_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5_18_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_5_18_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5_20_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_5_20_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: @@ -173,13 +177,15 @@ ; CHECK-LABEL: @test_struct_array_vector( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DATA0:%.*]] = load <4 x float>, ptr undef, align 16 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA0]] to <8 x half> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA0]] to <8 x i16> ; CHECK-NEXT: [[DATA1:%.*]] = load <4 x float>, ptr undef, align 16 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[DATA1]] to <8 x half> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[DATA1]] to <8 x i16> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 0 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_3_0_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP1]], i32 0 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_3_16_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_3_16_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: