Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2184,15 +2184,17 @@ // of a bitcasted pointer to vector or array of the same dimensions: // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z - auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy) { + auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy, + const DataLayout &DL) { return ArrTy->getArrayElementType() == VecTy->getVectorElementType() && - ArrTy->getArrayNumElements() == VecTy->getVectorNumElements(); + ArrTy->getArrayNumElements() == VecTy->getVectorNumElements() && + DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy); }; if (GEP.getNumOperands() == 3 && ((GEPEltType->isArrayTy() && SrcEltType->isVectorTy() && - areMatchingArrayAndVecTypes(GEPEltType, SrcEltType)) || + areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) || (GEPEltType->isVectorTy() && SrcEltType->isArrayTy() && - areMatchingArrayAndVecTypes(SrcEltType, GEPEltType)))) { + areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) { // Create a new GEP here, as using `setOperand()` followed by // `setSourceElementType()` won't actually update the type of the Index: llvm/test/Transforms/InstCombine/gep-vector.ll =================================================================== --- llvm/test/Transforms/InstCombine/gep-vector.ll +++ llvm/test/Transforms/InstCombine/gep-vector.ll @@ -27,9 +27,12 @@ ret <2 x i8*> %1 } +; Negative test - datalayout's alloc size for the 2 types must match. 
+ define i32* @bitcast_vec_to_array_gep(<7 x i32>* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @bitcast_vec_to_array_gep( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr <7 x i32>, <7 x i32>* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]] +; CHECK-NEXT: [[ARR_PTR:%.*]] = bitcast <7 x i32>* [[X:%.*]] to [7 x i32]* +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [7 x i32], [7 x i32]* [[ARR_PTR]], i64 [[Y:%.*]], i64 [[Z:%.*]] ; CHECK-NEXT: ret i32* [[GEP]] ; %arr_ptr = bitcast <7 x i32>* %x to [7 x i32]* @@ -37,9 +40,12 @@ ret i32* %gep } +; Negative test - datalayout's alloc size for the 2 types must match. + define i32* @bitcast_array_to_vec_gep([3 x i32]* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @bitcast_array_to_vec_gep( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]] +; CHECK-NEXT: [[VEC_PTR:%.*]] = bitcast [3 x i32]* [[X:%.*]] to <3 x i32>* +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <3 x i32>, <3 x i32>* [[VEC_PTR]], i64 [[Y:%.*]], i64 [[Z:%.*]] ; CHECK-NEXT: ret i32* [[GEP]] ; %vec_ptr = bitcast [3 x i32]* %x to <3 x i32>* @@ -47,6 +53,8 @@ ret i32* %gep } +; Sizes and types match - safe to remove bitcast. + define i32* @bitcast_vec_to_array_gep_matching_alloc_size(<4 x i32>* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @bitcast_vec_to_array_gep_matching_alloc_size( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]] @@ -57,6 +65,8 @@ ret i32* %gep } +; Sizes and types match - safe to remove bitcast. + define i32* @bitcast_array_to_vec_gep_matching_alloc_size([4 x i32]* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @bitcast_array_to_vec_gep_matching_alloc_size( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]] @@ -67,11 +77,14 @@ ret i32* %gep } +; Negative test - datalayout's alloc size for the 2 types must match. 
+ define i32 addrspace(3)* @bitcast_vec_to_array_addrspace(<7 x i32>* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @bitcast_vec_to_array_addrspace( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr <7 x i32>, <7 x i32>* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i32* [[GEP]] to i32 addrspace(3)* -; CHECK-NEXT: ret i32 addrspace(3)* [[TMP1]] +; CHECK-NEXT: [[ARR_PTR:%.*]] = bitcast <7 x i32>* [[X:%.*]] to [7 x i32]* +; CHECK-NEXT: [[ASC:%.*]] = addrspacecast [7 x i32]* [[ARR_PTR]] to [7 x i32] addrspace(3)* +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [7 x i32], [7 x i32] addrspace(3)* [[ASC]], i64 [[Y:%.*]], i64 [[Z:%.*]] +; CHECK-NEXT: ret i32 addrspace(3)* [[GEP]] ; %arr_ptr = bitcast <7 x i32>* %x to [7 x i32]* %asc = addrspacecast [7 x i32]* %arr_ptr to [7 x i32] addrspace(3)* @@ -79,11 +92,14 @@ ret i32 addrspace(3)* %gep } +; Negative test - datalayout's alloc size for the 2 types must match. + define i32 addrspace(3)* @inbounds_bitcast_vec_to_array_addrspace(<7 x i32>* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @inbounds_bitcast_vec_to_array_addrspace( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <7 x i32>, <7 x i32>* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i32* [[GEP]] to i32 addrspace(3)* -; CHECK-NEXT: ret i32 addrspace(3)* [[TMP1]] +; CHECK-NEXT: [[ARR_PTR:%.*]] = bitcast <7 x i32>* [[X:%.*]] to [7 x i32]* +; CHECK-NEXT: [[ASC:%.*]] = addrspacecast [7 x i32]* [[ARR_PTR]] to [7 x i32] addrspace(3)* +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [7 x i32], [7 x i32] addrspace(3)* [[ASC]], i64 [[Y:%.*]], i64 [[Z:%.*]] +; CHECK-NEXT: ret i32 addrspace(3)* [[GEP]] ; %arr_ptr = bitcast <7 x i32>* %x to [7 x i32]* %asc = addrspacecast [7 x i32]* %arr_ptr to [7 x i32] addrspace(3)* @@ -91,6 +107,8 @@ ret i32 addrspace(3)* %gep } +; Sizes and types match - safe to remove bitcast. 
+ define i32 addrspace(3)* @bitcast_vec_to_array_addrspace_matching_alloc_size(<4 x i32>* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @bitcast_vec_to_array_addrspace_matching_alloc_size( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]] @@ -103,6 +121,8 @@ ret i32 addrspace(3)* %gep } +; Sizes and types match - safe to remove bitcast. + define i32 addrspace(3)* @inbounds_bitcast_vec_to_array_addrspace_matching_alloc_size(<4 x i32>* %x, i64 %y, i64 %z) { ; CHECK-LABEL: @inbounds_bitcast_vec_to_array_addrspace_matching_alloc_size( ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]