diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1472,12 +1472,19 @@
   // Normalize Vector GEP - all scalar operands should be converted to the
   // splat vector.
   unsigned VectorWidth = 0;
-  if (auto *VT = dyn_cast<VectorType>(U.getType()))
+
+  // True if we should use a splat vector; using VectorWidth alone is not
+  // sufficient.
+  bool WantSplatVector = false;
+  if (auto *VT = dyn_cast<VectorType>(U.getType())) {
     VectorWidth = cast<FixedVectorType>(VT)->getNumElements();
+    // We don't produce 1 x N vectors; those are treated as scalars.
+    WantSplatVector = VectorWidth > 1;
+  }
 
   // We might need to splat the base pointer into a vector if the offsets
   // are vectors.
-  if (VectorWidth && !PtrTy.isVector()) {
+  if (WantSplatVector && !PtrTy.isVector()) {
     BaseReg =
         MIRBuilder
             .buildSplatVector(LLT::fixed_vector(VectorWidth, PtrTy), BaseReg)
@@ -1516,7 +1523,7 @@
       Register IdxReg = getOrCreateVReg(*Idx);
       LLT IdxTy = MRI->getType(IdxReg);
       if (IdxTy != OffsetTy) {
-        if (!IdxTy.isVector() && VectorWidth) {
+        if (!IdxTy.isVector() && WantSplatVector) {
           IdxReg = MIRBuilder.buildSplatVector(
               OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
         }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-one-by-n-vector-ptr-add.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-one-by-n-vector-ptr-add.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-one-by-n-vector-ptr-add.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -O0 -global-isel -mtriple aarch64 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s
+
+; Make sure we treat <1 x N> getelementptrs like scalar getelementptrs.
+
+; We should not create a splat vector for the non-vector index on this
+; getelementptr. The entire getelementptr should be translated to a scalar
+; G_PTR_ADD.
+define <1 x i8*> @one_elt_vector_ptr_add_non_vector_idx(<1 x i8*> %vec) {
+  ; CHECK-LABEL: name: one_elt_vector_ptr_add_non_vector_idx
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $d0
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $d0
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+  ; CHECK:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
+  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SEXT]](s64)
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; CHECK:   $d0 = COPY [[COPY2]](p0)
+  ; CHECK:   RET_ReallyLR implicit $d0
+  %ptr_add = getelementptr i8, <1 x i8*> %vec, <1 x i32> <i32 1>
+  ret <1 x i8*> %ptr_add
+}
+
+; We should not create a splat vector for the non-vector pointer on this
+; getelementptr. The entire getelementptr should be translated to a scalar
+; G_PTR_ADD.
+define <1 x i8*> @one_elt_vector_ptr_add_non_vector_ptr(i8* %vec) {
+  ; CHECK-LABEL: name: one_elt_vector_ptr_add_non_vector_ptr
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+  ; CHECK:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
+  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SEXT]](s64)
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
+  ; CHECK:   $d0 = COPY [[COPY2]](p0)
+  ; CHECK:   RET_ReallyLR implicit $d0
+  %ptr_add = getelementptr i8, i8* %vec, <1 x i32> <i32 1>
+  ret <1 x i8*> %ptr_add
+}