diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5708,7 +5708,8 @@
       // Create a scalar GEP if there are more than 2 operands.
       if (Ops.size() != 2) {
         // Replace the last index with 0.
-        Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
+        Ops[FinalIndex] =
+            Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
         Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
         SourceTy = GetElementPtrInst::getIndexedType(
             SourceTy, ArrayRef(Ops).drop_front());
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll b/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/masked-gather-struct-gep.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+; REQUIRES: x86-registered-target
+target triple = "x86_64-pc-linux"
+
+; The gather address is a GEP off a splat base whose last index is a
+; <4 x i32> struct field index, while the leading index is <4 x i64>.
+; The scalar GEP built for the splat base must zero that last index using
+; the index's own scalar type rather than a single shared index type.
+
+%s = type <{ float, i32, i8, [3 x i8] }>
+
+declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x float>)
+
+define <4 x float> @foo(ptr %p) {
+; CHECK-LABEL: define <4 x float> @foo
+; CHECK-SAME: (ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast ptr [[P]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP2]], i32 0, <4 x i1> zeroinitializer, <4 x float> zeroinitializer)
+; CHECK-NEXT:    ret <4 x float> [[GATHER]]
+;
+  %base.splatinsert = insertelement <4 x ptr> poison, ptr %p, i32 0
+  %base = shufflevector <4 x ptr> %base.splatinsert, <4 x ptr> poison, <4 x i32> zeroinitializer
+  %gep = getelementptr %s, <4 x ptr> %base, <4 x i64> zeroinitializer, <4 x i32> zeroinitializer
+  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> zeroinitializer, <4 x float> zeroinitializer)
+  ret <4 x float> %gather
+}