diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -352,9 +352,35 @@
 // * Dereferenceable address & few lanes -> scalarize speculative load/selects
 // * Adjacent vector addresses -> masked.load
 // * Narrow width by halfs excluding zero/undef lanes
-// * Vector splat address w/known mask -> scalar load
 // * Vector incrementing address -> vector masked load
 Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
+  // Operand 2 is the lane mask; nothing below applies unless it is a
+  // compile-time constant.
+  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
+  if (!ConstMask)
+    return nullptr;
+
+  // Vector splat address w/known mask -> scalar load
+  // Fold the gather to load the source vector first lane
+  // because it is reloading the same value each time
+  if (ConstMask->isAllOnesValue())
+    if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
+      auto *VecTy = cast<VectorType>(II.getType());
+      // Operand 1 is the gather's alignment; reuse it for the scalar load.
+      const Align Alignment =
+          cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
+      LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
+                                              Alignment, "load.combine");
+      // Carry the gather's metadata (e.g. !tbaa) over to the scalar load so
+      // the fold does not discard it.
+      L->copyMetadata(II);
+      // NOTE(review): the cast assumes CreateVectorSplat of a non-constant
+      // scalar always yields an Instruction - confirm for the folder in use.
+      Value *Shuf =
+          Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
+      return replaceInstUsesWith(II, cast<Instruction>(Shuf));
+    }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/vscale_masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/vscale_masked_intrinsics.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vscale_masked_intrinsics.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+;; Splat Value and all active/one mask
+define <vscale x 2 x i64> @valid_invariant_load_i64(i64* %src) #0 {
+; CHECK-LABEL: @valid_invariant_load_i64(
+; CHECK-NEXT:    [[LOAD_COMBINE:%.*]] = load i64, i64* [[SRC:%.*]], align 8
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[LOAD_COMBINE]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[BROADCAST_SPLAT2]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 2 x i64*> poison, i64 *%src, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x i64*> %broadcast.splatinsert, <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %broadcast.splat, i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %res
+}
+
+;; Splat Value and all active/one mask: gather metadata is kept on the scalar load
+define <vscale x 2 x i64> @valid_invariant_load_i64_metadata(i64* %src) #0 {
+; CHECK-LABEL: @valid_invariant_load_i64_metadata(
+; CHECK-NEXT:    [[LOAD_COMBINE:%.*]] = load i64, i64* [[SRC:%.*]], align 8, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[LOAD_COMBINE]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[BROADCAST_SPLAT2]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 2 x i64*> poison, i64 *%src, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x i64*> %broadcast.splatinsert, <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %broadcast.splat, i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef), !tbaa !0
+  ret <vscale x 2 x i64> %res
+}
+
+;; Not a splat value
+define <vscale x 2 x i64> @invalid_value_invariant_load_i64(i64* %src ) #0 {
+; CHECK-LABEL: @invalid_value_invariant_load_i64(
+; CHECK-NEXT:    [[INSERT_VALUE:%.*]] = insertelement <vscale x 2 x i64*> poison, i64* [[SRC:%.*]], i32 1
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> [[INSERT_VALUE]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[RES]]
+;
+  %insert.value = insertelement <vscale x 2 x i64*> poison, i64 *%src, i32 1
+  %res = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %insert.value, i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %res
+}
+
+;; Not all active/one or inactive/zero mask
+define <vscale x 2 x i64> @invalid_mask_invariant_load_i64(i64* %src, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: @invalid_mask_invariant_load_i64(
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64*> poison, i64* [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[MASK:%.*]], <vscale x 2 x i64> undef)
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[RES]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 2 x i64*> poison, i64 *%src, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x i64*> %broadcast.splatinsert, <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %broadcast.splat, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %res
+}
+
+
+; Function Attrs:
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+attributes #0 = { "target-features"="+sve,+sve" }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"long", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}