diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -352,9 +352,27 @@
 // * Dereferenceable address & few lanes -> scalarize speculative load/selects
 // * Adjacent vector addresses -> masked.load
 // * Narrow width by halfs excluding zero/undef lanes
-// * Vector splat address w/known mask -> scalar load
 // * Vector incrementing address -> vector masked load
 Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
+  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
+  if (!ConstMask)
+    return nullptr;
+
+  // Vector splat address w/known mask -> scalar load
+  // Fold the gather to load the source vector first lane
+  // because it is reloading the same value each time
+  if (ConstMask->isAllOnesValue())
+    if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
+      auto *VecTy = cast<VectorType>(II.getType());
+      const Align Alignment =
+          cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
+      LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
+                                              Alignment, "load.combine");
+      Value *Shuf =
+          Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
+      return replaceInstUsesWith(II, cast<Instruction>(Shuf));
+    }
+
   return nullptr;
 }
diff --git a/llvm/test/Transforms/InstCombine/vscale_masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/vscale_masked_intrinsics.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vscale_masked_intrinsics.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+;; Splat Value and all active/one mask
+define <vscale x 2 x i64> @valid_invariant_load_i64(i64* %src) #0 {
+; CHECK-LABEL: @valid_invariant_load_i64(
+; CHECK-NEXT:    [[LOAD_COMBINE:%.*]] = load i64, i64* [[SRC:%.*]], align 8
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[LOAD_COMBINE]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[BROADCAST_SPLAT2]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 2 x i64*> poison, i64* %src, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x i64*> %broadcast.splatinsert, <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %broadcast.splat, i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %res
+}
+
+;; Not a splat value
+define <vscale x 2 x i64> @invalid_value_invariant_load_i64(i64* %src) #0 {
+; CHECK-LABEL: @invalid_value_invariant_load_i64(
+; CHECK-NEXT:    [[INSERT_VALUE:%.*]] = insertelement <vscale x 2 x i64*> poison, i64* [[SRC:%.*]], i32 1
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> [[INSERT_VALUE]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[RES]]
+;
+  %insert.value = insertelement <vscale x 2 x i64*> poison, i64* %src, i32 1
+  %res = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %insert.value, i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %res
+}
+
+;; Not all active/one or inactive/zero mask
+define <vscale x 2 x i64> @invalid_mask_invariant_load_i64(i64* %src, <vscale x 2 x i1> %mask) #0 {
+; CHECK-LABEL: @invalid_mask_invariant_load_i64(
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64*> poison, i64* [[SRC:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[RES:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0i64(<vscale x 2 x i64*> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[MASK:%.*]], <vscale x 2 x i64> undef)
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[RES]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 2 x i64*> poison, i64* %src, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x i64*> %broadcast.splatinsert, <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %broadcast.splat, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %res
+}
+
+
+; Function Attrs:
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+attributes #0 = { "target-features"="+sve" }