diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -792,6 +792,32 @@
   return IC.replaceInstUsesWith(II, VectorSplat);
 }
 
+static Optional<Instruction *> instCombineSVETupleGet(InstCombiner &IC,
+                                                      IntrinsicInst &II) {
+  // Try to remove sequences of tuple get/set.
+  Value *SetTuple, *SetIndex, *SetValue;
+  auto *GetTuple = II.getArgOperand(0);
+  auto *GetIndex = II.getArgOperand(1);
+  // Check that we have tuple_get(GetTuple, GetIndex) where GetTuple is a
+  // call to tuple_set, i.e. tuple_set(SetTuple, SetIndex, SetValue).
+  // Make sure that the types of the current intrinsic and SetValue match
+  // in order to safely remove the sequence.
+  if (!match(GetTuple,
+             m_Intrinsic<Intrinsic::aarch64_sve_tuple_set>(
+                 m_Value(SetTuple), m_Value(SetIndex), m_Value(SetValue))) ||
+      SetValue->getType() != II.getType())
+    return None;
+  // Case where we get the same index right after setting it.
+  // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex) --> SetValue
+  if (GetIndex == SetIndex)
+    return IC.replaceInstUsesWith(II, SetValue);
+  // If we are getting a different index than the one set by the tuple_set
+  // intrinsic, we can forward the get to the tuple one step up the chain:
+  // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex)
+  //   --> tuple_get(SetTuple, GetIndex)
+  return IC.replaceOperand(II, 0, SetTuple);
+}
+
 static Optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
                                                  IntrinsicInst &II) {
   // zip1(uzp1(A, B), uzp2(A, B)) --> A
@@ -850,6 +876,8 @@
   case Intrinsic::aarch64_sve_sunpkhi:
   case Intrinsic::aarch64_sve_sunpklo:
     return instCombineSVEUnpack(IC, II);
+  case Intrinsic::aarch64_sve_tuple_get:
+    return instCombineSVETupleGet(IC, II);
   case Intrinsic::aarch64_sve_zip1:
   case Intrinsic::aarch64_sve_zip2:
     return instCombineSVEZip(IC, II);
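For clarity, here is a minimal before/after sketch of the two folds in LLVM IR. This is hand-written for illustration and is not part of the patch; the function name @example_fold is hypothetical.

; Before: the get reads back the element just written at the same index.
define <vscale x 16 x i8> @example_fold(<vscale x 64 x i8> %tuple, <vscale x 16 x i8> %v) {
  %set = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %tuple, i32 1, <vscale x 16 x i8> %v)
  %get = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %set, i32 1)
  ret <vscale x 16 x i8> %get
}
declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8>, i32, <vscale x 16 x i8>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8>, i32)
; After instcombine, GetIndex == SetIndex, so the first rule folds the get to
; %v and the body becomes: ret <vscale x 16 x i8> %v
; With a different get index (say i32 0), only the second rule would apply,
; rewriting the get to read directly from the original tuple:
;   %get = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %tuple, i32 0)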
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; This stores %a using st4 after reversing the 4 tuple elements. Check that
+; the redundant sequences of get/set are eliminated.
+define void @redundant_tuple_get_set(<vscale x 64 x i8> %a, i8* %ptr) #0 {
+; CHECK-LABEL: @redundant_tuple_get_set(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A:%.*]], i32 3)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A]], i32 0)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A]], i32 2)
+; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A]], i32 1)
+; CHECK-NEXT:    call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i32 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i8* [[PTR:%.*]])
+; CHECK-NEXT:    ret void
+;
+  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %a, i32 3)
+  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %a, i32 0)
+  %3 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %a, i32 3, <vscale x 16 x i8> %2)
+  %4 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %3, i32 0, <vscale x 16 x i8> %1)
+  %5 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %4, i32 2)
+  %6 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %4, i32 1)
+  %7 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %4, i32 2, <vscale x 16 x i8> %6)
+  %8 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %7, i32 1, <vscale x 16 x i8> %5)
+  %9 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 0)
+  %10 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 1)
+  %11 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 2)
+  %12 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 3)
+  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %9, <vscale x 16 x i8> %10, <vscale x 16 x i8> %11, <vscale x 16 x i8> %12, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i32 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i8* %ptr)
+  ret void
+}
+
+declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8>, i32, <vscale x 16 x i8>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8>, i32)
+declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
+
+attributes #0 = { "target-features"="+sve" }
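To see how the combine handles chains of tuple_set calls, consider %12 in the test above. This trace is illustrative commentary, not part of the patch; each arrow is one InstCombine iteration, where the second rule forwards the get past sets of other indices and the first rule finishes the fold.

; %12 = get(%8, i32 3);  %8 = set(%7, i32 1, %5)  -> indices differ: get(%7, i32 3)
;                        %7 = set(%4, i32 2, %6)  -> indices differ: get(%4, i32 3)
;                        %4 = set(%3, i32 0, %1)  -> indices differ: get(%3, i32 3)
;                        %3 = set(%a, i32 3, %2)  -> indices match: %12 folds to %2,
;                             i.e. get(%a, i32 0), which is [[TMP2]] in the CHECK lines.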