Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -766,6 +766,44 @@
   return None;
 }
 
+// Splat V across the (minimum) NumElts lanes of a scalable vector.
+static Value *getScalableSplat(Value *V, unsigned NumElts,
+                               IRBuilder<> &Builder) {
+  Value *Poison =
+      PoisonValue::get(ScalableVectorType::get(V->getType(), NumElts));
+  Value *Zeros = ConstantAggregateZero::get(
+      ScalableVectorType::get(Builder.getInt32Ty(), NumElts));
+  Value *NewVal = Builder.CreateInsertElement(
+      Poison, V, ConstantInt::get(Builder.getInt32Ty(), 0));
+  NewVal = Builder.CreateShuffleVector(NewVal, Poison, Zeros);
+  return NewVal;
+}
+
+static Optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
+                                                    IntrinsicInst &II) {
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  Value *UnpackArg = II.getArgOperand(0);
+  auto *RetTy = cast<ScalableVectorType>(II.getType());
+  auto RetNumElements = RetTy->getMinNumElements();
+  bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
+                  II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
+
+  // Every lane of a splat holds the same value, so both unpacks of a splat
+  // reduce to a splat of the extended scalar:
+  // Hi = uunpkhi(splat(X)) --> Hi = splat(extend(X))
+  // Lo = uunpklo(splat(X)) --> Lo = splat(extend(X))
+  if (auto *ScalarArg = getSplatValue(UnpackArg)) {
+    ScalarArg =
+        Builder.CreateIntCast(ScalarArg, RetTy->getScalarType(), IsSigned);
+    Value *NewVal = getScalableSplat(ScalarArg, RetNumElements, Builder);
+    NewVal->takeName(&II);
+    return IC.replaceInstUsesWith(II, NewVal);
+  }
+
+  return None;
+}
+
 static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
                                                  IntrinsicInst &II) {
   auto *OpVal = II.getOperand(0);
@@ -831,6 +869,11 @@
     return instCombineSVEVectorMul(IC, II);
   case Intrinsic::aarch64_sve_tbl:
     return instCombineSVETBL(IC, II);
+  case Intrinsic::aarch64_sve_uunpkhi:
+  case Intrinsic::aarch64_sve_uunpklo:
+  case Intrinsic::aarch64_sve_sunpkhi:
+  case Intrinsic::aarch64_sve_sunpklo:
+    return instCombineSVEUnpack(IC, II);
   }
 
   return None;
Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-unpkhi-unpklo.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-unpkhi-unpklo.ll
@@ -0,0 +1,104 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 8 x i32> @uunpkhi_splat(i16 %a) #0 {
+; CHECK-LABEL: @uunpkhi_splat(
+; CHECK: %1 = zext i16 %a to i32
+; CHECK: %2 = insertelement <vscale x 8 x i32> poison, i32 %1, i32 0
+; CHECK: %unpack = shufflevector <vscale x 8 x i32> %2, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 8 x i32> %unpack
+  %splat_insert = insertelement <vscale x 16 x i16> poison, i16 %a, i32 0
+  %splat = shufflevector <vscale x 16 x i16> %splat_insert, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %unpack = tail call <vscale x 8 x i32> @llvm.aarch64.sve.uunpkhi.nxv8i32
+                      (<vscale x 16 x i16> %splat)
+  ret <vscale x 8 x i32> %unpack
+}
+
+define <vscale x 8 x i32> @uunpklo_splat(i16 %a) #0 {
+; CHECK-LABEL: @uunpklo_splat(
+; CHECK: %1 = zext i16 %a to i32
+; CHECK: %2 = insertelement <vscale x 8 x i32> poison, i32 %1, i32 0
+; CHECK: %unpack = shufflevector <vscale x 8 x i32> %2, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 8 x i32> %unpack
+  %splat_insert = insertelement <vscale x 16 x i16> poison, i16 %a, i32 0
+  %splat = shufflevector <vscale x 16 x i16> %splat_insert, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %unpack = tail call <vscale x 8 x i32> @llvm.aarch64.sve.uunpklo.nxv8i32
+                      (<vscale x 16 x i16> %splat)
+  ret <vscale x 8 x i32> %unpack
+}
+
+define void @uunpk_splat(i16 %a) #0 {
+; CHECK-LABEL: @uunpk_splat(
+; CHECK: %1 = zext i16 %a to i32
+; CHECK: %2 = insertelement <vscale x 8 x i32> poison, i32 %1, i32 0
+; CHECK: %unpackhi = shufflevector <vscale x 8 x i32> %2, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK: %3 = zext i16 %a to i32
+; CHECK: %4 = insertelement <vscale x 8 x i32> poison, i32 %3, i32 0
+; CHECK: %unpacklo = shufflevector <vscale x 8 x i32> %4, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK: call void @use(<vscale x 8 x i32> %unpackhi, <vscale x 8 x i32> %unpacklo)
+; CHECK-NEXT: ret void
+  %splat_insert = insertelement <vscale x 16 x i16> poison, i16 %a, i32 0
+  %splat = shufflevector <vscale x 16 x i16> %splat_insert, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %unpackhi = tail call <vscale x 8 x i32> @llvm.aarch64.sve.uunpkhi.nxv8i32
+                        (<vscale x 16 x i16> %splat)
+  %unpacklo = tail call <vscale x 8 x i32> @llvm.aarch64.sve.uunpklo.nxv8i32
+                        (<vscale x 16 x i16> %splat)
+  call void @use(<vscale x 8 x i32> %unpackhi, <vscale x 8 x i32> %unpacklo)
+  ret void
+}
+
+define <vscale x 8 x i32> @sunpkhi_splat(i16 %a) #0 {
+; CHECK-LABEL: @sunpkhi_splat(
+; CHECK: %1 = sext i16 %a to i32
+; CHECK: %2 = insertelement <vscale x 8 x i32> poison, i32 %1, i32 0
+; CHECK: %unpack = shufflevector <vscale x 8 x i32> %2, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 8 x i32> %unpack
+  %splat_insert = insertelement <vscale x 16 x i16> poison, i16 %a, i32 0
+  %splat = shufflevector <vscale x 16 x i16> %splat_insert, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %unpack = tail call <vscale x 8 x i32> @llvm.aarch64.sve.sunpkhi.nxv8i32
+                      (<vscale x 16 x i16> %splat)
+  ret <vscale x 8 x i32> %unpack
+}
+
+define <vscale x 8 x i32> @sunpklo_splat(i16 %a) #0 {
+; CHECK-LABEL: @sunpklo_splat(
+; CHECK: %1 = sext i16 %a to i32
+; CHECK: %2 = insertelement <vscale x 8 x i32> poison, i32 %1, i32 0
+; CHECK: %unpack = shufflevector <vscale x 8 x i32> %2, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: ret <vscale x 8 x i32> %unpack
+  %splat_insert = insertelement <vscale x 16 x i16> poison, i16 %a, i32 0
+  %splat = shufflevector <vscale x 16 x i16> %splat_insert, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %unpack = tail call <vscale x 8 x i32> @llvm.aarch64.sve.sunpklo.nxv8i32
+                      (<vscale x 16 x i16> %splat)
+  ret <vscale x 8 x i32> %unpack
+}
+
+define void @sunpk_splat(i16 %a) #0 {
+; CHECK-LABEL: @sunpk_splat(
+; CHECK: %1 = sext i16 %a to i32
+; CHECK: %2 = insertelement <vscale x 8 x i32> poison, i32 %1, i32 0
+; CHECK: %unpackhi = shufflevector <vscale x 8 x i32> %2, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK: %3 = sext i16 %a to i32
+; CHECK: %4 = insertelement <vscale x 8 x i32> poison, i32 %3, i32 0
+; CHECK: %unpacklo = shufflevector <vscale x 8 x i32> %4, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK: call void @use(<vscale x 8 x i32> %unpackhi, <vscale x 8 x i32> %unpacklo)
+; CHECK-NEXT: ret void
+  %splat_insert = insertelement <vscale x 16 x i16> poison, i16 %a, i32 0
+  %splat = shufflevector <vscale x 16 x i16> %splat_insert, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %unpackhi = tail call <vscale x 8 x i32> @llvm.aarch64.sve.sunpkhi.nxv8i32
+                        (<vscale x 16 x i16> %splat)
+  %unpacklo = tail call <vscale x 8 x i32> @llvm.aarch64.sve.sunpklo.nxv8i32
+                        (<vscale x 16 x i16> %splat)
+  call void @use(<vscale x 8 x i32> %unpackhi, <vscale x 8 x i32> %unpacklo)
+  ret void
+}
+
+declare void @use(<vscale x 8 x i32>, <vscale x 8 x i32>)
+
+declare <vscale x 8 x i32> @llvm.aarch64.sve.uunpkhi.nxv8i32(<vscale x 16 x i16>)
+declare <vscale x 8 x i32> @llvm.aarch64.sve.uunpklo.nxv8i32(<vscale x 16 x i16>)
+declare <vscale x 8 x i32> @llvm.aarch64.sve.sunpkhi.nxv8i32(<vscale x 16 x i16>)
+declare <vscale x 8 x i32> @llvm.aarch64.sve.sunpklo.nxv8i32(<vscale x 16 x i16>)
+
+attributes #0 = { "target-features"="+sve" }
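
For reference, a minimal sketch of the rewrite this combine performs, using the same nxv8i32 overload as the tests above (illustrative IR only; the value names %ins, %ext, %ins2 are not from the patch):

  ; Before: sunpklo applied to a splat of %a.
  %ins   = insertelement <vscale x 16 x i16> poison, i16 %a, i32 0
  %splat = shufflevector <vscale x 16 x i16> %ins, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
  %lo    = call <vscale x 8 x i32> @llvm.aarch64.sve.sunpklo.nxv8i32(<vscale x 16 x i16> %splat)

  ; After: a splat of the sign-extended scalar; the unpack intrinsic disappears.
  %ext   = sext i16 %a to i32
  %ins2  = insertelement <vscale x 8 x i32> poison, i32 %ext, i32 0
  %lo    = shufflevector <vscale x 8 x i32> %ins2, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer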