diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1696,6 +1696,7 @@
   case Intrinsic::aarch64_sve_ptest_last:
     return instCombineSVEPTest(IC, II);
   case Intrinsic::aarch64_sve_mul:
+  case Intrinsic::aarch64_sve_mul_u:
   case Intrinsic::aarch64_sve_fmul:
   case Intrinsic::aarch64_sve_fmul_u:
     return instCombineSVEVectorMul(IC, II);
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul_u-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul_u-idempotency.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul_u-idempotency.ll
@@ -0,0 +1,119 @@
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Idempotent muls -- should compile to just a ret.
+define <vscale x 8 x i16> @idempotent_mul_u_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: @idempotent_mul_u_i16(
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A:%.*]]
+;
+  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1)
+  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %1)
+  ret <vscale x 8 x i16> %2
+}
+
+define <vscale x 4 x i32> @idempotent_mul_u_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: @idempotent_mul_u_i32(
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A:%.*]]
+;
+  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %1)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 2 x i64> @idempotent_mul_u_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: @idempotent_mul_u_i64(
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A:%.*]]
+;
+  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %1)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @idempotent_mul_u_different_argument_order(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: @idempotent_mul_u_different_argument_order(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> [[A:%.*]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+;
+  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
+  ; Different argument order to the above tests.
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %1, <vscale x 2 x i64> %a)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 8 x i16> @idempotent_mul_u_with_predicated_dup(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: @idempotent_mul_u_with_predicated_dup(
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A:%.*]]
+;
+  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, i16 1)
+  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %1)
+  ret <vscale x 8 x i16> %2
+}
+
+define <vscale x 8 x i16> @idempotent_mul_u_two_dups(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
+  ; Edge case -- make sure that the case where we're multiplying two dups
+  ; together is sane.
+; CHECK-LABEL: @idempotent_mul_u_two_dups(
+; CHECK-NEXT:    ret <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 1, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
+;
+  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1)
+  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1)
+  %3 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2)
+  ret <vscale x 8 x i16> %3
+}
+
+; Non-idempotent muls -- we don't expect these to be optimised out.
+define <vscale x 8 x i16> @non_idempotent_mul_u_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) #0 {
+; CHECK-LABEL: @non_idempotent_mul_u_i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+;
+  %1 = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %2 = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %1)
+  ret <vscale x 8 x i16> %2
+}
+
+define <vscale x 4 x i32> @non_idempotent_mul_u_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: @non_idempotent_mul_u_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+;
+  %1 = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 2)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %1)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 2 x i64> @non_idempotent_mul_u_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) #0 {
+; CHECK-LABEL: @non_idempotent_mul_u_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+;
+  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 2)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %1)
+  ret <vscale x 2 x i64> %2
+}
+
+define <vscale x 2 x i64> @non_idempotent_mul_u_with_predicated_dup(<vscale x 2 x i1> %pg1, <vscale x 2 x i1> %pg2, <vscale x 2 x i64> %a) #0 {
+  ; Different predicates
+; CHECK-LABEL: @non_idempotent_mul_u_with_predicated_dup(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[PG1:%.*]], i64 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[PG2:%.*]], <vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %1 = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg1, i64 1)
+  %2 = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg2, <vscale x 2 x i64> %a, <vscale x 2 x i64> %1)
+  ret <vscale x 2 x i64> %2
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+attributes #0 = { "target-features"="+sve" }