diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -833,17 +833,12 @@ return match(SplatValue, m_FPOne()) || match(SplatValue, m_One()); }; - // The OpMultiplier variable should always point to the dup (if any), so - // swap if necessary. - if (IsUnitDup(OpMultiplicand) || IsUnitSplat(OpMultiplicand)) - std::swap(OpMultiplier, OpMultiplicand); - if (IsUnitSplat(OpMultiplier)) { - // [f]mul pg (dupx 1) %n => %n + // [f]mul pg %n, (dupx 1) => %n OpMultiplicand->takeName(&II); return IC.replaceInstUsesWith(II, OpMultiplicand); } else if (IsUnitDup(OpMultiplier)) { - // [f]mul pg (dup pg 1) %n => %n + // [f]mul pg %n, (dup pg 1) => %n auto *DupInst = cast(OpMultiplier); auto *DupPg = DupInst->getOperand(1); // TODO: this is naive. The optimization is still valid if DupPg diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll @@ -32,7 +32,8 @@ define @idempotent_fmul_different_argument_order( %pg, %a) #0 { ; CHECK-LABEL: @idempotent_fmul_different_argument_order( -; CHECK-NEXT: ret [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.fmul.nxv2f64( [[PG:%.*]], shufflevector ( insertelement ( poison, double 1.000000e+00, i32 0), poison, zeroinitializer), [[A:%.*]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = call @llvm.aarch64.sve.dup.x.nxv2f64(double 1.0) ; Different argument order to the above tests. diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-mul-idempotency.ll @@ -32,7 +32,8 @@ define @idempotent_mul_different_argument_order( %pg, %a) #0 { ; CHECK-LABEL: @idempotent_mul_different_argument_order( -; CHECK-NEXT: ret [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.mul.nxv2i64( [[PG:%.*]], shufflevector ( insertelement ( poison, i64 1, i32 0), poison, zeroinitializer), [[A:%.*]]) +; CHECK-NEXT: ret [[TMP1]] ; %1 = call @llvm.aarch64.sve.dup.x.nxv2i64(i64 1) ; Different argument order to the above tests.