diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -3198,10 +3198,12 @@ def _UNDEF_S : PredThreeOpPseudo; def _UNDEF_D : PredThreeOpPseudo; - def : SVE_4_Op_Pat(NAME # _UNDEF_B)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_H)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_S)>; - def : SVE_4_Op_Pat(NAME # _UNDEF_D)>; + let AddedComplexity = 9 in { + def : SVE_4_Op_Pat(NAME # _UNDEF_B)>; + def : SVE_4_Op_Pat(NAME # _UNDEF_H)>; + def : SVE_4_Op_Pat(NAME # _UNDEF_S)>; + def : SVE_4_Op_Pat(NAME # _UNDEF_D)>; + } } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-int-arith.ll --- a/llvm/test/CodeGen/AArch64/sve-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith.ll @@ -586,8 +586,8 @@ ; CHECK-LABEL: muladd_i16_positiveAddend: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: add z0.h, z0.h, #255 // =0xff +; CHECK-NEXT: mov z2.h, #255 // =0xff +; CHECK-NEXT: mad z0.h, p0/m, z1.h, z2.h ; CHECK-NEXT: ret { %1 = mul %a, %b @@ -612,8 +612,8 @@ ; CHECK-LABEL: muladd_i8_positiveAddend: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: add z0.b, z0.b, #15 // =0xf +; CHECK-NEXT: mov z2.b, #15 // =0xf +; CHECK-NEXT: mad z0.b, p0/m, z1.b, z2.b ; CHECK-NEXT: ret { %1 = mul %a, %b @@ -625,8 +625,8 @@ ; CHECK-LABEL: muladd_i8_negativeAddend: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mul z0.b, p0/m, z0.b, z1.b -; CHECK-NEXT: add z0.b, z0.b, #241 // =0xf1 +; CHECK-NEXT: mov z2.b, #-15 // =0xfffffffffffffff1 +; CHECK-NEXT: mad z0.b, p0/m, z1.b, z2.b ; CHECK-NEXT: ret { %1 = mul %a, %b @@ -744,13 +744,14 @@ ret %2 } +; TOFIX: Should generate msb for mul+sub in this case. 
Shuffling the operands of sub generates the required msb instruction. define @multiple_fused_ops( %a, %b) ; CHECK-LABEL: multiple_fused_ops: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #200 // =0xc8 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: mul z2.h, p0/m, z2.h, z1.h -; CHECK-NEXT: add z2.h, z2.h, #200 // =0xc8 +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mla z2.h, p0/m, z0.h, z1.h ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h ; CHECK-NEXT: sub z0.h, z0.h, z1.h ; CHECK-NEXT: ret @@ -771,15 +772,15 @@ ; CHECK-NEXT: mov w9, w3 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: cntw x10 +; CHECK-NEXT: mov z0.s, #1 // =0x1 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: whilelo p1.s, xzr, x9 ; CHECK-NEXT: .LBB70_2: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ld1w { z0.s }, p1/z, [x1, x8, lsl #2] -; CHECK-NEXT: ld1w { z1.s }, p1/z, [x2, x8, lsl #2] -; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: add z0.s, z0.s, #1 // =0x1 -; CHECK-NEXT: st1w { z0.s }, p1, [x0, x8, lsl #2] +; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1, x8, lsl #2] +; CHECK-NEXT: ld1w { z2.s }, p1/z, [x2, x8, lsl #2] +; CHECK-NEXT: mad z1.s, p0/m, z2.s, z0.s +; CHECK-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2] ; CHECK-NEXT: add x8, x8, x10 ; CHECK-NEXT: whilelo p1.s, x8, x9 ; CHECK-NEXT: b.mi .LBB70_2