Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -617,6 +617,9 @@ bool isLegalAddImmediate(int64_t) const override; bool isLegalICmpImmediate(int64_t) const override; + bool decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const override; + bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13584,6 +13584,30 @@ return IsLegal; } +// Return true if the MUL can be replaced with shift/sub/add at a lower cost. +bool AArch64TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, + SDValue C) const { + // Check integral scalar types. + if (!VT.isScalarInteger()) + return false; + if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { + if (!ConstNode->getAPIntValue().isSignedIntN(64)) + return false; + + const APInt &Imm = ConstNode->getAPIntValue(); + if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || + (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) + return true; + + // Optimize Imm = 2^N + 2^M (M = 1, 2 or 3) to (ADD (SHL x, N), (SHL x, M)). + if ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() || + (Imm - 8).isPowerOf2()) + return true; + } + + return false; +} + // Return false to prevent folding // (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine, // if the folding leads to worse code. Index: llvm/test/CodeGen/AArch64/arm64-mul.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-mul.ll +++ llvm/test/CodeGen/AArch64/arm64-mul.ll @@ -125,11 +125,9 @@ define i64 @t10(i32 %a) nounwind { ; CHECK-LABEL: t10: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #2 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x9, w0 -; CHECK-NEXT: movk w8, #32768, lsl #16 -; CHECK-NEXT: mul x0, x9, x8 +; CHECK-NEXT: sbfiz x8, x0, #31, #32 +; CHECK-NEXT: add x0, x8, w0, sxtw #1 ; CHECK-NEXT: ret entry: %tmp1 = sext i32 %a to i64 Index: llvm/test/CodeGen/AArch64/arm64_32.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64_32.ll +++ llvm/test/CodeGen/AArch64/arm64_32.ll @@ -720,13 +720,15 @@ define { [18 x i8] }* @test_gep_nonpow2({ [18 x i8] }* %a0, i32 %a1) { ; CHECK-LABEL: test_gep_nonpow2: -; CHECK-OPT: mov w[[SIZE:[0-9]+]], #18 -; CHECK-OPT-NEXT: smaddl x0, w1, w[[SIZE]], x0 +; CHECK-OPT: sbfiz x8, x1, #4, #32 +; CHECK-OPT-NEXT: add x8, x8, w1, sxtw #1 +; CHECK-OPT-NEXT: add x0, x0, x8 ; CHECK-OPT-NEXT: ret -; CHECK-FAST: mov w[[SIZE:[0-9]+]], #18 -; CHECK-FAST-NEXT: smaddl [[TMP:x[0-9]+]], w1, w[[SIZE]], x0 -; CHECK-FAST-NEXT: and x0, [[TMP]], #0xffffffff +; CHECK-FAST: sbfiz x8, x1, #4, #32 +; CHECK-FAST-NEXT: add x8, x8, w1, sxtw #1 +; CHECK-FAST-NEXT: add x8, x0, x8 +; CHECK-FAST-NEXT: and x0, x8, #0xffffffff ; CHECK-FAST-NEXT: ret %tmp0 = getelementptr inbounds { [18 x i8] }, { [18 x i8] }* %a0, i32 %a1 ret { [18 x i8] }* %tmp0 Index: llvm/test/CodeGen/AArch64/machine-combiner-madd.ll =================================================================== --- llvm/test/CodeGen/AArch64/machine-combiner-madd.ll +++ llvm/test/CodeGen/AArch64/machine-combiner-madd.ll @@ -13,16 +13,16 @@ ; Make sure that inst-combine fuses the 
multiply add in the addressing mode of ; the load. -; CHECK-LABEL: fun: -; CHECK-NOT: mul -; CHECK: madd -; CHECK-NOT: mul %class.D = type { %class.basic_string.base, [4 x i8] } %class.basic_string.base = type <{ i64, i64, i32 }> @a = global %class.D* zeroinitializer, align 8 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) define internal void @fun() section ".text.startup" { +; CHECK-LABEL: fun: +; CHECK-NOT: mul +; CHECK-NOT: madd +; CHECK-COUNT-2: lsl entry: %tmp.i.i = alloca %class.D, align 8 %y = bitcast %class.D* %tmp.i.i to i8* Index: llvm/test/CodeGen/AArch64/mul_pow2.ll =================================================================== --- llvm/test/CodeGen/AArch64/mul_pow2.ll +++ llvm/test/CodeGen/AArch64/mul_pow2.ll @@ -71,8 +71,8 @@ define i32 @test6_32b(i32 %x) { ; CHECK-LABEL: test6_32b: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w0, lsl #1 -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: lsl w8, w0, #2 +; CHECK-NEXT: add w0, w8, w0, lsl #1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_32b: @@ -88,8 +88,8 @@ define i64 @test6_64b(i64 %x) { ; CHECK-LABEL: test6_64b: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0, lsl #1 -; CHECK-NEXT: lsl x0, x8, #1 +; CHECK-NEXT: lsl x8, x0, #2 +; CHECK-NEXT: add x0, x8, x0, lsl #1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_64b: @@ -107,8 +107,9 @@ define i64 @test6_umull(i32 %x) { ; CHECK-LABEL: test6_umull: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: umull x0, w0, w8 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: add x0, x8, w0, uxtw #1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_umull: @@ -125,8 +126,9 @@ define i64 @test6_smull(i32 %x) { ; CHECK-LABEL: test6_smull: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: smull x0, w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfiz x8, x0, #2, #32 +; CHECK-NEXT: add x0, x8, w0, sxtw #1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_smull: @@ -143,8 +145,9 @@ define i32 @test6_madd(i32 %x, i32 %y) { ; CHECK-LABEL: test6_madd: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: madd w0, w0, w8, w1 +; CHECK-NEXT: lsl w8, w0, #2 +; CHECK-NEXT: add w8, w8, w0, lsl #1 +; CHECK-NEXT: add w0, w8, w1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_madd: @@ -161,8 +164,9 @@ define i32 @test6_msub(i32 %x, i32 %y) { ; CHECK-LABEL: test6_msub: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: msub w0, w0, w8, w1 +; CHECK-NEXT: lsl w8, w0, #2 +; CHECK-NEXT: add w8, w8, w0, lsl #1 +; CHECK-NEXT: sub w0, w1, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_msub: @@ -176,11 +180,14 @@ ret i32 %sub } +; TODO: mov w8, w0 + lsl x8, x8, #2 should combine into lsl x8, x0, #2 define i64 @test6_umaddl(i32 %x, i64 %y) { ; CHECK-LABEL: test6_umaddl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: umaddl x0, w0, w8, x1 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: add x8, x8, w0, uxtw #1 +; CHECK-NEXT: add x0, x8, x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_umaddl: @@ -198,8 +205,10 @@ define i64 @test6_smaddl(i32 %x, i64 %y) { ; CHECK-LABEL: test6_smaddl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: smaddl x0, w0, w8, x1 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfiz x8, x0, #2, #32 +; CHECK-NEXT: add x8, x8, w0, sxtw #1 +; CHECK-NEXT: add x0, x8, x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_smaddl: @@ -214,11 +223,14 @@ ret i64 %add } +; TODO: mov w8, w0 + lsl x8, x8, #2 should combine into lsl x8, x0, #2 define i64 
@test6_umsubl(i32 %x, i64 %y) { ; CHECK-LABEL: test6_umsubl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: umsubl x0, w0, w8, x1 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: add x8, x8, w0, uxtw #1 +; CHECK-NEXT: sub x0, x1, x8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_umsubl: @@ -236,8 +248,10 @@ define i64 @test6_smsubl(i32 %x, i64 %y) { ; CHECK-LABEL: test6_smsubl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: smsubl x0, w0, w8, x1 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfiz x8, x0, #2, #32 +; CHECK-NEXT: add x8, x8, w0, sxtw #1 +; CHECK-NEXT: sub x0, x1, x8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_smsubl: @@ -255,8 +269,10 @@ define i64 @test6_umnegl(i32 %x) { ; CHECK-LABEL: test6_umnegl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: umnegl x0, w0, w8 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: add x8, x8, w0, uxtw #1 +; CHECK-NEXT: neg x0, x8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_umnegl: @@ -274,8 +290,10 @@ define i64 @test6_smnegl(i32 %x) { ; CHECK-LABEL: test6_smnegl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: smnegl x0, w0, w8 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfiz x8, x0, #2, #32 +; CHECK-NEXT: add x8, x8, w0, sxtw #1 +; CHECK-NEXT: neg x0, x8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_smnegl: @@ -290,6 +308,25 @@ ret i64 %sub } +define i32 @mull6_sub(i32 %x) { +; CHECK-LABEL: mull6_sub: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl w8, w0, #2 +; CHECK-NEXT: add w8, w8, w0, lsl #1 +; CHECK-NEXT: sub w0, w8, #1 +; CHECK-NEXT: ret +; +; GISEL-LABEL: mull6_sub: +; GISEL: // %bb.0: +; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mul w8, w0, w8 +; GISEL-NEXT: sub w0, w8, #1 +; GISEL-NEXT: ret + %mul = mul nsw i32 %x, 6 + %sub = add nsw i32 %mul, -1 + ret i32 %sub +} + define i32 @test7(i32 %x) { ; CHECK-LABEL: test7: ; CHECK: // %bb.0: @@ -340,8 +377,8 @@ define i32 @test10(i32 %x) { ; CHECK-LABEL: test10: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w0, lsl #2 -; CHECK-NEXT: lsl w0, w8, #1 +; CHECK-NEXT: lsl w8, w0, #3 +; CHECK-NEXT: add w0, w8, w0, lsl #1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test10: @@ -374,8 +411,8 @@ define i32 @test12(i32 %x) { ; CHECK-LABEL: test12: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w0, lsl #1 -; CHECK-NEXT: lsl w0, w8, #2 +; CHECK-NEXT: lsl w8, w0, #3 +; CHECK-NEXT: add w0, w8, w0, lsl #2 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test12: @@ -477,7 +514,8 @@ define i32 @ntest3(i32 %x) { ; CHECK-LABEL: ntest3: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w0, w0, w0, lsl #2 +; CHECK-NEXT: add w8, w0, w0, lsl #1 +; CHECK-NEXT: neg w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: ntest3: @@ -731,11 +769,11 @@ ; ; GISEL-LABEL: muladd_demand_commute: ; GISEL: // %bb.0: -; GISEL-NEXT: adrp x8, .LCPI42_1 -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI42_1] -; GISEL-NEXT: adrp x8, .LCPI42_0 +; GISEL-NEXT: adrp x8, .LCPI43_1 +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI43_1] +; GISEL-NEXT: adrp x8, .LCPI43_0 ; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s -; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI42_0] +; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI43_0] ; GISEL-NEXT: and v0.16b, v1.16b, v0.16b ; GISEL-NEXT: ret %m = mul <4 x i32> %x, Index: llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll =================================================================== --- llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll +++ llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll @@ -23,12 +23,13 @@ define i1 @test_srem_even(i4 %X) nounwind { ; CHECK-LABEL: 
test_srem_even: ; CHECK: // %bb.0: -; CHECK-NEXT: sbfx w9, w0, #0, #4 -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: add w9, w9, w9, lsl #1 -; CHECK-NEXT: ubfx w10, w9, #7, #1 -; CHECK-NEXT: add w9, w10, w9, lsr #4 -; CHECK-NEXT: msub w8, w9, w8, w0 +; CHECK-NEXT: sbfx w8, w0, #0, #4 +; CHECK-NEXT: add w8, w8, w8, lsl #1 +; CHECK-NEXT: ubfx w9, w8, #7, #1 +; CHECK-NEXT: add w8, w9, w8, lsr #4 +; CHECK-NEXT: lsl w9, w8, #2 +; CHECK-NEXT: add w8, w9, w8, lsl #1 +; CHECK-NEXT: sub w8, w0, w8 ; CHECK-NEXT: and w8, w8, #0xf ; CHECK-NEXT: cmp w8, #1 ; CHECK-NEXT: cset w0, eq Index: llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll @@ -111,16 +111,17 @@ define i32 @inch_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: inch_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #5 -; NO_SCALAR_INC-NEXT: cnth x9, vl8 -; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cnth x8, vl8 +; NO_SCALAR_INC-NEXT: add w8, w8, w8, lsl #2 +; NO_SCALAR_INC-NEXT: add w0, w8, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: inch_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: inch x0, vl8, mul #5 -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: cnth x8, vl8 +; CHECK-NEXT: // kill: def $w8 killed $w8 killed $x8 def $x8 +; CHECK-NEXT: inch x8, vl8, mul #4 +; CHECK-NEXT: add w0, w8, w0 ; CHECK-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cnth(i32 8) %conv = trunc i64 %cnt to i32 @@ -155,16 +156,17 @@ define i32 @dech_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: dech_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #7 -; NO_SCALAR_INC-NEXT: cnth x9, vl16 -; NO_SCALAR_INC-NEXT: msub w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cnth x8, vl16 +; NO_SCALAR_INC-NEXT: sub w8, w8, w8, lsl #3 +; NO_SCALAR_INC-NEXT: add w0, w0, w8 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: dech_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: dech x0, vl16, mul #7 -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: cnth x8, vl16 +; CHECK-NEXT: // kill: def $w8 killed $w8 killed $x8 def $x8 +; CHECK-NEXT: dech x8, vl16, mul #8 +; CHECK-NEXT: add w0, w0, w8 ; CHECK-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cnth(i32 9) %conv = trunc i64 %cnt to i32 @@ -199,16 +201,18 @@ define i32 @incw_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: incw_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #12 -; NO_SCALAR_INC-NEXT: cntw x9, vl32 -; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cntw x8, vl32 +; NO_SCALAR_INC-NEXT: lsl w9, w8, #3 +; NO_SCALAR_INC-NEXT: add w8, w9, w8, lsl #2 +; NO_SCALAR_INC-NEXT: add w0, w8, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: incw_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: incw x0, vl32, mul #12 -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: cntw x8, vl32 +; CHECK-NEXT: lsl w8, w8, #3 +; CHECK-NEXT: incw x8, vl32, mul #4 +; CHECK-NEXT: add w0, w8, w0 ; CHECK-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntw(i32 10) %conv = trunc i64 %cnt to i32 @@ -284,16 +288,18 @@ define i32 @incd_mul(i32 %base) { ; NO_SCALAR_INC-LABEL: incd_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #15 -; NO_SCALAR_INC-NEXT: cntd x9, vl64 -; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cntd x8, 
vl64 +; NO_SCALAR_INC-NEXT: lsl w9, w8, #4 +; NO_SCALAR_INC-NEXT: sub w8, w9, w8 +; NO_SCALAR_INC-NEXT: add w0, w0, w8 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: incd_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: incd x0, vl64, mul #15 -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: cntd x8, vl64 +; CHECK-NEXT: lsl w8, w8, #4 +; CHECK-NEXT: decd x8, vl64 +; CHECK-NEXT: add w0, w0, w8 ; CHECK-NEXT: ret %cnt = tail call i64 @llvm.aarch64.sve.cntd(i32 11) %conv = trunc i64 %cnt to i32 @@ -328,16 +334,17 @@ define i32 @decd_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: decd_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #9 -; NO_SCALAR_INC-NEXT: cntd x9, vl2 -; NO_SCALAR_INC-NEXT: msub w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cntd x8, vl2 +; NO_SCALAR_INC-NEXT: add w8, w8, w8, lsl #3 +; NO_SCALAR_INC-NEXT: sub w0, w0, w8 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: decd_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: decd x0, vl2, mul #9 -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: cntd x8, vl2 +; CHECK-NEXT: // kill: def $w8 killed $w8 killed $x8 def $x8 +; CHECK-NEXT: incd x8, vl2, mul #8 +; CHECK-NEXT: sub w0, w0, w8 ; CHECK-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntd(i32 2) %conv = trunc i64 %cnt to i32 Index: llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll @@ -25,12 +25,14 @@ define i64 @cntb_mul3() { ; CHECK-LABEL: cntb_mul3: ; CHECK: // %bb.0: -; CHECK-NEXT: cntb x0, vl6, mul #3 +; CHECK-NEXT: cntb x8, vl6 +; CHECK-NEXT: add x0, x8, x8, lsl #1 ; CHECK-NEXT: ret ; ; USE_SCALAR_INC-LABEL: cntb_mul3: ; USE_SCALAR_INC: // %bb.0: -; USE_SCALAR_INC-NEXT: cntb x0, vl6, mul #3 +; USE_SCALAR_INC-NEXT: cntb x0, vl6 +; USE_SCALAR_INC-NEXT: incb x0, vl6, mul #2 ; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6) %out = mul i64 %cnt, 3 @@ -73,12 +75,14 @@ define i64 @cnth_mul5() { ; CHECK-LABEL: cnth_mul5: ; CHECK: // %bb.0: -; CHECK-NEXT: cnth x0, vl7, mul #5 +; CHECK-NEXT: cnth x8, vl7 +; CHECK-NEXT: add x0, x8, x8, lsl #2 ; CHECK-NEXT: ret ; ; USE_SCALAR_INC-LABEL: cnth_mul5: ; USE_SCALAR_INC: // %bb.0: -; USE_SCALAR_INC-NEXT: cnth x0, vl7, mul #5 +; USE_SCALAR_INC-NEXT: cnth x0, vl7 +; USE_SCALAR_INC-NEXT: inch x0, vl7, mul #4 ; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cnth(i32 7) %out = mul i64 %cnt, 5 @@ -169,12 +173,15 @@ define i64 @cntd_mul15() { ; CHECK-LABEL: cntd_mul15: ; CHECK: // %bb.0: -; CHECK-NEXT: cntd x0, vl16, mul #15 +; CHECK-NEXT: cntd x8, vl16 +; CHECK-NEXT: cntd x9, vl16, mul #16 +; CHECK-NEXT: sub x0, x9, x8 ; CHECK-NEXT: ret ; ; USE_SCALAR_INC-LABEL: cntd_mul15: ; USE_SCALAR_INC: // %bb.0: -; USE_SCALAR_INC-NEXT: cntd x0, vl16, mul #15 +; USE_SCALAR_INC-NEXT: cntd x0, vl16, mul #16 +; USE_SCALAR_INC-NEXT: decd x0, vl16 ; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntd(i32 9) %out = mul i64 %cnt, 15 @@ -357,13 +364,16 @@ define i64 @inch_mul(i64 %a) { ; CHECK-LABEL: inch_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: cnth x8, vl8, mul #5 +; CHECK-NEXT: cnth x8, vl8 +; CHECK-NEXT: add x8, x8, x8, lsl #2 ; CHECK-NEXT: add x0, x8, x0 ; CHECK-NEXT: ret ; ; USE_SCALAR_INC-LABEL: inch_mul: ; USE_SCALAR_INC: // %bb.0: -; USE_SCALAR_INC-NEXT: inch x0, vl8, mul #5 +; USE_SCALAR_INC-NEXT: cnth x8, vl8 
+; USE_SCALAR_INC-NEXT: inch x8, vl8, mul #4 +; USE_SCALAR_INC-NEXT: add x0, x8, x0 ; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cnth(i32 8) %mul = mul i64 %cnt, 5 @@ -394,13 +404,16 @@ define i64 @dech_mul(i64 %a) { ; CHECK-LABEL: dech_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: cnth x8, vl16, mul #7 -; CHECK-NEXT: sub x0, x0, x8 +; CHECK-NEXT: cnth x8, vl16 +; CHECK-NEXT: sub x8, x8, x8, lsl #3 +; CHECK-NEXT: add x0, x0, x8 ; CHECK-NEXT: ret ; ; USE_SCALAR_INC-LABEL: dech_mul: ; USE_SCALAR_INC: // %bb.0: -; USE_SCALAR_INC-NEXT: dech x0, vl16, mul #7 +; USE_SCALAR_INC-NEXT: cnth x8, vl16 +; USE_SCALAR_INC-NEXT: dech x8, vl16, mul #8 +; USE_SCALAR_INC-NEXT: add x0, x0, x8 ; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cnth(i32 9) %mul = mul i64 %cnt, 7 @@ -431,13 +444,17 @@ define i64 @incw_mul(i64 %a) { ; CHECK-LABEL: incw_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: cntw x8, vl32, mul #12 +; CHECK-NEXT: cntw x8, vl32 +; CHECK-NEXT: cntw x9, vl32, mul #8 +; CHECK-NEXT: add x8, x9, x8, lsl #2 ; CHECK-NEXT: add x0, x8, x0 ; CHECK-NEXT: ret ; ; USE_SCALAR_INC-LABEL: incw_mul: ; USE_SCALAR_INC: // %bb.0: -; USE_SCALAR_INC-NEXT: incw x0, vl32, mul #12 +; USE_SCALAR_INC-NEXT: cntw x8, vl32, mul #8 +; USE_SCALAR_INC-NEXT: incw x8, vl32, mul #4 +; USE_SCALAR_INC-NEXT: add x0, x8, x0 ; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntw(i32 10) %mul = mul i64 %cnt, 12 @@ -501,13 +518,17 @@ define i64 @incd_mul(i64 %a) { ; CHECK-LABEL: incd_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: cntd x8, all, mul #15 +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: cntd x9, all, mul #16 +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: add x0, x8, x0 ; CHECK-NEXT: ret ; ; USE_SCALAR_INC-LABEL: incd_mul: ; USE_SCALAR_INC: // %bb.0: -; USE_SCALAR_INC-NEXT: incd x0, all, mul #15 +; USE_SCALAR_INC-NEXT: cntd x8, all, mul #16 +; USE_SCALAR_INC-NEXT: decd x8 +; USE_SCALAR_INC-NEXT: add x0, x8, x0 ; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31) %mul = mul i64 %cnt, 15 @@ -538,13 +559,16 @@ define i64 @decd_mul(i64 %a) { ; CHECK-LABEL: decd_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: cntd x8, vl2, mul #9 +; CHECK-NEXT: cntd x8, vl2 +; CHECK-NEXT: add x8, x8, x8, lsl #3 ; CHECK-NEXT: sub x0, x0, x8 ; CHECK-NEXT: ret ; ; USE_SCALAR_INC-LABEL: decd_mul: ; USE_SCALAR_INC: // %bb.0: -; USE_SCALAR_INC-NEXT: decd x0, vl2, mul #9 +; USE_SCALAR_INC-NEXT: cntd x8, vl2 +; USE_SCALAR_INC-NEXT: incd x8, vl2, mul #8 +; USE_SCALAR_INC-NEXT: sub x0, x0, x8 ; USE_SCALAR_INC-NEXT: ret %cnt = call i64 @llvm.aarch64.sve.cntd(i32 2) %mul = mul i64 %cnt, 9 Index: llvm/test/CodeGen/AArch64/typepromotion-phisret.ll =================================================================== --- llvm/test/CodeGen/AArch64/typepromotion-phisret.ll +++ llvm/test/CodeGen/AArch64/typepromotion-phisret.ll @@ -220,8 +220,8 @@ define i16 @promote_arg_return(i16 zeroext %arg1, i16 zeroext %arg2, i8* %res) { ; CHECK-LABEL: promote_arg_return: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w0, w0, lsl #1 -; CHECK-NEXT: add w8, w8, #45 +; CHECK-NEXT: add w8, w0, #15 +; CHECK-NEXT: add w8, w8, w8, lsl #1 ; CHECK-NEXT: cmp w8, w1 ; CHECK-NEXT: cset w8, lo ; CHECK-NEXT: strb w8, [x2] Index: llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll =================================================================== --- llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll +++ llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll @@ -37,8 +37,8 @@ define i1 @test_urem_odd_setne(i4 %X) nounwind { ; CHECK-LABEL: test_urem_odd_setne: ; CHECK: // 
%bb.0: -; CHECK-NEXT: mov w8, #13 -; CHECK-NEXT: mul w8, w0, w8 +; CHECK-NEXT: add w8, w0, w0, lsl #1 +; CHECK-NEXT: neg w8, w8 ; CHECK-NEXT: and w8, w8, #0xf ; CHECK-NEXT: cmp w8, #3 ; CHECK-NEXT: cset w0, hi Index: llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/split-gep.ll =================================================================== --- llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/split-gep.ll +++ llvm/test/Transforms/SeparateConstOffsetFromGEP/AArch64/split-gep.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -O3 -mtriple=aarch64-linux-gnu | FileCheck %s %struct = type { i32, i32, i32 } @@ -5,8 +6,9 @@ define i32 @test1(%struct* %ptr, i64 %idx) { ; CHECK-LABEL: test1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #12 -; CHECK-NEXT: madd x8, x1, x8, x0 +; CHECK-NEXT: lsl x8, x1, #3 +; CHECK-NEXT: add x8, x8, x1, lsl #2 +; CHECK-NEXT: add x8, x0, x8 ; CHECK-NEXT: ldr w9, [x8, #4] ; CHECK-NEXT: tbnz w9, #31, .LBB0_2 ; CHECK-NEXT: // %bb.1: