Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -3074,9 +3074,18 @@ // won't include them. unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF); + // We may address some of the stack above the canonical frame address, either + // for our own arguments or during a call. Include that in calculating whether + // we have complicated addressing concerns. + int64_t CalleeStackUsed = 0; + for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) { + int64_t FixedOff = MFI.getObjectOffset(I); + if (FixedOff > CalleeStackUsed) CalleeStackUsed = FixedOff; + } + // Conservatively always assume BigStack when there are SVE spills. - bool BigStack = SVEStackSize || - (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit; + bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize + + CalleeStackUsed) > EstimatedStackSizeLimit; if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) AFI->setHasStackFrame(true); Index: llvm/test/CodeGen/AArch64/arm64-stackmap.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-stackmap.ll +++ llvm/test/CodeGen/AArch64/arm64-stackmap.ll @@ -46,7 +46,7 @@ ; CHECK-NEXT: .quad 160 ; CHECK-NEXT: .quad 1 ; CHECK-NEXT: .quad _spilledStackMapValue -; CHECK-NEXT: .quad 128 +; CHECK-NEXT: .quad 144 ; CHECK-NEXT: .quad 1 ; CHECK-NEXT: .quad _liveConstant ; CHECK-NEXT: .quad 16 Index: llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -358,122 +358,125 @@ define <64 x i8> @zext_v64i1(<64 x i1> %arg) { ; CHECK-LABEL: zext_v64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [sp, #320] +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr w8, [sp, #336] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp, #64] -; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: ldr w10, [sp, #208] ; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: ldr w8, [sp, #328] +; CHECK-NEXT: ldr w8, [sp, #344] ; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #200] +; CHECK-NEXT: ldr w9, [sp, #216] ; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #336] +; CHECK-NEXT: ldr w10, [sp, #352] ; CHECK-NEXT: mov.b v3[1], w8 -; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: ldr w8, [sp, #88] ; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: ldr w11, [sp, #352] +; CHECK-NEXT: ldr w11, [sp, #368] ; CHECK-NEXT: mov.b v2[1], w9 -; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: ldr w9, [sp, #96] ; CHECK-NEXT: mov.b v1[1], w8 -; CHECK-NEXT: ldr w8, [sp, #344] +; CHECK-NEXT: ldr w8, [sp, #360] ; CHECK-NEXT: mov.b v3[2], w10 -; CHECK-NEXT: ldr w10, [sp, #208] +; CHECK-NEXT: ldr w10, [sp, #224] ; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: ldr w12, [sp, #368] -; CHECK-NEXT: ldr w13, [sp, #384] +; CHECK-NEXT: ldr w12, [sp, #384] +; CHECK-NEXT: ldr w13, [sp, #400] ; CHECK-NEXT: mov.b v1[2], w9 -; CHECK-NEXT: ldr w9, [sp, #360] +; CHECK-NEXT: ldr w9, [sp, #376] ; CHECK-NEXT: mov.b v2[2], w10 -; CHECK-NEXT: ldr w10, [sp, #88] +; CHECK-NEXT: ldr w10, [sp, #104] ; CHECK-NEXT: mov.b v3[3], w8 -; CHECK-NEXT: ldr w8, [sp, #216] +; CHECK-NEXT: ldr w8, [sp, #232] ; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: ldr w14, [sp, #400] +; CHECK-NEXT: ldr w14, [sp, #416] ; CHECK-NEXT: mov.b v1[3], w10 -; CHECK-NEXT: ldr w10, [sp, #376] +; CHECK-NEXT: ldr w10, [sp, #392] ; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] +; CHECK-NEXT: ldr w8, [sp, #112] ; CHECK-NEXT: mov.b v3[4], w11 -; CHECK-NEXT: ldr w11, [sp, #224] +; CHECK-NEXT: ldr w11, [sp, #240] ; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: ldr w15, [sp, #416] +; CHECK-NEXT: ldr w15, [sp, #432] ; CHECK-NEXT: mov.b v1[4], w8 -; CHECK-NEXT: ldr w8, [sp, #392] +; CHECK-NEXT: ldr w8, [sp, #408] ; CHECK-NEXT: mov.b v2[4], w11 -; CHECK-NEXT: ldr w11, [sp, #104] +; CHECK-NEXT: ldr w11, [sp, #120] ; CHECK-NEXT: mov.b v3[5], w9 -; CHECK-NEXT: ldr w9, [sp, #232] +; CHECK-NEXT: ldr w9, [sp, #248] ; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: ldr w16, [sp, #432] +; CHECK-NEXT: ldr w16, [sp, #448] ; CHECK-NEXT: mov.b v1[5], w11 -; CHECK-NEXT: ldr w11, [sp, #408] +; CHECK-NEXT: ldr w11, [sp, #424] ; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w9, [sp, #112] +; CHECK-NEXT: ldr w9, [sp, #128] ; CHECK-NEXT: mov.b v3[6], w12 -; CHECK-NEXT: ldr w12, [sp, #240] +; CHECK-NEXT: ldr w12, [sp, #256] ; CHECK-NEXT: mov.b v0[6], w6 ; CHECK-NEXT: mov.b v1[6], w9 -; CHECK-NEXT: ldr w9, [sp, #424] +; CHECK-NEXT: ldr w9, [sp, #440] ; CHECK-NEXT: mov.b v2[6], w12 -; CHECK-NEXT: ldr w12, [sp, #120] +; CHECK-NEXT: ldr w12, [sp, #136] ; CHECK-NEXT: mov.b v3[7], w10 -; CHECK-NEXT: ldr w10, [sp, #248] +; CHECK-NEXT: ldr w10, [sp, #264] ; CHECK-NEXT: mov.b v0[7], w7 ; CHECK-NEXT: mov.b v1[7], w12 -; CHECK-NEXT: ldr w12, [sp] +; CHECK-NEXT: ldr w12, [sp, #16] ; CHECK-NEXT: mov.b v2[7], w10 -; CHECK-NEXT: ldr w10, [sp, #128] +; CHECK-NEXT: ldr w10, [sp, #144] ; CHECK-NEXT: mov.b v3[8], w13 -; CHECK-NEXT: ldr w13, [sp, #256] +; CHECK-NEXT: ldr w13, [sp, #272] ; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #440] +; CHECK-NEXT: ldr w12, [sp, #456] ; CHECK-NEXT: mov.b v1[8], w10 -; CHECK-NEXT: ldr w10, [sp, #8] +; CHECK-NEXT: ldr w10, [sp, #24] ; CHECK-NEXT: mov.b v2[8], w13 -; CHECK-NEXT: ldr w13, [sp, #136] +; CHECK-NEXT: ldr w13, [sp, #152] ; CHECK-NEXT: mov.b v3[9], w8 -; CHECK-NEXT: ldr w8, [sp, #264] +; CHECK-NEXT: ldr w8, [sp, #280] ; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #272] +; CHECK-NEXT: ldr w10, [sp, #288] ; CHECK-NEXT: mov.b v1[9], w13 -; CHECK-NEXT: ldr w13, [sp, #16] +; CHECK-NEXT: ldr w13, [sp, #32] ; CHECK-NEXT: mov.b v2[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] +; CHECK-NEXT: ldr w8, [sp, #160] ; CHECK-NEXT: mov.b v3[10], w14 -; CHECK-NEXT: ldr w14, [sp, #280] +; CHECK-NEXT: ldr w14, [sp, #296] ; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #296] +; CHECK-NEXT: ldr w13, [sp, #312] ; CHECK-NEXT: mov.b v1[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] +; CHECK-NEXT: ldr w8, [sp, #40] ; CHECK-NEXT: mov.b v2[10], w10 -; CHECK-NEXT: ldr w10, [sp, #152] +; CHECK-NEXT: ldr w10, [sp, #168] ; CHECK-NEXT: mov.b v3[11], w11 -; CHECK-NEXT: ldr w11, [sp, #288] +; CHECK-NEXT: ldr w11, [sp, #304] ; CHECK-NEXT: mov.b v0[11], w8 -; CHECK-NEXT: ldr w8, [sp, #32] +; CHECK-NEXT: ldr w8, [sp, #48] ; CHECK-NEXT: mov.b v1[11], w10 -; CHECK-NEXT: ldr w10, [sp, #160] +; CHECK-NEXT: ldr w10, [sp, #176] ; CHECK-NEXT: mov.b v2[11], w14 ; CHECK-NEXT: mov.b v3[12], w15 ; CHECK-NEXT: mov.b v0[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] +; CHECK-NEXT: ldr w8, [sp, #56] ; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] +; CHECK-NEXT: ldr w10, [sp, #184] ; CHECK-NEXT: mov.b v2[12], w11 -; CHECK-NEXT: ldr w11, [sp, #312] +; CHECK-NEXT: ldr w11, [sp, #328] ; CHECK-NEXT: mov.b v3[13], w9 -; CHECK-NEXT: ldr w9, [sp, #304] +; CHECK-NEXT: ldr w9, [sp, #320] ; CHECK-NEXT: mov.b v0[13], w8 -; CHECK-NEXT: ldr w8, [sp, #48] +; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] +; CHECK-NEXT: ldr w10, [sp, #192] ; CHECK-NEXT: mov.b v2[13], w13 ; CHECK-NEXT: mov.b v3[14], w16 ; CHECK-NEXT: mov.b v0[14], w8 -; CHECK-NEXT: ldr w8, [sp, #56] +; CHECK-NEXT: ldr w8, [sp, #72] ; CHECK-NEXT: mov.b v1[14], w10 ; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: ldr w9, [sp, #184] +; CHECK-NEXT: ldr w9, [sp, #200] ; CHECK-NEXT: movi.16b v4, #1 ; CHECK-NEXT: mov.b v0[15], w8 ; CHECK-NEXT: mov.b v1[15], w9 @@ -483,6 +486,7 @@ ; CHECK-NEXT: and.16b v1, v1, v4 ; CHECK-NEXT: and.16b v2, v2, v4 ; CHECK-NEXT: and.16b v3, v3, v4 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %res = zext <64 x i1> %arg to <64 x i8> ret <64 x i8> %res @@ -491,121 +495,124 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) { ; CHECK-LABEL: sext_v64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [sp, #320] +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr w8, [sp, #336] ; CHECK-NEXT: fmov s3, w0 -; CHECK-NEXT: ldr w9, [sp, #64] -; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: ldr w10, [sp, #208] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldr w8, [sp, #328] +; CHECK-NEXT: ldr w8, [sp, #344] ; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: ldr w9, [sp, #88] ; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #80] +; CHECK-NEXT: ldr w10, [sp, #96] ; CHECK-NEXT: mov.b v0[1], w8 -; CHECK-NEXT: ldr w8, [sp, #200] +; CHECK-NEXT: ldr w8, [sp, #216] ; CHECK-NEXT: mov.b v1[1], w9 -; CHECK-NEXT: ldr w9, [sp, #336] +; CHECK-NEXT: ldr w9, [sp, #352] ; CHECK-NEXT: mov.b v3[1], w1 -; CHECK-NEXT: ldr w11, [sp, #88] +; CHECK-NEXT: ldr w11, [sp, #104] ; CHECK-NEXT: mov.b v2[1], w8 -; CHECK-NEXT: ldr w8, [sp, #344] +; CHECK-NEXT: ldr w8, [sp, #360] ; CHECK-NEXT: mov.b v0[2], w9 -; CHECK-NEXT: ldr w9, [sp, #208] +; CHECK-NEXT: ldr w9, [sp, #224] ; CHECK-NEXT: mov.b v1[2], w10 -; CHECK-NEXT: ldr w10, [sp, #352] +; CHECK-NEXT: ldr w10, [sp, #368] ; CHECK-NEXT: mov.b v3[2], w2 -; CHECK-NEXT: ldr w12, [sp, #96] +; CHECK-NEXT: ldr w12, [sp, #112] ; CHECK-NEXT: mov.b v2[2], w9 -; CHECK-NEXT: ldr w9, [sp, #360] +; CHECK-NEXT: ldr w9, [sp, #376] ; CHECK-NEXT: mov.b v0[3], w8 -; CHECK-NEXT: ldr w8, [sp, #216] +; CHECK-NEXT: ldr w8, [sp, #232] ; CHECK-NEXT: mov.b v1[3], w11 -; CHECK-NEXT: ldr w13, [sp, #104] +; CHECK-NEXT: ldr w13, [sp, #120] ; CHECK-NEXT: mov.b v3[3], w3 -; CHECK-NEXT: ldr w11, [sp, #368] +; CHECK-NEXT: ldr w11, [sp, #384] ; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w14, [sp, #112] +; CHECK-NEXT: ldr w14, [sp, #128] ; CHECK-NEXT: mov.b v0[4], w10 -; CHECK-NEXT: ldr w10, [sp, #224] +; CHECK-NEXT: ldr w10, [sp, #240] ; CHECK-NEXT: mov.b v1[4], w12 -; CHECK-NEXT: ldr w8, [sp, #376] +; CHECK-NEXT: ldr w8, [sp, #392] ; CHECK-NEXT: mov.b v3[4], w4 -; CHECK-NEXT: ldr w15, [sp, #120] +; CHECK-NEXT: ldr w15, [sp, #136] ; CHECK-NEXT: mov.b v2[4], w10 -; CHECK-NEXT: ldr w12, [sp, #384] +; CHECK-NEXT: ldr w12, [sp, #400] ; CHECK-NEXT: mov.b v0[5], w9 -; CHECK-NEXT: ldr w9, [sp, #232] +; CHECK-NEXT: ldr w9, [sp, #248] ; CHECK-NEXT: mov.b v1[5], w13 -; CHECK-NEXT: ldr w16, [sp, #128] +; CHECK-NEXT: ldr w16, [sp, #144] ; CHECK-NEXT: mov.b v3[5], w5 -; CHECK-NEXT: ldr w10, [sp, #392] +; CHECK-NEXT: ldr w10, [sp, #408] ; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w13, [sp, #400] +; CHECK-NEXT: ldr w13, [sp, #416] ; CHECK-NEXT: mov.b v0[6], w11 -; CHECK-NEXT: ldr w11, [sp, #240] +; CHECK-NEXT: ldr w11, [sp, #256] ; CHECK-NEXT: mov.b v1[6], w14 -; CHECK-NEXT: ldr w9, [sp, #408] +; CHECK-NEXT: ldr w9, [sp, #424] ; CHECK-NEXT: mov.b v3[6], w6 -; CHECK-NEXT: ldr w14, [sp, #416] +; CHECK-NEXT: ldr w14, [sp, #432] ; CHECK-NEXT: mov.b v2[6], w11 -; CHECK-NEXT: ldr w11, [sp, #424] +; CHECK-NEXT: ldr w11, [sp, #440] ; CHECK-NEXT: mov.b v0[7], w8 -; CHECK-NEXT: ldr w8, [sp, #248] +; CHECK-NEXT: ldr w8, [sp, #264] ; CHECK-NEXT: mov.b v1[7], w15 -; CHECK-NEXT: ldr w15, [sp, #432] +; CHECK-NEXT: ldr w15, [sp, #448] ; CHECK-NEXT: mov.b v3[7], w7 ; CHECK-NEXT: mov.b v2[7], w8 -; CHECK-NEXT: ldr w8, [sp] +; CHECK-NEXT: ldr w8, [sp, #16] ; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #256] +; CHECK-NEXT: ldr w12, [sp, #272] ; CHECK-NEXT: mov.b v1[8], w16 -; CHECK-NEXT: ldr w16, [sp, #440] +; CHECK-NEXT: ldr w16, [sp, #456] ; CHECK-NEXT: mov.b v3[8], w8 -; CHECK-NEXT: ldr w8, [sp, #136] +; CHECK-NEXT: ldr w8, [sp, #152] ; CHECK-NEXT: mov.b v2[8], w12 -; CHECK-NEXT: ldr w12, [sp, #8] +; CHECK-NEXT: ldr w12, [sp, #24] ; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #264] +; CHECK-NEXT: ldr w10, [sp, #280] ; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #272] +; CHECK-NEXT: ldr w8, [sp, #288] ; CHECK-NEXT: mov.b v3[9], w12 -; CHECK-NEXT: ldr w12, [sp, #144] +; CHECK-NEXT: ldr w12, [sp, #160] ; CHECK-NEXT: mov.b v2[9], w10 -; CHECK-NEXT: ldr w10, [sp, #16] +; CHECK-NEXT: ldr w10, [sp, #32] ; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #280] +; CHECK-NEXT: ldr w13, [sp, #296] ; CHECK-NEXT: mov.b v1[10], w12 -; CHECK-NEXT: ldr w12, [sp, #152] +; CHECK-NEXT: ldr w12, [sp, #168] ; CHECK-NEXT: mov.b v3[10], w10 -; CHECK-NEXT: ldr w10, [sp, #160] +; CHECK-NEXT: ldr w10, [sp, #176] ; CHECK-NEXT: mov.b v2[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] +; CHECK-NEXT: ldr w8, [sp, #40] ; CHECK-NEXT: mov.b v0[11], w9 -; CHECK-NEXT: ldr w9, [sp, #288] +; CHECK-NEXT: ldr w9, [sp, #304] ; CHECK-NEXT: mov.b v1[11], w12 -; CHECK-NEXT: ldr w12, [sp, #296] +; CHECK-NEXT: ldr w12, [sp, #312] ; CHECK-NEXT: mov.b v3[11], w8 -; CHECK-NEXT: ldr w8, [sp, #32] +; CHECK-NEXT: ldr w8, [sp, #48] ; CHECK-NEXT: mov.b v2[11], w13 ; CHECK-NEXT: mov.b v0[12], w14 ; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] +; CHECK-NEXT: ldr w10, [sp, #184] ; CHECK-NEXT: mov.b v3[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] +; CHECK-NEXT: ldr w8, [sp, #56] ; CHECK-NEXT: mov.b v2[12], w9 -; CHECK-NEXT: ldr w9, [sp, #304] +; CHECK-NEXT: ldr w9, [sp, #320] ; CHECK-NEXT: mov.b v0[13], w11 -; CHECK-NEXT: ldr w11, [sp, #312] +; CHECK-NEXT: ldr w11, [sp, #328] ; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] +; CHECK-NEXT: ldr w10, [sp, #192] ; CHECK-NEXT: mov.b v3[13], w8 -; CHECK-NEXT: ldr w8, [sp, #48] +; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: mov.b v2[13], w12 ; CHECK-NEXT: mov.b v0[14], w15 ; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: ldr w10, [sp, #184] +; CHECK-NEXT: ldr w10, [sp, #200] ; CHECK-NEXT: mov.b v3[14], w8 -; CHECK-NEXT: ldr w8, [sp, #56] +; CHECK-NEXT: ldr w8, [sp, #72] ; CHECK-NEXT: mov.b v2[14], w9 ; CHECK-NEXT: mov.b v0[15], w16 ; CHECK-NEXT: mov.b v1[15], w10 @@ -619,6 +626,7 @@ ; CHECK-NEXT: cmlt.16b v1, v1, #0 ; CHECK-NEXT: cmlt.16b v2, v2, #0 ; CHECK-NEXT: cmlt.16b v3, v4, #0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %res = sext <64 x i1> %arg to <64 x i8> ret <64 x i8> %res Index: llvm/test/CodeGen/AArch64/neon-dotreduce.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-dotreduce.ll +++ llvm/test/CodeGen/AArch64/neon-dotreduce.ll @@ -652,272 +652,275 @@ define i32 @test_sdot_v33i8_double(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33 x i8> %d) { ; CHECK-LABEL: test_sdot_v33i8_double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b0, [sp, #64] -; CHECK-NEXT: add x8, sp, #72 -; CHECK-NEXT: ldr b3, [sp] -; CHECK-NEXT: add x9, sp, #136 -; CHECK-NEXT: ldr b2, [sp, #128] -; CHECK-NEXT: add x10, sp, #88 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr b0, [sp, #80] +; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: ldr b3, [sp, #16] +; CHECK-NEXT: add x9, sp, #152 +; CHECK-NEXT: ldr b2, [sp, #144] +; CHECK-NEXT: add x10, sp, #104 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #80 -; CHECK-NEXT: ldr b4, [sp, #328] -; CHECK-NEXT: add x11, sp, #352 +; CHECK-NEXT: add x8, sp, #96 +; CHECK-NEXT: ldr b4, [sp, #344] +; CHECK-NEXT: add x11, sp, #368 ; CHECK-NEXT: ld1 { v2.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #144 -; CHECK-NEXT: ldr b7, [sp, #200] +; CHECK-NEXT: add x9, sp, #160 +; CHECK-NEXT: ldr b7, [sp, #216] ; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: ld1 { v0.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #8 -; CHECK-NEXT: ldr b17, [sp, #264] +; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: ldr b17, [sp, #280] ; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #152 +; CHECK-NEXT: add x9, sp, #168 ; CHECK-NEXT: ld1 { v3.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: add x8, sp, #32 ; CHECK-NEXT: ld1 { v0.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #96 -; CHECK-NEXT: ldr b16, [sp, #392] +; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: ldr b16, [sp, #408] ; CHECK-NEXT: ld1 { v2.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #160 +; CHECK-NEXT: add x9, sp, #176 ; CHECK-NEXT: ld1 { v3.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: add x8, sp, #40 ; CHECK-NEXT: ld1 { v0.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #104 -; CHECK-NEXT: ldr b5, [sp, #192] +; CHECK-NEXT: add x10, sp, #120 +; CHECK-NEXT: ldr b5, [sp, #208] ; CHECK-NEXT: ld1 { v2.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #168 +; CHECK-NEXT: add x9, sp, #184 ; CHECK-NEXT: ld1 { v3.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #32 +; CHECK-NEXT: add x8, sp, #48 ; CHECK-NEXT: ld1 { v0.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: add x10, sp, #128 ; CHECK-NEXT: sshll v5.8h, v5.8b, #0 -; CHECK-NEXT: ldr b20, [sp, #856] +; CHECK-NEXT: ldr b20, [sp, #872] ; CHECK-NEXT: ld1 { v2.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #176 +; CHECK-NEXT: add x9, sp, #192 ; CHECK-NEXT: ld1 { v3.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #40 +; CHECK-NEXT: add x8, sp, #56 ; CHECK-NEXT: ld1 { v0.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #48 +; CHECK-NEXT: add x10, sp, #64 ; CHECK-NEXT: mov v1.b[1], w1 -; CHECK-NEXT: ldr b21, [sp, #728] +; CHECK-NEXT: ldr b21, [sp, #744] ; CHECK-NEXT: ld1 { v2.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #184 +; CHECK-NEXT: add x9, sp, #200 ; CHECK-NEXT: ld1 { v3.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #120 +; CHECK-NEXT: add x8, sp, #136 ; CHECK-NEXT: mov v1.b[2], w2 ; CHECK-NEXT: ld1 { v0.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #56 +; CHECK-NEXT: add x8, sp, #72 ; CHECK-NEXT: ld1 { v3.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #272 +; CHECK-NEXT: add x10, sp, #288 ; CHECK-NEXT: ld1 { v2.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #208 +; CHECK-NEXT: add x9, sp, #224 ; CHECK-NEXT: mov v1.b[3], w3 ; CHECK-NEXT: ld1 { v17.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #224 +; CHECK-NEXT: add x10, sp, #240 ; CHECK-NEXT: ld1 { v3.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #336 +; CHECK-NEXT: add x8, sp, #352 ; CHECK-NEXT: ld1 { v7.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #400 +; CHECK-NEXT: add x9, sp, #416 ; CHECK-NEXT: sshll v19.8h, v2.8b, #0 -; CHECK-NEXT: ldr b2, [sp, #456] +; CHECK-NEXT: ldr b2, [sp, #472] ; CHECK-NEXT: ld1 { v4.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #344 +; CHECK-NEXT: add x8, sp, #360 ; CHECK-NEXT: ld1 { v16.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #408 +; CHECK-NEXT: add x9, sp, #424 ; CHECK-NEXT: sshll v3.8h, v3.8b, #0 ; CHECK-NEXT: mov v1.b[4], w4 ; CHECK-NEXT: ld1 { v4.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #216 +; CHECK-NEXT: add x8, sp, #232 ; CHECK-NEXT: ld1 { v16.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #416 +; CHECK-NEXT: add x9, sp, #432 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-NEXT: ld1 { v7.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #280 +; CHECK-NEXT: add x8, sp, #296 ; CHECK-NEXT: ld1 { v4.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #360 +; CHECK-NEXT: add x11, sp, #376 ; CHECK-NEXT: ld1 { v16.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #424 +; CHECK-NEXT: add x9, sp, #440 ; CHECK-NEXT: ld1 { v17.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #288 +; CHECK-NEXT: add x8, sp, #304 ; CHECK-NEXT: ld1 { v7.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #232 +; CHECK-NEXT: add x10, sp, #248 ; CHECK-NEXT: ld1 { v4.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #368 +; CHECK-NEXT: add x11, sp, #384 ; CHECK-NEXT: ld1 { v16.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #432 +; CHECK-NEXT: add x9, sp, #448 ; CHECK-NEXT: ld1 { v17.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #296 +; CHECK-NEXT: add x8, sp, #312 ; CHECK-NEXT: ld1 { v7.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #240 +; CHECK-NEXT: add x10, sp, #256 ; CHECK-NEXT: ld1 { v4.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #376 +; CHECK-NEXT: add x11, sp, #392 ; CHECK-NEXT: ld1 { v16.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #440 +; CHECK-NEXT: add x9, sp, #456 ; CHECK-NEXT: ld1 { v17.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #304 +; CHECK-NEXT: add x8, sp, #320 ; CHECK-NEXT: ld1 { v7.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #248 +; CHECK-NEXT: add x10, sp, #264 ; CHECK-NEXT: ld1 { v4.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #384 +; CHECK-NEXT: add x11, sp, #400 ; CHECK-NEXT: ld1 { v16.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #448 +; CHECK-NEXT: add x9, sp, #464 ; CHECK-NEXT: ld1 { v17.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #312 +; CHECK-NEXT: add x8, sp, #328 ; CHECK-NEXT: ld1 { v7.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #256 +; CHECK-NEXT: add x10, sp, #272 ; CHECK-NEXT: ld1 { v4.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #480 +; CHECK-NEXT: add x11, sp, #496 ; CHECK-NEXT: ld1 { v16.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #472 +; CHECK-NEXT: add x9, sp, #488 ; CHECK-NEXT: ld1 { v17.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #320 +; CHECK-NEXT: add x8, sp, #336 ; CHECK-NEXT: ld1 { v7.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #624 +; CHECK-NEXT: add x10, sp, #640 ; CHECK-NEXT: sshll v6.8h, v4.8b, #0 ; CHECK-NEXT: sshll v16.8h, v16.8b, #0 ; CHECK-NEXT: ld1 { v17.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #600 +; CHECK-NEXT: add x8, sp, #616 ; CHECK-NEXT: sshll v18.8h, v7.8b, #0 -; CHECK-NEXT: ldr b7, [sp, #592] +; CHECK-NEXT: ldr b7, [sp, #608] ; CHECK-NEXT: mov v1.b[5], w5 ; CHECK-NEXT: ld1 { v7.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #608 +; CHECK-NEXT: add x8, sp, #624 ; CHECK-NEXT: sshll v4.8h, v17.8b, #0 ; CHECK-NEXT: sshll v17.8h, v2.8b, #0 ; CHECK-NEXT: smull2 v2.4s, v3.8h, v4.8h ; CHECK-NEXT: smull v3.4s, v3.4h, v4.4h ; CHECK-NEXT: ld1 { v7.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #616 +; CHECK-NEXT: add x8, sp, #632 ; CHECK-NEXT: smull v4.4s, v5.4h, v17.4h -; CHECK-NEXT: ldr b17, [sp, #528] +; CHECK-NEXT: ldr b17, [sp, #544] ; CHECK-NEXT: smlal2 v2.4s, v19.8h, v16.8h ; CHECK-NEXT: smlal v3.4s, v19.4h, v16.4h -; CHECK-NEXT: ldr b16, [sp, #464] +; CHECK-NEXT: ldr b16, [sp, #480] ; CHECK-NEXT: ld1 { v7.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #536 -; CHECK-NEXT: ldr b19, [sp, #656] +; CHECK-NEXT: add x8, sp, #552 +; CHECK-NEXT: ldr b19, [sp, #672] ; CHECK-NEXT: ld1 { v16.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #664 +; CHECK-NEXT: add x9, sp, #680 ; CHECK-NEXT: ld1 { v17.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #544 +; CHECK-NEXT: add x8, sp, #560 ; CHECK-NEXT: ld1 { v7.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #632 +; CHECK-NEXT: add x10, sp, #648 ; CHECK-NEXT: ld1 { v19.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #672 +; CHECK-NEXT: add x9, sp, #688 ; CHECK-NEXT: ld1 { v16.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #488 +; CHECK-NEXT: add x11, sp, #504 ; CHECK-NEXT: ld1 { v17.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #552 +; CHECK-NEXT: add x8, sp, #568 ; CHECK-NEXT: ld1 { v7.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #640 +; CHECK-NEXT: add x10, sp, #656 ; CHECK-NEXT: ld1 { v19.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #680 +; CHECK-NEXT: add x9, sp, #696 ; CHECK-NEXT: ld1 { v16.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #496 +; CHECK-NEXT: add x11, sp, #512 ; CHECK-NEXT: ld1 { v17.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #560 +; CHECK-NEXT: add x8, sp, #576 ; CHECK-NEXT: mov v4.s[1], wzr ; CHECK-NEXT: ld1 { v7.b }[6], [x10] ; CHECK-NEXT: ld1 { v19.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #688 +; CHECK-NEXT: add x9, sp, #704 ; CHECK-NEXT: ld1 { v16.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #504 +; CHECK-NEXT: add x11, sp, #520 ; CHECK-NEXT: ld1 { v17.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #568 +; CHECK-NEXT: add x8, sp, #584 ; CHECK-NEXT: mov v1.b[6], w6 -; CHECK-NEXT: add x10, sp, #648 +; CHECK-NEXT: add x10, sp, #664 ; CHECK-NEXT: ld1 { v19.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #696 +; CHECK-NEXT: add x9, sp, #712 ; CHECK-NEXT: mov v4.s[2], wzr ; CHECK-NEXT: ld1 { v16.b }[5], [x11] ; CHECK-NEXT: ld1 { v17.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #864 +; CHECK-NEXT: add x8, sp, #880 ; CHECK-NEXT: mov v1.b[7], w7 ; CHECK-NEXT: ld1 { v7.b }[7], [x10] ; CHECK-NEXT: ld1 { v19.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #704 +; CHECK-NEXT: add x9, sp, #720 ; CHECK-NEXT: ld1 { v20.b }[1], [x8] -; CHECK-NEXT: add x10, sp, #512 -; CHECK-NEXT: add x8, sp, #872 -; CHECK-NEXT: add x11, sp, #576 +; CHECK-NEXT: add x10, sp, #528 +; CHECK-NEXT: add x8, sp, #888 +; CHECK-NEXT: add x11, sp, #592 ; CHECK-NEXT: mov v4.s[3], wzr ; CHECK-NEXT: ld1 { v19.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #712 +; CHECK-NEXT: add x9, sp, #728 ; CHECK-NEXT: ld1 { v16.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #520 +; CHECK-NEXT: add x10, sp, #536 ; CHECK-NEXT: ld1 { v20.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #736 +; CHECK-NEXT: add x8, sp, #752 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0 ; CHECK-NEXT: ld1 { v17.b }[6], [x11] ; CHECK-NEXT: ld1 { v19.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #928 +; CHECK-NEXT: add x9, sp, #944 ; CHECK-NEXT: smull2 v5.4s, v1.8h, v18.8h ; CHECK-NEXT: ld1 { v21.b }[1], [x8] ; CHECK-NEXT: smlal v4.4s, v1.4h, v18.4h -; CHECK-NEXT: ldr b1, [sp, #920] -; CHECK-NEXT: add x11, sp, #584 +; CHECK-NEXT: ldr b1, [sp, #936] +; CHECK-NEXT: add x11, sp, #600 ; CHECK-NEXT: ld1 { v16.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #880 -; CHECK-NEXT: add x8, sp, #744 +; CHECK-NEXT: add x10, sp, #896 +; CHECK-NEXT: add x8, sp, #760 ; CHECK-NEXT: ld1 { v1.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #936 +; CHECK-NEXT: add x9, sp, #952 ; CHECK-NEXT: ld1 { v17.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #800 +; CHECK-NEXT: add x11, sp, #816 ; CHECK-NEXT: smlal2 v5.4s, v0.8h, v6.8h ; CHECK-NEXT: ld1 { v20.b }[3], [x10] ; CHECK-NEXT: smlal v4.4s, v0.4h, v6.4h ; CHECK-NEXT: ld1 { v21.b }[2], [x8] -; CHECK-NEXT: ldr b0, [sp, #792] -; CHECK-NEXT: add x10, sp, #888 -; CHECK-NEXT: add x8, sp, #752 +; CHECK-NEXT: ldr b0, [sp, #808] +; CHECK-NEXT: add x10, sp, #904 +; CHECK-NEXT: add x8, sp, #768 ; CHECK-NEXT: ld1 { v1.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #944 -; CHECK-NEXT: ldr b18, [sp, #720] +; CHECK-NEXT: add x9, sp, #960 +; CHECK-NEXT: ldr b18, [sp, #736] ; CHECK-NEXT: ld1 { v0.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #808 +; CHECK-NEXT: add x11, sp, #824 ; CHECK-NEXT: ld1 { v20.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #896 +; CHECK-NEXT: add x10, sp, #912 ; CHECK-NEXT: ld1 { v21.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #760 +; CHECK-NEXT: add x8, sp, #776 ; CHECK-NEXT: ld1 { v1.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #952 +; CHECK-NEXT: add x9, sp, #968 ; CHECK-NEXT: ld1 { v0.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #816 +; CHECK-NEXT: add x11, sp, #832 ; CHECK-NEXT: ld1 { v20.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #904 +; CHECK-NEXT: add x10, sp, #920 ; CHECK-NEXT: ld1 { v21.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #768 +; CHECK-NEXT: add x8, sp, #784 ; CHECK-NEXT: sshll v6.8h, v7.8b, #0 ; CHECK-NEXT: ld1 { v1.b }[4], [x9] ; CHECK-NEXT: sshll v7.8h, v16.8b, #0 ; CHECK-NEXT: ld1 { v0.b }[3], [x11] ; CHECK-NEXT: sshll v16.8h, v18.8b, #0 -; CHECK-NEXT: ldr b18, [sp, #984] +; CHECK-NEXT: ldr b18, [sp, #1000] ; CHECK-NEXT: ld1 { v20.b }[6], [x10] -; CHECK-NEXT: add x9, sp, #960 +; CHECK-NEXT: add x9, sp, #976 ; CHECK-NEXT: ld1 { v21.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #824 +; CHECK-NEXT: add x8, sp, #840 ; CHECK-NEXT: sshll v18.8h, v18.8b, #0 -; CHECK-NEXT: add x10, sp, #912 +; CHECK-NEXT: add x10, sp, #928 ; CHECK-NEXT: ld1 { v1.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #776 +; CHECK-NEXT: add x9, sp, #792 ; CHECK-NEXT: smull v16.4s, v16.4h, v18.4h ; CHECK-NEXT: ld1 { v0.b }[4], [x8] ; CHECK-NEXT: ld1 { v20.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #968 -; CHECK-NEXT: add x8, sp, #832 +; CHECK-NEXT: add x10, sp, #984 +; CHECK-NEXT: add x8, sp, #848 ; CHECK-NEXT: ld1 { v21.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #784 +; CHECK-NEXT: add x9, sp, #800 ; CHECK-NEXT: mov v16.s[1], wzr ; CHECK-NEXT: ld1 { v1.b }[6], [x10] ; CHECK-NEXT: ld1 { v0.b }[5], [x8] -; CHECK-NEXT: add x10, sp, #976 -; CHECK-NEXT: add x8, sp, #840 +; CHECK-NEXT: add x10, sp, #992 +; CHECK-NEXT: add x8, sp, #856 ; CHECK-NEXT: ld1 { v21.b }[7], [x9] ; CHECK-NEXT: sshll v18.8h, v19.8b, #0 ; CHECK-NEXT: mov v16.s[2], wzr ; CHECK-NEXT: ld1 { v1.b }[7], [x10] ; CHECK-NEXT: ld1 { v0.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #848 +; CHECK-NEXT: add x8, sp, #864 ; CHECK-NEXT: sshll v19.8h, v20.8b, #0 ; CHECK-NEXT: sshll v21.8h, v21.8b, #0 ; CHECK-NEXT: mov v16.s[3], wzr @@ -942,6 +945,7 @@ ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <33 x i8> %a to <33 x i32> @@ -959,135 +963,138 @@ define i32 @test_sdot_v33i8_double_nomla(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33 x i8> %d) { ; CHECK-LABEL: test_sdot_v33i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b0, [sp, #64] -; CHECK-NEXT: add x8, sp, #72 -; CHECK-NEXT: ldr b3, [sp, #128] -; CHECK-NEXT: add x10, sp, #80 -; CHECK-NEXT: ldr b5, [sp] -; CHECK-NEXT: add x9, sp, #8 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr b0, [sp, #80] +; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: ldr b3, [sp, #144] +; CHECK-NEXT: add x10, sp, #96 +; CHECK-NEXT: ldr b5, [sp, #16] +; CHECK-NEXT: add x9, sp, #24 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #136 +; CHECK-NEXT: add x8, sp, #152 ; CHECK-NEXT: fmov s2, w0 -; CHECK-NEXT: ldr b1, [sp, #192] +; CHECK-NEXT: ldr b1, [sp, #208] ; CHECK-NEXT: ld1 { v5.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #16 +; CHECK-NEXT: add x9, sp, #32 ; CHECK-NEXT: ld1 { v3.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #144 +; CHECK-NEXT: add x8, sp, #160 ; CHECK-NEXT: ld1 { v0.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #88 +; CHECK-NEXT: add x10, sp, #104 ; CHECK-NEXT: mov v2.b[1], w1 -; CHECK-NEXT: ldr b17, [sp, #656] +; CHECK-NEXT: ldr b17, [sp, #672] ; CHECK-NEXT: ld1 { v5.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #24 +; CHECK-NEXT: add x9, sp, #40 ; CHECK-NEXT: ld1 { v3.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #152 +; CHECK-NEXT: add x8, sp, #168 ; CHECK-NEXT: ld1 { v0.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #96 +; CHECK-NEXT: add x10, sp, #112 ; CHECK-NEXT: mov v2.b[2], w2 -; CHECK-NEXT: add x11, sp, #632 +; CHECK-NEXT: add x11, sp, #648 ; CHECK-NEXT: ld1 { v5.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #32 +; CHECK-NEXT: add x9, sp, #48 ; CHECK-NEXT: ld1 { v3.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #160 +; CHECK-NEXT: add x8, sp, #176 ; CHECK-NEXT: ld1 { v0.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #104 +; CHECK-NEXT: add x10, sp, #120 ; CHECK-NEXT: mov v2.b[3], w3 ; CHECK-NEXT: ld1 { v5.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #40 +; CHECK-NEXT: add x9, sp, #56 ; CHECK-NEXT: ld1 { v3.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #168 +; CHECK-NEXT: add x8, sp, #184 ; CHECK-NEXT: ld1 { v0.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: add x10, sp, #128 ; CHECK-NEXT: mov v2.b[4], w4 ; CHECK-NEXT: ld1 { v5.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #48 +; CHECK-NEXT: add x9, sp, #64 ; CHECK-NEXT: ld1 { v3.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #176 +; CHECK-NEXT: add x8, sp, #192 ; CHECK-NEXT: ld1 { v0.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #120 +; CHECK-NEXT: add x10, sp, #136 ; CHECK-NEXT: sshll v4.8h, v1.8b, #0 ; CHECK-NEXT: mov v2.b[5], w5 ; CHECK-NEXT: ld1 { v5.b }[6], [x9] ; CHECK-NEXT: ld1 { v3.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #184 +; CHECK-NEXT: add x8, sp, #200 ; CHECK-NEXT: ld1 { v0.b }[7], [x10] -; CHECK-NEXT: add x9, sp, #56 -; CHECK-NEXT: add x10, sp, #664 +; CHECK-NEXT: add x9, sp, #72 +; CHECK-NEXT: add x10, sp, #680 ; CHECK-NEXT: mov v2.b[6], w6 ; CHECK-NEXT: ld1 { v3.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #600 +; CHECK-NEXT: add x8, sp, #616 ; CHECK-NEXT: sshll v1.8h, v0.8b, #0 -; CHECK-NEXT: ldr b0, [sp, #592] +; CHECK-NEXT: ldr b0, [sp, #608] ; CHECK-NEXT: ld1 { v5.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #472 +; CHECK-NEXT: add x9, sp, #488 ; CHECK-NEXT: mov v2.b[7], w7 ; CHECK-NEXT: ld1 { v17.b }[1], [x10] ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #608 +; CHECK-NEXT: add x8, sp, #624 ; CHECK-NEXT: sshll v7.8h, v3.8b, #0 -; CHECK-NEXT: add x10, sp, #488 +; CHECK-NEXT: add x10, sp, #504 ; CHECK-NEXT: sshll v16.8h, v5.8b, #0 -; CHECK-NEXT: ldr b5, [sp, #464] +; CHECK-NEXT: ldr b5, [sp, #480] ; CHECK-NEXT: sshll v6.8h, v2.8b, #0 ; CHECK-NEXT: ld1 { v0.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #616 +; CHECK-NEXT: add x8, sp, #632 ; CHECK-NEXT: saddl2 v2.4s, v16.8h, v7.8h ; CHECK-NEXT: ld1 { v5.b }[1], [x9] ; CHECK-NEXT: saddl v7.4s, v16.4h, v7.4h -; CHECK-NEXT: ldr b16, [sp, #528] -; CHECK-NEXT: add x9, sp, #536 +; CHECK-NEXT: ldr b16, [sp, #544] +; CHECK-NEXT: add x9, sp, #552 ; CHECK-NEXT: ld1 { v0.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #624 +; CHECK-NEXT: add x8, sp, #640 ; CHECK-NEXT: sshll v4.4s, v4.4h, #0 ; CHECK-NEXT: ld1 { v16.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #544 +; CHECK-NEXT: add x9, sp, #560 ; CHECK-NEXT: mov v4.s[1], wzr ; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #480 +; CHECK-NEXT: add x8, sp, #496 ; CHECK-NEXT: saddl2 v3.4s, v6.8h, v1.8h ; CHECK-NEXT: ld1 { v16.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #552 +; CHECK-NEXT: add x9, sp, #568 ; CHECK-NEXT: ld1 { v5.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #672 +; CHECK-NEXT: add x8, sp, #688 ; CHECK-NEXT: mov v4.s[2], wzr ; CHECK-NEXT: ld1 { v0.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #640 +; CHECK-NEXT: add x11, sp, #656 ; CHECK-NEXT: ld1 { v17.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #680 +; CHECK-NEXT: add x8, sp, #696 ; CHECK-NEXT: ld1 { v5.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #496 +; CHECK-NEXT: add x10, sp, #512 ; CHECK-NEXT: mov v4.s[3], wzr ; CHECK-NEXT: ld1 { v16.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #560 +; CHECK-NEXT: add x9, sp, #576 ; CHECK-NEXT: ld1 { v0.b }[6], [x11] ; CHECK-NEXT: ld1 { v17.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #688 +; CHECK-NEXT: add x8, sp, #704 ; CHECK-NEXT: ld1 { v5.b }[4], [x10] -; CHECK-NEXT: add x11, sp, #504 +; CHECK-NEXT: add x11, sp, #520 ; CHECK-NEXT: saddw v4.4s, v4.4s, v6.4h -; CHECK-NEXT: ldr b6, [sp, #720] +; CHECK-NEXT: ldr b6, [sp, #736] ; CHECK-NEXT: ld1 { v16.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #568 +; CHECK-NEXT: add x9, sp, #584 ; CHECK-NEXT: ld1 { v17.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #696 -; CHECK-NEXT: add x10, sp, #648 +; CHECK-NEXT: add x8, sp, #712 +; CHECK-NEXT: add x10, sp, #664 ; CHECK-NEXT: ld1 { v5.b }[5], [x11] ; CHECK-NEXT: saddw v1.4s, v4.4s, v1.4h ; CHECK-NEXT: sshll v4.8h, v6.8b, #0 ; CHECK-NEXT: ld1 { v16.b }[5], [x9] ; CHECK-NEXT: ld1 { v17.b }[5], [x8] -; CHECK-NEXT: add x9, sp, #576 +; CHECK-NEXT: add x9, sp, #592 ; CHECK-NEXT: ld1 { v0.b }[7], [x10] -; CHECK-NEXT: add x8, sp, #512 -; CHECK-NEXT: add x10, sp, #704 +; CHECK-NEXT: add x8, sp, #528 +; CHECK-NEXT: add x10, sp, #720 ; CHECK-NEXT: sshll v4.4s, v4.4h, #0 ; CHECK-NEXT: ld1 { v16.b }[6], [x9] ; CHECK-NEXT: ld1 { v5.b }[6], [x8] -; CHECK-NEXT: add x9, sp, #584 +; CHECK-NEXT: add x9, sp, #600 ; CHECK-NEXT: mov v4.s[1], wzr ; CHECK-NEXT: ld1 { v17.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #712 -; CHECK-NEXT: add x8, sp, #520 +; CHECK-NEXT: add x10, sp, #728 +; CHECK-NEXT: add x8, sp, #536 ; CHECK-NEXT: ld1 { v16.b }[7], [x9] ; CHECK-NEXT: add v2.4s, v3.4s, v2.4s ; CHECK-NEXT: mov v4.s[2], wzr @@ -1111,6 +1118,7 @@ ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <33 x i8> %a to <33 x i32> @@ -1252,390 +1260,393 @@ define i32 @test_sdot_v48i8_double(<48 x i8> %a, <48 x i8> %b, <48 x i8> %c, <48 x i8> %d) { ; CHECK-LABEL: test_sdot_v48i8_double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b0, [sp, #64] -; CHECK-NEXT: add x8, sp, #72 -; CHECK-NEXT: add x9, sp, #80 -; CHECK-NEXT: ldr b1, [sp, #192] -; CHECK-NEXT: ldr b3, [sp, #128] -; CHECK-NEXT: add x10, sp, #88 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr b0, [sp, #80] +; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: add x9, sp, #96 +; CHECK-NEXT: ldr b1, [sp, #208] +; CHECK-NEXT: ldr b3, [sp, #144] +; CHECK-NEXT: add x10, sp, #104 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #200 -; CHECK-NEXT: ldr b4, [sp, #256] +; CHECK-NEXT: add x8, sp, #216 +; CHECK-NEXT: ldr b4, [sp, #272] ; CHECK-NEXT: fmov s17, w0 -; CHECK-NEXT: ldr b18, [sp] -; CHECK-NEXT: add x11, sp, #528 +; CHECK-NEXT: ldr b18, [sp, #16] +; CHECK-NEXT: add x11, sp, #544 ; CHECK-NEXT: ld1 { v1.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #264 +; CHECK-NEXT: add x8, sp, #280 ; CHECK-NEXT: ld1 { v0.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #136 -; CHECK-NEXT: ldr b5, [sp, #576] +; CHECK-NEXT: add x9, sp, #152 +; CHECK-NEXT: ldr b5, [sp, #592] ; CHECK-NEXT: ld1 { v4.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #144 +; CHECK-NEXT: add x8, sp, #160 ; CHECK-NEXT: ld1 { v3.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #208 +; CHECK-NEXT: add x9, sp, #224 ; CHECK-NEXT: ld1 { v0.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #96 -; CHECK-NEXT: ldr b2, [sp, #448] +; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: ldr b2, [sp, #464] ; CHECK-NEXT: ld1 { v1.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #8 +; CHECK-NEXT: add x9, sp, #24 ; CHECK-NEXT: ld1 { v3.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #104 +; CHECK-NEXT: add x8, sp, #120 ; CHECK-NEXT: ld1 { v0.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #216 +; CHECK-NEXT: add x10, sp, #232 ; CHECK-NEXT: ld1 { v18.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #152 -; CHECK-NEXT: ldr b21, [sp, #320] +; CHECK-NEXT: add x9, sp, #168 +; CHECK-NEXT: ldr b21, [sp, #336] ; CHECK-NEXT: ld1 { v1.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #272 +; CHECK-NEXT: add x10, sp, #288 ; CHECK-NEXT: ld1 { v0.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #224 +; CHECK-NEXT: add x8, sp, #240 ; CHECK-NEXT: ld1 { v3.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #112 +; CHECK-NEXT: add x9, sp, #128 ; CHECK-NEXT: ld1 { v4.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #160 +; CHECK-NEXT: add x10, sp, #176 ; CHECK-NEXT: ld1 { v1.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #280 +; CHECK-NEXT: add x8, sp, #296 ; CHECK-NEXT: ld1 { v0.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #232 +; CHECK-NEXT: add x9, sp, #248 ; CHECK-NEXT: ld1 { v3.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #120 +; CHECK-NEXT: add x10, sp, #136 ; CHECK-NEXT: ld1 { v4.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #168 +; CHECK-NEXT: add x8, sp, #184 ; CHECK-NEXT: ld1 { v1.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #288 +; CHECK-NEXT: add x9, sp, #304 ; CHECK-NEXT: ld1 { v0.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #16 +; CHECK-NEXT: add x10, sp, #32 ; CHECK-NEXT: ld1 { v3.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #240 +; CHECK-NEXT: add x8, sp, #256 ; CHECK-NEXT: ld1 { v4.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #176 +; CHECK-NEXT: add x9, sp, #192 ; CHECK-NEXT: ld1 { v18.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #296 +; CHECK-NEXT: add x10, sp, #312 ; CHECK-NEXT: ld1 { v1.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: add x8, sp, #40 ; CHECK-NEXT: ld1 { v3.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #248 +; CHECK-NEXT: add x9, sp, #264 ; CHECK-NEXT: ld1 { v4.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #184 +; CHECK-NEXT: add x10, sp, #200 ; CHECK-NEXT: ld1 { v18.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #304 +; CHECK-NEXT: add x8, sp, #320 ; CHECK-NEXT: ld1 { v1.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #32 +; CHECK-NEXT: add x9, sp, #48 ; CHECK-NEXT: ld1 { v3.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #584 +; CHECK-NEXT: add x10, sp, #600 ; CHECK-NEXT: ld1 { v4.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #312 +; CHECK-NEXT: add x8, sp, #328 ; CHECK-NEXT: ld1 { v18.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #456 +; CHECK-NEXT: add x9, sp, #472 ; CHECK-NEXT: ld1 { v5.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #600 +; CHECK-NEXT: add x10, sp, #616 ; CHECK-NEXT: sshll v6.8h, v0.8b, #0 -; CHECK-NEXT: ldr b0, [sp, #384] +; CHECK-NEXT: ldr b0, [sp, #400] ; CHECK-NEXT: ld1 { v4.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #40 +; CHECK-NEXT: add x8, sp, #56 ; CHECK-NEXT: ld1 { v2.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #464 -; CHECK-NEXT: ldr b19, [sp, #512] +; CHECK-NEXT: add x9, sp, #480 +; CHECK-NEXT: ldr b19, [sp, #528] ; CHECK-NEXT: ld1 { v18.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #592 +; CHECK-NEXT: add x8, sp, #608 ; CHECK-NEXT: sshll v16.8h, v1.8b, #0 -; CHECK-NEXT: ldr b1, [sp, #640] +; CHECK-NEXT: ldr b1, [sp, #656] ; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #472 +; CHECK-NEXT: add x9, sp, #488 ; CHECK-NEXT: ld1 { v5.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #48 +; CHECK-NEXT: add x8, sp, #64 ; CHECK-NEXT: mov v17.b[1], w1 ; CHECK-NEXT: sshll v7.8h, v3.8b, #0 ; CHECK-NEXT: ld1 { v18.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #608 +; CHECK-NEXT: add x8, sp, #624 ; CHECK-NEXT: ld1 { v5.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #616 +; CHECK-NEXT: add x10, sp, #632 ; CHECK-NEXT: ld1 { v2.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #480 +; CHECK-NEXT: add x9, sp, #496 ; CHECK-NEXT: mov v17.b[2], w2 ; CHECK-NEXT: sshll v4.8h, v4.8b, #0 ; CHECK-NEXT: ld1 { v5.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #56 +; CHECK-NEXT: add x8, sp, #72 ; CHECK-NEXT: ld1 { v2.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #488 +; CHECK-NEXT: add x9, sp, #504 ; CHECK-NEXT: mov v17.b[3], w3 ; CHECK-NEXT: ld1 { v18.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #624 +; CHECK-NEXT: add x8, sp, #640 ; CHECK-NEXT: ld1 { v5.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #648 +; CHECK-NEXT: add x10, sp, #664 ; CHECK-NEXT: ld1 { v2.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #496 +; CHECK-NEXT: add x9, sp, #512 ; CHECK-NEXT: mov v17.b[4], w4 ; CHECK-NEXT: ld1 { v1.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #656 +; CHECK-NEXT: add x10, sp, #672 ; CHECK-NEXT: ld1 { v5.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #632 +; CHECK-NEXT: add x8, sp, #648 ; CHECK-NEXT: ld1 { v2.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #504 +; CHECK-NEXT: add x9, sp, #520 ; CHECK-NEXT: mov v17.b[5], w5 ; CHECK-NEXT: ld1 { v1.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #664 +; CHECK-NEXT: add x10, sp, #680 ; CHECK-NEXT: ld1 { v5.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #392 +; CHECK-NEXT: add x8, sp, #408 ; CHECK-NEXT: ld1 { v2.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #520 +; CHECK-NEXT: add x9, sp, #536 ; CHECK-NEXT: mov v17.b[6], w6 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #328 +; CHECK-NEXT: add x8, sp, #344 ; CHECK-NEXT: ld1 { v19.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #400 +; CHECK-NEXT: add x9, sp, #416 ; CHECK-NEXT: ld1 { v1.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #672 +; CHECK-NEXT: add x10, sp, #688 ; CHECK-NEXT: ld1 { v21.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #336 +; CHECK-NEXT: add x8, sp, #352 ; CHECK-NEXT: ld1 { v0.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #408 +; CHECK-NEXT: add x9, sp, #424 ; CHECK-NEXT: ld1 { v19.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #536 +; CHECK-NEXT: add x11, sp, #552 ; CHECK-NEXT: ld1 { v1.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #680 +; CHECK-NEXT: add x10, sp, #696 ; CHECK-NEXT: ld1 { v21.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #344 +; CHECK-NEXT: add x8, sp, #360 ; CHECK-NEXT: ld1 { v0.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #416 +; CHECK-NEXT: add x9, sp, #432 ; CHECK-NEXT: sshll v3.8h, v18.8b, #0 ; CHECK-NEXT: ld1 { v19.b }[3], [x11] ; CHECK-NEXT: ld1 { v1.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #688 +; CHECK-NEXT: add x10, sp, #704 ; CHECK-NEXT: ld1 { v21.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #352 +; CHECK-NEXT: add x8, sp, #368 ; CHECK-NEXT: ld1 { v0.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #424 +; CHECK-NEXT: add x9, sp, #440 ; CHECK-NEXT: mov v17.b[7], w7 -; CHECK-NEXT: add x11, sp, #544 +; CHECK-NEXT: add x11, sp, #560 ; CHECK-NEXT: ld1 { v1.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #696 +; CHECK-NEXT: add x10, sp, #712 ; CHECK-NEXT: ld1 { v21.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #360 +; CHECK-NEXT: add x8, sp, #376 ; CHECK-NEXT: ld1 { v0.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #432 +; CHECK-NEXT: add x9, sp, #448 ; CHECK-NEXT: sshll v18.8h, v2.8b, #0 ; CHECK-NEXT: ld1 { v19.b }[4], [x11] ; CHECK-NEXT: sshll v20.8h, v5.8b, #0 ; CHECK-NEXT: ld1 { v1.b }[7], [x10] ; CHECK-NEXT: ld1 { v21.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #368 +; CHECK-NEXT: add x8, sp, #384 ; CHECK-NEXT: ld1 { v0.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #440 +; CHECK-NEXT: add x9, sp, #456 ; CHECK-NEXT: sshll v17.8h, v17.8b, #0 -; CHECK-NEXT: add x10, sp, #1032 +; CHECK-NEXT: add x10, sp, #1048 ; CHECK-NEXT: sshll v22.8h, v1.8b, #0 -; CHECK-NEXT: add x11, sp, #552 +; CHECK-NEXT: add x11, sp, #568 ; CHECK-NEXT: ld1 { v21.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #376 +; CHECK-NEXT: add x8, sp, #392 ; CHECK-NEXT: ld1 { v0.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #968 +; CHECK-NEXT: add x9, sp, #984 ; CHECK-NEXT: ld1 { v19.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #560 +; CHECK-NEXT: add x11, sp, #576 ; CHECK-NEXT: ld1 { v21.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #840 +; CHECK-NEXT: add x8, sp, #856 ; CHECK-NEXT: sshll v5.8h, v0.8b, #0 ; CHECK-NEXT: ld1 { v19.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #568 +; CHECK-NEXT: add x11, sp, #584 ; CHECK-NEXT: smull2 v0.4s, v3.8h, v5.8h ; CHECK-NEXT: sshll v2.8h, v21.8b, #0 -; CHECK-NEXT: ldr b21, [sp, #832] +; CHECK-NEXT: ldr b21, [sp, #848] ; CHECK-NEXT: smull v3.4s, v3.4h, v5.4h -; CHECK-NEXT: ldr b5, [sp, #960] +; CHECK-NEXT: ldr b5, [sp, #976] ; CHECK-NEXT: smull2 v1.4s, v17.8h, v2.8h ; CHECK-NEXT: ld1 { v19.b }[7], [x11] ; CHECK-NEXT: ld1 { v21.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #848 +; CHECK-NEXT: add x8, sp, #864 ; CHECK-NEXT: smull v2.4s, v17.4h, v2.4h ; CHECK-NEXT: ld1 { v5.b }[1], [x9] -; CHECK-NEXT: ldr b17, [sp, #1024] -; CHECK-NEXT: add x9, sp, #904 +; CHECK-NEXT: ldr b17, [sp, #1040] +; CHECK-NEXT: add x9, sp, #920 ; CHECK-NEXT: smlal2 v0.4s, v4.8h, v22.8h -; CHECK-NEXT: add x11, sp, #856 +; CHECK-NEXT: add x11, sp, #872 ; CHECK-NEXT: ld1 { v21.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #976 +; CHECK-NEXT: add x8, sp, #992 ; CHECK-NEXT: smlal v3.4s, v4.4h, v22.4h -; CHECK-NEXT: ldr b4, [sp, #896] +; CHECK-NEXT: ldr b4, [sp, #912] ; CHECK-NEXT: ld1 { v17.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #984 +; CHECK-NEXT: add x10, sp, #1000 ; CHECK-NEXT: ld1 { v5.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #1040 +; CHECK-NEXT: add x8, sp, #1056 ; CHECK-NEXT: ld1 { v4.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #912 +; CHECK-NEXT: add x9, sp, #928 ; CHECK-NEXT: ld1 { v21.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #864 +; CHECK-NEXT: add x11, sp, #880 ; CHECK-NEXT: ld1 { v17.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #1048 +; CHECK-NEXT: add x8, sp, #1064 ; CHECK-NEXT: ld1 { v5.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #992 +; CHECK-NEXT: add x10, sp, #1008 ; CHECK-NEXT: ld1 { v4.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #920 +; CHECK-NEXT: add x9, sp, #936 ; CHECK-NEXT: ld1 { v21.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #872 +; CHECK-NEXT: add x11, sp, #888 ; CHECK-NEXT: ld1 { v17.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #1056 +; CHECK-NEXT: add x8, sp, #1072 ; CHECK-NEXT: ld1 { v5.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #1000 +; CHECK-NEXT: add x10, sp, #1016 ; CHECK-NEXT: ld1 { v4.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #928 +; CHECK-NEXT: add x9, sp, #944 ; CHECK-NEXT: ld1 { v21.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #880 +; CHECK-NEXT: add x11, sp, #896 ; CHECK-NEXT: ld1 { v17.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #1064 +; CHECK-NEXT: add x8, sp, #1080 ; CHECK-NEXT: ld1 { v5.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #1008 +; CHECK-NEXT: add x10, sp, #1024 ; CHECK-NEXT: ld1 { v4.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #936 +; CHECK-NEXT: add x9, sp, #952 ; CHECK-NEXT: sshll v19.8h, v19.8b, #0 ; CHECK-NEXT: ld1 { v21.b }[6], [x11] ; CHECK-NEXT: ld1 { v17.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #1072 +; CHECK-NEXT: add x8, sp, #1088 ; CHECK-NEXT: ld1 { v5.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #1016 +; CHECK-NEXT: add x10, sp, #1032 ; CHECK-NEXT: ld1 { v4.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #944 +; CHECK-NEXT: add x9, sp, #960 ; CHECK-NEXT: smlal2 v1.4s, v16.8h, v20.8h -; CHECK-NEXT: add x11, sp, #888 +; CHECK-NEXT: add x11, sp, #904 ; CHECK-NEXT: ld1 { v17.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #1080 +; CHECK-NEXT: add x8, sp, #1096 ; CHECK-NEXT: smlal2 v0.4s, v7.8h, v19.8h ; CHECK-NEXT: ld1 { v5.b }[7], [x10] ; CHECK-NEXT: ld1 { v4.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #952 +; CHECK-NEXT: add x9, sp, #968 ; CHECK-NEXT: smlal v2.4s, v16.4h, v20.4h -; CHECK-NEXT: ldr b16, [sp, #768] -; CHECK-NEXT: add x10, sp, #776 +; CHECK-NEXT: ldr b16, [sp, #784] +; CHECK-NEXT: add x10, sp, #792 ; CHECK-NEXT: ld1 { v17.b }[7], [x8] ; CHECK-NEXT: smlal v3.4s, v7.4h, v19.4h -; CHECK-NEXT: ldr b19, [sp, #1152] -; CHECK-NEXT: add x8, sp, #1160 +; CHECK-NEXT: ldr b19, [sp, #1168] +; CHECK-NEXT: add x8, sp, #1176 ; CHECK-NEXT: ld1 { v4.b }[7], [x9] -; CHECK-NEXT: ldr b7, [sp, #704] -; CHECK-NEXT: add x9, sp, #712 +; CHECK-NEXT: ldr b7, [sp, #720] +; CHECK-NEXT: add x9, sp, #728 ; CHECK-NEXT: ld1 { v16.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #784 +; CHECK-NEXT: add x10, sp, #800 ; CHECK-NEXT: ld1 { v19.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #1168 +; CHECK-NEXT: add x8, sp, #1184 ; CHECK-NEXT: smlal2 v1.4s, v6.8h, v18.8h ; CHECK-NEXT: ld1 { v21.b }[7], [x11] ; CHECK-NEXT: smlal v2.4s, v6.4h, v18.4h -; CHECK-NEXT: ldr b18, [sp, #1408] -; CHECK-NEXT: add x11, sp, #1416 +; CHECK-NEXT: ldr b18, [sp, #1424] +; CHECK-NEXT: add x11, sp, #1432 ; CHECK-NEXT: ld1 { v7.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #720 +; CHECK-NEXT: add x9, sp, #736 ; CHECK-NEXT: ld1 { v16.b }[2], [x10] ; CHECK-NEXT: ld1 { v19.b }[2], [x8] -; CHECK-NEXT: add x10, sp, #792 +; CHECK-NEXT: add x10, sp, #808 ; CHECK-NEXT: ld1 { v18.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #1424 -; CHECK-NEXT: add x8, sp, #1176 +; CHECK-NEXT: add x11, sp, #1440 +; CHECK-NEXT: add x8, sp, #1192 ; CHECK-NEXT: ld1 { v7.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #728 +; CHECK-NEXT: add x9, sp, #744 ; CHECK-NEXT: ld1 { v16.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #800 +; CHECK-NEXT: add x10, sp, #816 ; CHECK-NEXT: ld1 { v18.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #1432 +; CHECK-NEXT: add x11, sp, #1448 ; CHECK-NEXT: ld1 { v19.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #1184 +; CHECK-NEXT: add x8, sp, #1200 ; CHECK-NEXT: ld1 { v7.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #736 +; CHECK-NEXT: add x9, sp, #752 ; CHECK-NEXT: ld1 { v16.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #808 +; CHECK-NEXT: add x10, sp, #824 ; CHECK-NEXT: ld1 { v18.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #1440 +; CHECK-NEXT: add x11, sp, #1456 ; CHECK-NEXT: ld1 { v19.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #1192 +; CHECK-NEXT: add x8, sp, #1208 ; CHECK-NEXT: ld1 { v7.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #744 +; CHECK-NEXT: add x9, sp, #760 ; CHECK-NEXT: ld1 { v16.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #816 +; CHECK-NEXT: add x10, sp, #832 ; CHECK-NEXT: ld1 { v18.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #1448 +; CHECK-NEXT: add x11, sp, #1464 ; CHECK-NEXT: ld1 { v19.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #1200 +; CHECK-NEXT: add x8, sp, #1216 ; CHECK-NEXT: ld1 { v7.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #752 +; CHECK-NEXT: add x9, sp, #768 ; CHECK-NEXT: ld1 { v16.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #824 +; CHECK-NEXT: add x10, sp, #840 ; CHECK-NEXT: ld1 { v18.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #1456 +; CHECK-NEXT: add x11, sp, #1472 ; CHECK-NEXT: ld1 { v19.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #1208 +; CHECK-NEXT: add x8, sp, #1224 ; CHECK-NEXT: ld1 { v7.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #760 +; CHECK-NEXT: add x9, sp, #776 ; CHECK-NEXT: ld1 { v16.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #1464 +; CHECK-NEXT: add x10, sp, #1480 ; CHECK-NEXT: ld1 { v18.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #1288 +; CHECK-NEXT: add x11, sp, #1304 ; CHECK-NEXT: ld1 { v19.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #1352 +; CHECK-NEXT: add x8, sp, #1368 ; CHECK-NEXT: sshll v20.8h, v17.8b, #0 ; CHECK-NEXT: ld1 { v7.b }[7], [x9] -; CHECK-NEXT: ldr b17, [sp, #1088] -; CHECK-NEXT: add x9, sp, #1096 +; CHECK-NEXT: ldr b17, [sp, #1104] +; CHECK-NEXT: add x9, sp, #1112 ; CHECK-NEXT: sshll v6.8h, v21.8b, #0 ; CHECK-NEXT: ld1 { v18.b }[7], [x10] ; CHECK-NEXT: sshll v21.8h, v16.8b, #0 -; CHECK-NEXT: ldr b16, [sp, #1344] +; CHECK-NEXT: ldr b16, [sp, #1360] ; CHECK-NEXT: sshll v19.8h, v19.8b, #0 ; CHECK-NEXT: ld1 { v17.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #1104 -; CHECK-NEXT: add x10, sp, #1224 +; CHECK-NEXT: add x9, sp, #1120 +; CHECK-NEXT: add x10, sp, #1240 ; CHECK-NEXT: smull v22.4s, v21.4h, v19.4h ; CHECK-NEXT: ld1 { v16.b }[1], [x8] ; CHECK-NEXT: smull2 v19.4s, v21.8h, v19.8h -; CHECK-NEXT: ldr b21, [sp, #1216] +; CHECK-NEXT: ldr b21, [sp, #1232] ; CHECK-NEXT: sshll v18.8h, v18.8b, #0 ; CHECK-NEXT: ld1 { v17.b }[2], [x9] -; CHECK-NEXT: add x8, sp, #1360 -; CHECK-NEXT: add x9, sp, #1112 +; CHECK-NEXT: add x8, sp, #1376 +; CHECK-NEXT: add x9, sp, #1128 ; CHECK-NEXT: smlal v22.4s, v20.4h, v18.4h ; CHECK-NEXT: ld1 { v21.b }[1], [x10] ; CHECK-NEXT: smlal2 v19.4s, v20.8h, v18.8h -; CHECK-NEXT: ldr b18, [sp, #1280] +; CHECK-NEXT: ldr b18, [sp, #1296] ; CHECK-NEXT: ld1 { v16.b }[2], [x8] -; CHECK-NEXT: add x10, sp, #1232 +; CHECK-NEXT: add x10, sp, #1248 ; CHECK-NEXT: ld1 { v17.b }[3], [x9] -; CHECK-NEXT: add x8, sp, #1368 +; CHECK-NEXT: add x8, sp, #1384 ; CHECK-NEXT: ld1 { v18.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #1296 -; CHECK-NEXT: add x9, sp, #1120 +; CHECK-NEXT: add x11, sp, #1312 +; CHECK-NEXT: add x9, sp, #1136 ; CHECK-NEXT: ld1 { v21.b }[2], [x10] ; CHECK-NEXT: ld1 { v16.b }[3], [x8] -; CHECK-NEXT: add x10, sp, #1240 -; CHECK-NEXT: add x8, sp, #1376 +; CHECK-NEXT: add x10, sp, #1256 +; CHECK-NEXT: add x8, sp, #1392 ; CHECK-NEXT: ld1 { v18.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #1304 +; CHECK-NEXT: add x11, sp, #1320 ; CHECK-NEXT: ld1 { v17.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #1128 +; CHECK-NEXT: add x9, sp, #1144 ; CHECK-NEXT: ld1 { v21.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #1248 +; CHECK-NEXT: add x10, sp, #1264 ; CHECK-NEXT: ld1 { v16.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #1384 +; CHECK-NEXT: add x8, sp, #1400 ; CHECK-NEXT: ld1 { v18.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #1312 +; CHECK-NEXT: add x11, sp, #1328 ; CHECK-NEXT: ld1 { v17.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #1136 +; CHECK-NEXT: add x9, sp, #1152 ; CHECK-NEXT: ld1 { v21.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #1256 +; CHECK-NEXT: add x10, sp, #1272 ; CHECK-NEXT: ld1 { v16.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #1392 +; CHECK-NEXT: add x8, sp, #1408 ; CHECK-NEXT: ld1 { v18.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #1320 +; CHECK-NEXT: add x11, sp, #1336 ; CHECK-NEXT: ld1 { v17.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #1144 +; CHECK-NEXT: add x9, sp, #1160 ; CHECK-NEXT: ld1 { v21.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #1264 +; CHECK-NEXT: add x10, sp, #1280 ; CHECK-NEXT: ld1 { v16.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #1400 +; CHECK-NEXT: add x8, sp, #1416 ; CHECK-NEXT: ld1 { v18.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #1328 +; CHECK-NEXT: add x11, sp, #1344 ; CHECK-NEXT: ld1 { v17.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #1272 +; CHECK-NEXT: add x9, sp, #1288 ; CHECK-NEXT: sshll v7.8h, v7.8b, #0 ; CHECK-NEXT: ld1 { v21.b }[6], [x10] ; CHECK-NEXT: ld1 { v16.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #1336 +; CHECK-NEXT: add x8, sp, #1352 ; CHECK-NEXT: ld1 { v18.b }[6], [x11] ; CHECK-NEXT: sshll v17.8h, v17.8b, #0 ; CHECK-NEXT: sshll v5.8h, v5.8b, #0 @@ -1662,6 +1673,7 @@ ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <48 x i8> %a to <48 x i32> @@ -1679,188 +1691,191 @@ define i32 @test_sdot_v48i8_double_nomla(<48 x i8> %a, <48 x i8> %b, <48 x i8> %c, <48 x i8> %d) { ; CHECK-LABEL: test_sdot_v48i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b0, [sp, #64] -; CHECK-NEXT: add x8, sp, #72 -; CHECK-NEXT: ldr b1, [sp, #192] -; CHECK-NEXT: add x9, sp, #80 -; CHECK-NEXT: ldr b4, [sp, #128] -; CHECK-NEXT: add x10, sp, #136 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldr b0, [sp, #80] +; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: ldr b1, [sp, #208] +; CHECK-NEXT: add x9, sp, #96 +; CHECK-NEXT: ldr b4, [sp, #144] +; CHECK-NEXT: add x10, sp, #152 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #200 -; CHECK-NEXT: add x11, sp, #152 -; CHECK-NEXT: ldr b6, [sp, #256] +; CHECK-NEXT: add x8, sp, #216 +; CHECK-NEXT: add x11, sp, #168 +; CHECK-NEXT: ldr b6, [sp, #272] ; CHECK-NEXT: ld1 { v4.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #144 +; CHECK-NEXT: add x10, sp, #160 ; CHECK-NEXT: ld1 { v1.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: add x8, sp, #104 ; CHECK-NEXT: ld1 { v0.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #208 -; CHECK-NEXT: ldr b7, [sp] +; CHECK-NEXT: add x9, sp, #224 +; CHECK-NEXT: ldr b7, [sp, #16] ; CHECK-NEXT: fmov s5, w0 ; CHECK-NEXT: ld1 { v4.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: add x10, sp, #128 ; CHECK-NEXT: ld1 { v1.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #96 +; CHECK-NEXT: add x9, sp, #112 ; CHECK-NEXT: ld1 { v0.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #216 +; CHECK-NEXT: add x8, sp, #232 ; CHECK-NEXT: mov v5.b[1], w1 -; CHECK-NEXT: ldr b3, [sp, #960] +; CHECK-NEXT: ldr b3, [sp, #976] ; CHECK-NEXT: ld1 { v4.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #264 +; CHECK-NEXT: add x11, sp, #280 ; CHECK-NEXT: ld1 { v1.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #104 +; CHECK-NEXT: add x8, sp, #120 ; CHECK-NEXT: ld1 { v0.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #224 +; CHECK-NEXT: add x9, sp, #240 ; CHECK-NEXT: ld1 { v6.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #304 +; CHECK-NEXT: add x11, sp, #320 ; CHECK-NEXT: mov v5.b[2], w2 -; CHECK-NEXT: ldr b19, [sp, #768] +; CHECK-NEXT: ldr b19, [sp, #784] ; CHECK-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #232 +; CHECK-NEXT: add x9, sp, #248 ; CHECK-NEXT: ld1 { v0.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #160 +; CHECK-NEXT: add x8, sp, #176 ; CHECK-NEXT: mov v5.b[3], w3 ; CHECK-NEXT: ld1 { v1.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #120 +; CHECK-NEXT: add x9, sp, #136 ; CHECK-NEXT: ld1 { v0.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #240 +; CHECK-NEXT: add x10, sp, #256 ; CHECK-NEXT: ld1 { v4.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #272 +; CHECK-NEXT: add x8, sp, #288 ; CHECK-NEXT: mov v5.b[4], w4 ; CHECK-NEXT: ld1 { v1.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #8 +; CHECK-NEXT: add x10, sp, #24 ; CHECK-NEXT: ld1 { v0.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #248 +; CHECK-NEXT: add x9, sp, #264 ; CHECK-NEXT: ld1 { v6.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: add x8, sp, #32 ; CHECK-NEXT: ld1 { v7.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #168 +; CHECK-NEXT: add x10, sp, #184 ; CHECK-NEXT: ld1 { v1.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #280 +; CHECK-NEXT: add x9, sp, #296 ; CHECK-NEXT: mov v5.b[5], w5 ; CHECK-NEXT: ld1 { v4.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #176 +; CHECK-NEXT: add x10, sp, #192 ; CHECK-NEXT: ld1 { v6.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #288 +; CHECK-NEXT: add x9, sp, #304 ; CHECK-NEXT: ld1 { v7.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: add x8, sp, #40 ; CHECK-NEXT: sshll v2.8h, v1.8b, #0 -; CHECK-NEXT: ldr b1, [sp, #832] +; CHECK-NEXT: ldr b1, [sp, #848] ; CHECK-NEXT: ld1 { v4.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #296 +; CHECK-NEXT: add x10, sp, #312 ; CHECK-NEXT: ld1 { v6.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #840 +; CHECK-NEXT: add x9, sp, #856 ; CHECK-NEXT: ld1 { v7.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #32 +; CHECK-NEXT: add x8, sp, #48 ; CHECK-NEXT: mov v5.b[6], w6 ; CHECK-NEXT: ld1 { v1.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #848 +; CHECK-NEXT: add x9, sp, #864 ; CHECK-NEXT: ld1 { v6.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #968 +; CHECK-NEXT: add x10, sp, #984 ; CHECK-NEXT: ld1 { v7.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #40 +; CHECK-NEXT: add x8, sp, #56 ; CHECK-NEXT: mov v5.b[7], w7 ; CHECK-NEXT: ld1 { v3.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #976 +; CHECK-NEXT: add x10, sp, #992 ; CHECK-NEXT: ld1 { v1.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #856 +; CHECK-NEXT: add x9, sp, #872 ; CHECK-NEXT: ld1 { v7.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #48 +; CHECK-NEXT: add x8, sp, #64 ; CHECK-NEXT: ld1 { v6.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #312 +; CHECK-NEXT: add x11, sp, #328 ; CHECK-NEXT: ld1 { v3.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #984 +; CHECK-NEXT: add x10, sp, #1000 ; CHECK-NEXT: ld1 { v1.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #864 +; CHECK-NEXT: add x9, sp, #880 ; CHECK-NEXT: ld1 { v7.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #56 +; CHECK-NEXT: add x8, sp, #72 ; CHECK-NEXT: ld1 { v6.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #184 +; CHECK-NEXT: add x11, sp, #200 ; CHECK-NEXT: ld1 { v3.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #712 +; CHECK-NEXT: add x10, sp, #728 ; CHECK-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #992 +; CHECK-NEXT: add x9, sp, #1008 ; CHECK-NEXT: ld1 { v7.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #872 +; CHECK-NEXT: add x8, sp, #888 ; CHECK-NEXT: sshll v16.8h, v6.8b, #0 -; CHECK-NEXT: ldr b6, [sp, #704] +; CHECK-NEXT: ldr b6, [sp, #720] ; CHECK-NEXT: ld1 { v3.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #1000 +; CHECK-NEXT: add x9, sp, #1016 ; CHECK-NEXT: ld1 { v1.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #880 +; CHECK-NEXT: add x8, sp, #896 ; CHECK-NEXT: sshll v18.8h, v7.8b, #0 ; CHECK-NEXT: ld1 { v6.b }[1], [x10] ; CHECK-NEXT: sshll v17.8h, v5.8b, #0 -; CHECK-NEXT: add x10, sp, #776 +; CHECK-NEXT: add x10, sp, #792 ; CHECK-NEXT: ld1 { v3.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #1008 +; CHECK-NEXT: add x9, sp, #1024 ; CHECK-NEXT: ld1 { v1.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #888 +; CHECK-NEXT: add x8, sp, #904 ; CHECK-NEXT: saddl2 v5.4s, v18.8h, v16.8h ; CHECK-NEXT: ld1 { v19.b }[1], [x10] ; CHECK-NEXT: saddl v16.4s, v18.4h, v16.4h -; CHECK-NEXT: ldr b18, [sp, #1024] +; CHECK-NEXT: ldr b18, [sp, #1040] ; CHECK-NEXT: ld1 { v3.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #1032 +; CHECK-NEXT: add x9, sp, #1048 ; CHECK-NEXT: ld1 { v1.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #1016 +; CHECK-NEXT: add x8, sp, #1032 ; CHECK-NEXT: saddl2 v7.4s, v17.8h, v2.8h -; CHECK-NEXT: add x10, sp, #904 +; CHECK-NEXT: add x10, sp, #920 ; CHECK-NEXT: saddl v2.4s, v17.4h, v2.4h ; CHECK-NEXT: ld1 { v18.b }[1], [x9] ; CHECK-NEXT: ld1 { v3.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #720 -; CHECK-NEXT: ldr b17, [sp, #896] -; CHECK-NEXT: add x9, sp, #1040 +; CHECK-NEXT: add x8, sp, #736 +; CHECK-NEXT: ldr b17, [sp, #912] +; CHECK-NEXT: add x9, sp, #1056 ; CHECK-NEXT: ld1 { v4.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #728 +; CHECK-NEXT: add x11, sp, #744 ; CHECK-NEXT: ld1 { v6.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #784 +; CHECK-NEXT: add x8, sp, #800 ; CHECK-NEXT: ld1 { v17.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #912 +; CHECK-NEXT: add x10, sp, #928 ; CHECK-NEXT: ld1 { v18.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #1048 +; CHECK-NEXT: add x9, sp, #1064 ; CHECK-NEXT: ld1 { v19.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #792 +; CHECK-NEXT: add x8, sp, #808 ; CHECK-NEXT: ld1 { v6.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #736 +; CHECK-NEXT: add x11, sp, #752 ; CHECK-NEXT: ld1 { v17.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #920 +; CHECK-NEXT: add x10, sp, #936 ; CHECK-NEXT: ld1 { v18.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #1056 +; CHECK-NEXT: add x9, sp, #1072 ; CHECK-NEXT: ld1 { v19.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #800 +; CHECK-NEXT: add x8, sp, #816 ; CHECK-NEXT: ld1 { v6.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #744 +; CHECK-NEXT: add x11, sp, #760 ; CHECK-NEXT: ld1 { v17.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #928 +; CHECK-NEXT: add x10, sp, #944 ; CHECK-NEXT: ld1 { v18.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #1064 +; CHECK-NEXT: add x9, sp, #1080 ; CHECK-NEXT: ld1 { v19.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #808 +; CHECK-NEXT: add x8, sp, #824 ; CHECK-NEXT: ld1 { v6.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #752 +; CHECK-NEXT: add x11, sp, #768 ; CHECK-NEXT: ld1 { v17.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #936 +; CHECK-NEXT: add x10, sp, #952 ; CHECK-NEXT: ld1 { v18.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #1072 +; CHECK-NEXT: add x9, sp, #1088 ; CHECK-NEXT: ld1 { v19.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #816 +; CHECK-NEXT: add x8, sp, #832 ; CHECK-NEXT: ld1 { v6.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #760 +; CHECK-NEXT: add x11, sp, #776 ; CHECK-NEXT: ld1 { v17.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #944 +; CHECK-NEXT: add x10, sp, #960 ; CHECK-NEXT: ld1 { v18.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #1080 +; CHECK-NEXT: add x9, sp, #1096 ; CHECK-NEXT: ld1 { v19.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #824 +; CHECK-NEXT: add x8, sp, #840 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-NEXT: ld1 { v6.b }[7], [x11] ; CHECK-NEXT: sshll v4.8h, v4.8b, #0 ; CHECK-NEXT: ld1 { v17.b }[6], [x10] ; CHECK-NEXT: ld1 { v18.b }[7], [x9] ; CHECK-NEXT: ld1 { v19.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #952 +; CHECK-NEXT: add x8, sp, #968 ; CHECK-NEXT: saddw2 v5.4s, v5.4s, v4.8h ; CHECK-NEXT: saddw2 v7.4s, v7.4s, v0.8h ; CHECK-NEXT: ld1 { v17.b }[7], [x8] @@ -1889,6 +1904,7 @@ ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %az = sext <48 x i8> %a to <48 x i32> @@ -2231,4 +2247,4 @@ %r2 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> %cz) %x = add i32 %r1, %r2 ret i32 %x -} \ No newline at end of file +} Index: llvm/test/CodeGen/AArch64/scavenge-large-call.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/scavenge-large-call.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=arm64_32-apple-watchos %s -o - | FileCheck %s + +; CHECK-LABEL: caller: +; CHECK: add {{x[0-9]+}}, sp, + +define void @caller(ptr %0, i16 %1, i16 %2, i8 %3, double %4, i16 %5, i8 %6, ptr %7, double %8, i32 %9, ptr %10, double %11, double %12, [2 x i64] %13, [2 x i64] %14, [2 x i64] %15, double %16, double %17, [2 x i64] %18, [2 x i64] %19, i16 %20, i32 %21, double %22, i8 %23, [2 x i64] %24, [2 x i64] %25, [2 x i64] %26, i8 %27, i16 %28, i16 %29, i16 %30, i32 %31, [2 x i64] %32, [2 x i64] %33, [2 x i64] %34, [2 x i64] %35, [2 x i64] %36, i32 %37, i32 %38) { + tail call void @callee(ptr %0, i16 %1, i16 %2, i8 %3, double 0.000000e+00, i16 %5, i8 %6, ptr %7, double 0.000000e+00, i32 %9, ptr %10, double 0.000000e+00, double 0.000000e+00, [2 x i64] %13, [2 x i64] %14, [2 x i64] %15, double 0.000000e+00, double 0.000000e+00, [2 x i64] %18, [2 x i64] %19, i16 %20, i32 %21, double 0.000000e+00, i8 %23, [2 x i64] %24, [2 x i64] %25, [2 x i64] zeroinitializer, i8 %27, i16 0, i16 0, i16 %28, i32 0, [2 x i64] zeroinitializer, [2 x i64] zeroinitializer, [2 x i64] zeroinitializer, [2 x i64] %35, [2 x i64] %36, i32 0, i32 0) + ret void +} + +declare void @callee(ptr, i16, i16, i8, double, i16, i8, ptr, double, i32, ptr, double, double, [2 x i64], [2 x i64], [2 x i64], double, double, [2 x i64], [2 x i64], i16, i32, double, i8, [2 x i64], [2 x i64], [2 x i64], i8, i16, i16, i16, i32, [2 x i64], [2 x i64], [2 x i64], [2 x i64], [2 x i64], i32, i32) Index: llvm/test/CodeGen/AArch64/stackmap.ll =================================================================== --- llvm/test/CodeGen/AArch64/stackmap.ll +++ llvm/test/CodeGen/AArch64/stackmap.ll @@ -38,7 +38,7 @@ ; CHECK-NEXT: .xword 16 ; CHECK-NEXT: .xword 1 ; CHECK-NEXT: .xword spilledValue -; CHECK-NEXT: .xword 144 +; CHECK-NEXT: .xword 160 ; CHECK-NEXT: .xword 1 ; CHECK-NEXT: .xword spilledStackMapValue ; CHECK-NEXT: .xword 128 Index: llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll @@ -11,7 +11,7 @@ define dso_local void @func1(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v6, ptr %v7, ptr %v8, ; CHECK-LABEL: func1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x25, [sp, #-64]! // 8-byte Folded Spill +; CHECK-NEXT: stp x29, x25, [sp, #-64]! // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill @@ -22,7 +22,8 @@ ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w23, -40 ; CHECK-NEXT: .cfi_offset w24, -48 -; CHECK-NEXT: .cfi_offset w25, -64 +; CHECK-NEXT: .cfi_offset w25, -56 +; CHECK-NEXT: .cfi_offset w29, -64 ; CHECK-NEXT: add x8, sp, #64 ; CHECK-NEXT: add x9, sp, #128 ; CHECK-NEXT: add x10, sp, #160 @@ -61,7 +62,7 @@ ; CHECK-NEXT: stp x12, x11, [sp, #320] ; CHECK-NEXT: stp x10, x9, [sp, #336] ; CHECK-NEXT: str x8, [sp, #352] -; CHECK-NEXT: ldr x25, [sp], #64 // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x25, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: b func2 ptr %v9, ptr %v10, ptr %v11, ptr %v12, ptr %v13, ptr %v14, ptr %v15, ptr %v16, ptr %v17, ptr %v18, ptr %v19, ptr %v20, ptr %v21, ptr %v22, ptr %v23, ptr %v24,