diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll @@ -0,0 +1,582 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=ve | FileCheck %s +; RUN: llc < %s -mtriple=ve --frame-pointer=all \ +; RUN: | FileCheck %s --check-prefix=CHECKFP + +;;; Check stack frame allocation with static and dynamic stack object with +;;; alignments as a test of getFrameIndexReference(). + +;; Allocated buffer places from 9 to 15 bytes in 16 bytes local vars area. + +; Function Attrs: nounwind +define i8* @test_frame7(i8* %0) { +; CHECK-LABEL: test_frame7: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -192 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 185(, %s11) +; CHECK-NEXT: st1b %s1, 185(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) +; +; CHECKFP-LABEL: test_frame7: +; CHECKFP: # %bb.0: +; CHECKFP-NEXT: st %s9, (, %s11) +; CHECKFP-NEXT: st %s10, 8(, %s11) +; CHECKFP-NEXT: st %s15, 24(, %s11) +; CHECKFP-NEXT: st %s16, 32(, %s11) +; CHECKFP-NEXT: or %s9, 0, %s11 +; CHECKFP-NEXT: lea %s13, -192 +; CHECKFP-NEXT: and %s13, %s13, (32)0 +; 
CHECKFP-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECKFP-NEXT: brge.l.t %s11, %s8, .LBB0_2 +; CHECKFP-NEXT: # %bb.1: +; CHECKFP-NEXT: ld %s61, 24(, %s14) +; CHECKFP-NEXT: or %s62, 0, %s0 +; CHECKFP-NEXT: lea %s63, 315 +; CHECKFP-NEXT: shm.l %s63, (%s61) +; CHECKFP-NEXT: shm.l %s8, 8(%s61) +; CHECKFP-NEXT: shm.l %s11, 16(%s61) +; CHECKFP-NEXT: monc +; CHECKFP-NEXT: or %s0, 0, %s62 +; CHECKFP-NEXT: .LBB0_2: +; CHECKFP-NEXT: ld1b.zx %s1, (, %s0) +; CHECKFP-NEXT: lea %s0, -7(, %s9) +; CHECKFP-NEXT: st1b %s1, -7(, %s9) +; CHECKFP-NEXT: or %s11, 0, %s9 +; CHECKFP-NEXT: ld %s16, 32(, %s11) +; CHECKFP-NEXT: ld %s15, 24(, %s11) +; CHECKFP-NEXT: ld %s10, 8(, %s11) +; CHECKFP-NEXT: ld %s9, (, %s11) +; CHECKFP-NEXT: b.l.t (, %s10) + %2 = alloca [7 x i8], align 1 + %3 = getelementptr inbounds [7 x i8], [7 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; Allocated buffer is aligned by 8, so it places from 8 to 14 bytes in 16 +;; bytes local vars area. 
+ +; Function Attrs: nounwind +define i8* @test_frame7_align8(i8* %0) { +; CHECK-LABEL: test_frame7_align8: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -192 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 184(, %s11) +; CHECK-NEXT: st1b %s1, 184(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) +; +; CHECKFP-LABEL: test_frame7_align8: +; CHECKFP: # %bb.0: +; CHECKFP-NEXT: st %s9, (, %s11) +; CHECKFP-NEXT: st %s10, 8(, %s11) +; CHECKFP-NEXT: st %s15, 24(, %s11) +; CHECKFP-NEXT: st %s16, 32(, %s11) +; CHECKFP-NEXT: or %s9, 0, %s11 +; CHECKFP-NEXT: lea %s13, -192 +; CHECKFP-NEXT: and %s13, %s13, (32)0 +; CHECKFP-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECKFP-NEXT: brge.l.t %s11, %s8, .LBB1_2 +; CHECKFP-NEXT: # %bb.1: +; CHECKFP-NEXT: ld %s61, 24(, %s14) +; CHECKFP-NEXT: or %s62, 0, %s0 +; CHECKFP-NEXT: lea %s63, 315 +; CHECKFP-NEXT: shm.l %s63, (%s61) +; CHECKFP-NEXT: shm.l %s8, 8(%s61) +; CHECKFP-NEXT: shm.l %s11, 16(%s61) +; CHECKFP-NEXT: monc +; CHECKFP-NEXT: or %s0, 0, %s62 +; CHECKFP-NEXT: .LBB1_2: +; CHECKFP-NEXT: ld1b.zx %s1, (, %s0) +; CHECKFP-NEXT: lea %s0, -8(, %s9) +; CHECKFP-NEXT: st1b %s1, -8(, %s9) +; CHECKFP-NEXT: or %s11, 0, %s9 +; CHECKFP-NEXT: ld %s16, 32(, %s11) +; CHECKFP-NEXT: ld %s15, 24(, %s11) +; 
CHECKFP-NEXT: ld %s10, 8(, %s11) +; CHECKFP-NEXT: ld %s9, (, %s11) +; CHECKFP-NEXT: b.l.t (, %s10) + %2 = alloca [7 x i8], align 8 + %3 = getelementptr inbounds [7 x i8], [7 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; Allocated buffer is aligned by 16, so it places from 0 to 15 bytes in 16 +;; bytes local vars area. + +; Function Attrs: nounwind +define i8* @test_frame16_align16(i8* %0) { +; CHECK-LABEL: test_frame16_align16: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -192 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 176(, %s11) +; CHECK-NEXT: st1b %s1, 176(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) +; +; CHECKFP-LABEL: test_frame16_align16: +; CHECKFP: # %bb.0: +; CHECKFP-NEXT: st %s9, (, %s11) +; CHECKFP-NEXT: st %s10, 8(, %s11) +; CHECKFP-NEXT: st %s15, 24(, %s11) +; CHECKFP-NEXT: st %s16, 32(, %s11) +; CHECKFP-NEXT: or %s9, 0, %s11 +; CHECKFP-NEXT: lea %s13, -192 +; CHECKFP-NEXT: and %s13, %s13, (32)0 +; CHECKFP-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECKFP-NEXT: brge.l.t %s11, %s8, .LBB2_2 +; CHECKFP-NEXT: # %bb.1: +; CHECKFP-NEXT: ld %s61, 24(, %s14) +; CHECKFP-NEXT: or %s62, 0, %s0 +; CHECKFP-NEXT: lea %s63, 315 +; CHECKFP-NEXT: shm.l %s63, 
(%s61) +; CHECKFP-NEXT: shm.l %s8, 8(%s61) +; CHECKFP-NEXT: shm.l %s11, 16(%s61) +; CHECKFP-NEXT: monc +; CHECKFP-NEXT: or %s0, 0, %s62 +; CHECKFP-NEXT: .LBB2_2: +; CHECKFP-NEXT: ld1b.zx %s1, (, %s0) +; CHECKFP-NEXT: lea %s0, -16(, %s9) +; CHECKFP-NEXT: st1b %s1, -16(, %s9) +; CHECKFP-NEXT: or %s11, 0, %s9 +; CHECKFP-NEXT: ld %s16, 32(, %s11) +; CHECKFP-NEXT: ld %s15, 24(, %s11) +; CHECKFP-NEXT: ld %s10, 8(, %s11) +; CHECKFP-NEXT: ld %s9, (, %s11) +; CHECKFP-NEXT: b.l.t (, %s10) + %2 = alloca [16 x i8], align 16 + %3 = getelementptr inbounds [16 x i8], [16 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; Allocated buffer is aligned by 32, so it places from 0 to 15 bytes in 48 +;; bytes local vars area. Or it places from 192 (aligned to 32 bytes) to +;; 207 bytes in 224 + alpha allocated local vars area. + +; Function Attrs: nounwind +define i8* @test_frame16_align32(i8* %0) { +; CHECK-LABEL: test_frame16_align32: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -224 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB3_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 192(, %s11) +; CHECK-NEXT: st1b %s1, 192(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) +; +; CHECKFP-LABEL: 
test_frame16_align32: +; CHECKFP: # %bb.0: +; CHECKFP-NEXT: st %s9, (, %s11) +; CHECKFP-NEXT: st %s10, 8(, %s11) +; CHECKFP-NEXT: st %s15, 24(, %s11) +; CHECKFP-NEXT: st %s16, 32(, %s11) +; CHECKFP-NEXT: or %s9, 0, %s11 +; CHECKFP-NEXT: lea %s13, -224 +; CHECKFP-NEXT: and %s13, %s13, (32)0 +; CHECKFP-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECKFP-NEXT: and %s11, %s11, (59)1 +; CHECKFP-NEXT: brge.l.t %s11, %s8, .LBB3_2 +; CHECKFP-NEXT: # %bb.1: +; CHECKFP-NEXT: ld %s61, 24(, %s14) +; CHECKFP-NEXT: or %s62, 0, %s0 +; CHECKFP-NEXT: lea %s63, 315 +; CHECKFP-NEXT: shm.l %s63, (%s61) +; CHECKFP-NEXT: shm.l %s8, 8(%s61) +; CHECKFP-NEXT: shm.l %s11, 16(%s61) +; CHECKFP-NEXT: monc +; CHECKFP-NEXT: or %s0, 0, %s62 +; CHECKFP-NEXT: .LBB3_2: +; CHECKFP-NEXT: ld1b.zx %s1, (, %s0) +; CHECKFP-NEXT: lea %s0, 192(, %s11) +; CHECKFP-NEXT: st1b %s1, 192(, %s11) +; CHECKFP-NEXT: or %s11, 0, %s9 +; CHECKFP-NEXT: ld %s16, 32(, %s11) +; CHECKFP-NEXT: ld %s15, 24(, %s11) +; CHECKFP-NEXT: ld %s10, 8(, %s11) +; CHECKFP-NEXT: ld %s9, (, %s11) +; CHECKFP-NEXT: b.l.t (, %s10) + %2 = alloca [16 x i8], align 32 + %3 = getelementptr inbounds [16 x i8], [16 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; Allocated buffer is aligned by 32, so it places from 0 to 31 bytes in 48 +;; + alpha bytes local vars area, or it places from 192 (32 bytes aligned 176) +;; to 223 in 224 + alpha bytes local vars area. 
+ +; Function Attrs: nounwind +define i8* @test_frame32_align32(i8* %0) { +; CHECK-LABEL: test_frame32_align32: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -224 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 192(, %s11) +; CHECK-NEXT: st1b %s1, 192(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) +; +; CHECKFP-LABEL: test_frame32_align32: +; CHECKFP: # %bb.0: +; CHECKFP-NEXT: st %s9, (, %s11) +; CHECKFP-NEXT: st %s10, 8(, %s11) +; CHECKFP-NEXT: st %s15, 24(, %s11) +; CHECKFP-NEXT: st %s16, 32(, %s11) +; CHECKFP-NEXT: or %s9, 0, %s11 +; CHECKFP-NEXT: lea %s13, -224 +; CHECKFP-NEXT: and %s13, %s13, (32)0 +; CHECKFP-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECKFP-NEXT: and %s11, %s11, (59)1 +; CHECKFP-NEXT: brge.l.t %s11, %s8, .LBB4_2 +; CHECKFP-NEXT: # %bb.1: +; CHECKFP-NEXT: ld %s61, 24(, %s14) +; CHECKFP-NEXT: or %s62, 0, %s0 +; CHECKFP-NEXT: lea %s63, 315 +; CHECKFP-NEXT: shm.l %s63, (%s61) +; CHECKFP-NEXT: shm.l %s8, 8(%s61) +; CHECKFP-NEXT: shm.l %s11, 16(%s61) +; CHECKFP-NEXT: monc +; CHECKFP-NEXT: or %s0, 0, %s62 +; CHECKFP-NEXT: .LBB4_2: +; CHECKFP-NEXT: ld1b.zx %s1, (, %s0) +; CHECKFP-NEXT: lea %s0, 192(, %s11) +; CHECKFP-NEXT: st1b %s1, 192(, %s11) +; CHECKFP-NEXT: or %s11, 0, %s9 
+; CHECKFP-NEXT: ld %s16, 32(, %s11) +; CHECKFP-NEXT: ld %s15, 24(, %s11) +; CHECKFP-NEXT: ld %s10, 8(, %s11) +; CHECKFP-NEXT: ld %s9, (, %s11) +; CHECKFP-NEXT: b.l.t (, %s10) + %2 = alloca [32 x i8], align 32 + %3 = getelementptr inbounds [32 x i8], [32 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; Dynamically allocated buffer is aligned by 16, so it places from 0 to 31 +;; bytes in allocated area, or it places from 240 (32 bytes aligned 176+64) +;; to 271 in allocated area (actually it places not newly allocated area +;; but in somewhere between newly allocated area and allocated area at the +;; prologue since VE ABI requires the reserved area at the top of stack). + +;; FIXME: (size+15)/16*16 is not enough. + +; Function Attrs: nounwind +define i8* @test_frame_dynalign16(i8* %0, i64 %1) { +; CHECK-LABEL: test_frame_dynalign16: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -240 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: or %s2, 0, %s0 +; CHECK-NEXT: lea %s0, 15(, %s1) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld1b.zx %s1, (, %s2) +; CHECK-NEXT: st1b %s1, (, %s0) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld 
%s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) +; +; CHECKFP-LABEL: test_frame_dynalign16: +; CHECKFP: # %bb.0: +; CHECKFP-NEXT: st %s9, (, %s11) +; CHECKFP-NEXT: st %s10, 8(, %s11) +; CHECKFP-NEXT: st %s15, 24(, %s11) +; CHECKFP-NEXT: st %s16, 32(, %s11) +; CHECKFP-NEXT: or %s9, 0, %s11 +; CHECKFP-NEXT: lea %s13, -240 +; CHECKFP-NEXT: and %s13, %s13, (32)0 +; CHECKFP-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECKFP-NEXT: brge.l.t %s11, %s8, .LBB5_2 +; CHECKFP-NEXT: # %bb.1: +; CHECKFP-NEXT: ld %s61, 24(, %s14) +; CHECKFP-NEXT: or %s62, 0, %s0 +; CHECKFP-NEXT: lea %s63, 315 +; CHECKFP-NEXT: shm.l %s63, (%s61) +; CHECKFP-NEXT: shm.l %s8, 8(%s61) +; CHECKFP-NEXT: shm.l %s11, 16(%s61) +; CHECKFP-NEXT: monc +; CHECKFP-NEXT: or %s0, 0, %s62 +; CHECKFP-NEXT: .LBB5_2: +; CHECKFP-NEXT: or %s2, 0, %s0 +; CHECKFP-NEXT: lea %s0, 15(, %s1) +; CHECKFP-NEXT: and %s0, -16, %s0 +; CHECKFP-NEXT: lea %s1, __ve_grow_stack@lo +; CHECKFP-NEXT: and %s1, %s1, (32)0 +; CHECKFP-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) +; CHECKFP-NEXT: bsic %s10, (, %s12) +; CHECKFP-NEXT: lea %s0, 240(, %s11) +; CHECKFP-NEXT: ld1b.zx %s1, (, %s2) +; CHECKFP-NEXT: st1b %s1, (, %s0) +; CHECKFP-NEXT: or %s11, 0, %s9 +; CHECKFP-NEXT: ld %s16, 32(, %s11) +; CHECKFP-NEXT: ld %s15, 24(, %s11) +; CHECKFP-NEXT: ld %s10, 8(, %s11) +; CHECKFP-NEXT: ld %s9, (, %s11) +; CHECKFP-NEXT: b.l.t (, %s10) + %3 = alloca i8, i64 %1, align 16 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; This test allocates static buffer with 16 bytes align and dynamic buffer +;; with 32 bytes align. In LLVM, stack frame is always aligned to 32 bytes +;; (bigger one). So, LLVM allocates 176 (RSA) + 64 (call site) + 32 (32 bytes +;; aligned 16 bytes data) + 16 (pad to align) if FP is not eliminated. 
+;; Statically allocated buffer is aligned to 16, so it places from 16 to 31 +;; bytes from BP in 32 + alpha bytes local vars area, or it places from 272 +;; to 287 bytes from BP in 288 + alpha bytes local vars area. +;; Dynamically allocated buffer is aligned to 32, so it places from aligned +;; address between 240 and 271 from SP. + +; Function Attrs: nounwind +define i8* @test_frame16_align16_dynalign32(i8* %0, i64 %n) { +; CHECK-LABEL: test_frame16_align16_dynalign32: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -288 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: or %s17, 0, %s11 +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: ld1b.zx %s0, (, %s0) +; CHECK-NEXT: st1b %s0, 272(, %s17) +; CHECK-NEXT: lea %s0, 15(, %s1) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) +; CHECK-NEXT: or %s1, -32, (0)1 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld1b.zx %s1, 272(, %s17) +; CHECK-NEXT: lea %s0, 31(, %s0) +; CHECK-NEXT: and %s0, -32, %s0 +; CHECK-NEXT: st1b %s1, (, %s0) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) +; +; CHECKFP-LABEL: 
test_frame16_align16_dynalign32: +; CHECKFP: # %bb.0: +; CHECKFP-NEXT: st %s9, (, %s11) +; CHECKFP-NEXT: st %s10, 8(, %s11) +; CHECKFP-NEXT: st %s15, 24(, %s11) +; CHECKFP-NEXT: st %s16, 32(, %s11) +; CHECKFP-NEXT: st %s17, 40(, %s11) +; CHECKFP-NEXT: or %s9, 0, %s11 +; CHECKFP-NEXT: lea %s13, -288 +; CHECKFP-NEXT: and %s13, %s13, (32)0 +; CHECKFP-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECKFP-NEXT: and %s11, %s11, (59)1 +; CHECKFP-NEXT: or %s17, 0, %s11 +; CHECKFP-NEXT: brge.l.t %s11, %s8, .LBB6_2 +; CHECKFP-NEXT: # %bb.1: +; CHECKFP-NEXT: ld %s61, 24(, %s14) +; CHECKFP-NEXT: or %s62, 0, %s0 +; CHECKFP-NEXT: lea %s63, 315 +; CHECKFP-NEXT: shm.l %s63, (%s61) +; CHECKFP-NEXT: shm.l %s8, 8(%s61) +; CHECKFP-NEXT: shm.l %s11, 16(%s61) +; CHECKFP-NEXT: monc +; CHECKFP-NEXT: or %s0, 0, %s62 +; CHECKFP-NEXT: .LBB6_2: +; CHECKFP-NEXT: ld1b.zx %s0, (, %s0) +; CHECKFP-NEXT: st1b %s0, 272(, %s17) +; CHECKFP-NEXT: lea %s0, 15(, %s1) +; CHECKFP-NEXT: and %s0, -16, %s0 +; CHECKFP-NEXT: lea %s1, __ve_grow_stack_align@lo +; CHECKFP-NEXT: and %s1, %s1, (32)0 +; CHECKFP-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) +; CHECKFP-NEXT: or %s1, -32, (0)1 +; CHECKFP-NEXT: bsic %s10, (, %s12) +; CHECKFP-NEXT: lea %s0, 240(, %s11) +; CHECKFP-NEXT: ld1b.zx %s1, 272(, %s17) +; CHECKFP-NEXT: lea %s0, 31(, %s0) +; CHECKFP-NEXT: and %s0, -32, %s0 +; CHECKFP-NEXT: st1b %s1, (, %s0) +; CHECKFP-NEXT: or %s11, 0, %s9 +; CHECKFP-NEXT: ld %s17, 40(, %s11) +; CHECKFP-NEXT: ld %s16, 32(, %s11) +; CHECKFP-NEXT: ld %s15, 24(, %s11) +; CHECKFP-NEXT: ld %s10, 8(, %s11) +; CHECKFP-NEXT: ld %s9, (, %s11) +; CHECKFP-NEXT: b.l.t (, %s10) + %2 = alloca [16 x i8], align 16 + %3 = getelementptr inbounds [16 x i8], [16 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + %5 = alloca i8, i64 %n, align 32 + %6 = load i8, i8* %3, align 1 + store i8 %6, i8* %5, align 1 + ret i8* %5 +} + diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll 
b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll --- a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll +++ b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll @@ -1,26 +1,92 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=ve | FileCheck %s -; RUN: llc < %s -mtriple=ve -relocation-model=pic | FileCheck %s --check-prefix=PIC +; RUN: llc < %s -mtriple=ve -relocation-model=pic \ +; RUN: | FileCheck %s --check-prefix=PIC -;; Check stack frame allocation of a function which calls other functions +;;; Check stack frame allocation of a function which calls other functions +;;; under following conditions and combinations of them: +;;; - access variable or not +;;; - no stack object, a stack object using BP, or a stack object not using BP +;;; - isPositionIndependent or not -; Function Attrs: norecurse nounwind readnone -define signext i32 @test_frame0(i32 signext %0) { +@data = external global i8, align 1 + +; Function Attrs: nounwind +define i8* @test_frame0(i8* %0, i8* %1) { ; CHECK-LABEL: test_frame0: ; CHECK: # %bb.0: -; CHECK-NEXT: adds.w.sx %s0, 3, %s0 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -240 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: lea %s2, fun@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s12, fun@hi(, %s2) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; 
CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) ; CHECK-NEXT: b.l.t (, %s10) ; ; PIC-LABEL: test_frame0: ; PIC: # %bb.0: -; PIC-NEXT: adds.w.sx %s0, 3, %s0 -; PIC-NEXT: adds.w.sx %s0, %s0, (0)1 +; PIC-NEXT: st %s9, (, %s11) +; PIC-NEXT: st %s10, 8(, %s11) +; PIC-NEXT: st %s15, 24(, %s11) +; PIC-NEXT: st %s16, 32(, %s11) +; PIC-NEXT: or %s9, 0, %s11 +; PIC-NEXT: lea %s13, -240 +; PIC-NEXT: and %s13, %s13, (32)0 +; PIC-NEXT: lea.sl %s11, -1(%s13, %s11) +; PIC-NEXT: brge.l.t %s11, %s8, .LBB0_2 +; PIC-NEXT: # %bb.1: +; PIC-NEXT: ld %s61, 24(, %s14) +; PIC-NEXT: or %s62, 0, %s0 +; PIC-NEXT: lea %s63, 315 +; PIC-NEXT: shm.l %s63, (%s61) +; PIC-NEXT: shm.l %s8, 8(%s61) +; PIC-NEXT: shm.l %s11, 16(%s61) +; PIC-NEXT: monc +; PIC-NEXT: or %s0, 0, %s62 +; PIC-NEXT: .LBB0_2: +; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) +; PIC-NEXT: and %s15, %s15, (32)0 +; PIC-NEXT: sic %s16 +; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: lea %s12, fun@plt_lo(-24) +; PIC-NEXT: and %s12, %s12, (32)0 +; PIC-NEXT: sic %s16 +; PIC-NEXT: lea.sl %s12, fun@plt_hi(%s16, %s12) +; PIC-NEXT: bsic %s10, (, %s12) +; PIC-NEXT: or %s11, 0, %s9 +; PIC-NEXT: ld %s16, 32(, %s11) +; PIC-NEXT: ld %s15, 24(, %s11) +; PIC-NEXT: ld %s10, 8(, %s11) +; PIC-NEXT: ld %s9, (, %s11) ; PIC-NEXT: b.l.t (, %s10) - %2 = add nsw i32 %0, 3 - ret i32 %2 + %3 = tail call i8* @fun(i8* %0, i8* %1) + ret i8* %3 } +declare i8* @fun(i8*, i8*) + ; Function Attrs: nounwind define i8* @test_frame32(i8* %0) { ; CHECK-LABEL: test_frame32: @@ -106,23 +172,24 @@ ; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) -declare i8* @fun(i8*, i8*) - ; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) ; Function Attrs: nounwind -define i8* @test_frame64(i8* %0) { -; CHECK-LABEL: test_frame64: +define i8* 
@test_align32(i32 signext %0, i8* nocapture readnone %1) { +; CHECK-LABEL: test_align32: ; CHECK: # %bb.0: ; CHECK-NEXT: st %s9, (, %s11) ; CHECK-NEXT: st %s10, 8(, %s11) ; CHECK-NEXT: st %s15, 24(, %s11) ; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) ; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s13, -304 +; CHECK-NEXT: lea %s13, -288 ; CHECK-NEXT: and %s13, %s13, (32)0 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: or %s17, 0, %s11 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB2_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ld %s61, 24(, %s14) @@ -134,29 +201,42 @@ ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: or %s1, 0, %s0 +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) +; CHECK-NEXT: or %s1, -32, (0)1 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: lea %s0, 31(, %s0) +; CHECK-NEXT: and %s1, -32, %s0 ; CHECK-NEXT: lea %s0, fun@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, fun@hi(, %s0) -; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: lea %s0, 256(, %s17) ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) ; CHECK-NEXT: ld %s16, 32(, %s11) ; CHECK-NEXT: ld %s15, 24(, %s11) ; CHECK-NEXT: ld %s10, 8(, %s11) ; CHECK-NEXT: ld %s9, (, %s11) ; CHECK-NEXT: b.l.t (, %s10) ; -; PIC-LABEL: test_frame64: +; PIC-LABEL: test_align32: ; PIC: # %bb.0: ; PIC-NEXT: st %s9, (, %s11) ; PIC-NEXT: st %s10, 8(, %s11) ; PIC-NEXT: st %s15, 24(, %s11) ; PIC-NEXT: st %s16, 32(, %s11) +; PIC-NEXT: st %s17, 40(, %s11) ; PIC-NEXT: or %s9, 0, %s11 -; PIC-NEXT: lea %s13, -304 +; PIC-NEXT: lea %s13, -288 ; PIC-NEXT: and %s13, %s13, (32)0 ; PIC-NEXT: lea.sl %s11, -1(%s13, %s11) +; PIC-NEXT: and %s11, %s11, (59)1 +; PIC-NEXT: or %s17, 
0, %s11 ; PIC-NEXT: brge.l.t %s11, %s8, .LBB2_2 ; PIC-NEXT: # %bb.1: ; PIC-NEXT: ld %s61, 24(, %s14) @@ -168,41 +248,54 @@ ; PIC-NEXT: monc ; PIC-NEXT: or %s0, 0, %s62 ; PIC-NEXT: .LBB2_2: -; PIC-NEXT: or %s1, 0, %s0 ; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: lea %s0, 15(, %s0) +; PIC-NEXT: and %s0, -16, %s0 +; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) +; PIC-NEXT: and %s12, %s12, (32)0 +; PIC-NEXT: sic %s16 +; PIC-NEXT: lea.sl %s12, __ve_grow_stack_align@plt_hi(%s16, %s12) +; PIC-NEXT: or %s1, -32, (0)1 +; PIC-NEXT: bsic %s10, (, %s12) +; PIC-NEXT: lea %s0, 240(, %s11) +; PIC-NEXT: lea %s0, 31(, %s0) +; PIC-NEXT: and %s1, -32, %s0 ; PIC-NEXT: lea %s12, fun@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s12, fun@plt_hi(%s16, %s12) -; PIC-NEXT: lea %s0, 240(, %s11) +; PIC-NEXT: lea %s0, 256(, %s17) ; PIC-NEXT: bsic %s10, (, %s12) ; PIC-NEXT: or %s11, 0, %s9 +; PIC-NEXT: ld %s17, 40(, %s11) ; PIC-NEXT: ld %s16, 32(, %s11) ; PIC-NEXT: ld %s15, 24(, %s11) ; PIC-NEXT: ld %s10, 8(, %s11) ; PIC-NEXT: ld %s9, (, %s11) ; PIC-NEXT: b.l.t (, %s10) - %2 = alloca [64 x i8], align 1 - %3 = getelementptr inbounds [64 x i8], [64 x i8]* %2, i64 0, i64 0 - call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %3) - %4 = call i8* @fun(i8* nonnull %3, i8* %0) - call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %3) - ret i8* %4 + %3 = alloca [32 x i8], align 32 + %4 = getelementptr inbounds [32 x i8], [32 x i8]* %3, i64 0, i64 0 + call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %4) + %5 = sext i32 %0 to i64 + %6 = alloca i8, i64 %5, align 32 + %7 = call i8* @fun(i8* nonnull %4, i8* nonnull %6) + call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %4) + ret i8* %7 } ; Function Attrs: nounwind -define i8* @test_frame128(i8* %0) { -; CHECK-LABEL: test_frame128: +define i8* 
@test_frame0_var(i8* %0, i8* %1) { +; CHECK-LABEL: test_frame0_var: ; CHECK: # %bb.0: ; CHECK-NEXT: st %s9, (, %s11) ; CHECK-NEXT: st %s10, 8(, %s11) ; CHECK-NEXT: st %s15, 24(, %s11) ; CHECK-NEXT: st %s16, 32(, %s11) ; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s13, -368 +; CHECK-NEXT: lea %s13, -240 ; CHECK-NEXT: and %s13, %s13, (32)0 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB3_2 @@ -216,11 +309,14 @@ ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB3_2: -; CHECK-NEXT: or %s1, 0, %s0 -; CHECK-NEXT: lea %s0, fun@lo -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s12, fun@hi(, %s0) -; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: lea %s2, data@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, data@hi(, %s2) +; CHECK-NEXT: ld1b.zx %s2, (, %s2) +; CHECK-NEXT: st1b %s2, (, %s0) +; CHECK-NEXT: lea %s2, fun@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s12, fun@hi(, %s2) ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 ; CHECK-NEXT: ld %s16, 32(, %s11) @@ -229,14 +325,14 @@ ; CHECK-NEXT: ld %s9, (, %s11) ; CHECK-NEXT: b.l.t (, %s10) ; -; PIC-LABEL: test_frame128: +; PIC-LABEL: test_frame0_var: ; PIC: # %bb.0: ; PIC-NEXT: st %s9, (, %s11) ; PIC-NEXT: st %s10, 8(, %s11) ; PIC-NEXT: st %s15, 24(, %s11) ; PIC-NEXT: st %s16, 32(, %s11) ; PIC-NEXT: or %s9, 0, %s11 -; PIC-NEXT: lea %s13, -368 +; PIC-NEXT: lea %s13, -240 ; PIC-NEXT: and %s13, %s13, (32)0 ; PIC-NEXT: lea.sl %s11, -1(%s13, %s11) ; PIC-NEXT: brge.l.t %s11, %s8, .LBB3_2 @@ -250,16 +346,20 @@ ; PIC-NEXT: monc ; PIC-NEXT: or %s0, 0, %s62 ; PIC-NEXT: .LBB3_2: -; PIC-NEXT: or %s1, 0, %s0 ; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: lea %s2, data@got_lo +; PIC-NEXT: and %s2, %s2, (32)0 +; PIC-NEXT: lea.sl %s2, data@got_hi(, %s2) +; PIC-NEXT: ld %s2, (%s2, %s15) +; 
PIC-NEXT: ld1b.zx %s2, (, %s2) +; PIC-NEXT: st1b %s2, (, %s0) ; PIC-NEXT: lea %s12, fun@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s12, fun@plt_hi(%s16, %s12) -; PIC-NEXT: lea %s0, 240(, %s11) ; PIC-NEXT: bsic %s10, (, %s12) ; PIC-NEXT: or %s11, 0, %s9 ; PIC-NEXT: ld %s16, 32(, %s11) @@ -267,24 +367,22 @@ ; PIC-NEXT: ld %s10, 8(, %s11) ; PIC-NEXT: ld %s9, (, %s11) ; PIC-NEXT: b.l.t (, %s10) - %2 = alloca [128 x i8], align 1 - %3 = getelementptr inbounds [128 x i8], [128 x i8]* %2, i64 0, i64 0 - call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %3) - %4 = call i8* @fun(i8* nonnull %3, i8* %0) - call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %3) + %3 = load i8, i8* @data, align 1 + store i8 %3, i8* %0, align 1 + %4 = tail call i8* @fun(i8* nonnull %0, i8* %1) ret i8* %4 } ; Function Attrs: nounwind -define i8* @test_frame65536(i8* %0) { -; CHECK-LABEL: test_frame65536: +define i8* @test_frame32_var(i8* %0) { +; CHECK-LABEL: test_frame32_var: ; CHECK: # %bb.0: ; CHECK-NEXT: st %s9, (, %s11) ; CHECK-NEXT: st %s10, 8(, %s11) ; CHECK-NEXT: st %s15, 24(, %s11) ; CHECK-NEXT: st %s16, 32(, %s11) ; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s13, -65776 +; CHECK-NEXT: lea %s13, -272 ; CHECK-NEXT: and %s13, %s13, (32)0 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2 @@ -298,7 +396,12 @@ ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: lea %s1, data@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s1, data@hi(, %s1) +; CHECK-NEXT: ld1b.zx %s2, (, %s1) ; CHECK-NEXT: or %s1, 0, %s0 +; CHECK-NEXT: st1b %s2, 240(, %s11) ; CHECK-NEXT: lea %s0, fun@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, fun@hi(, %s0) @@ -311,14 +414,14 @@ ; CHECK-NEXT: ld %s9, (, %s11) ; CHECK-NEXT: b.l.t (, %s10) ; -; PIC-LABEL: test_frame65536: +; PIC-LABEL: test_frame32_var: ; PIC: # %bb.0: ; PIC-NEXT: st %s9, (, %s11) ; PIC-NEXT: st %s10, 
8(, %s11) ; PIC-NEXT: st %s15, 24(, %s11) ; PIC-NEXT: st %s16, 32(, %s11) ; PIC-NEXT: or %s9, 0, %s11 -; PIC-NEXT: lea %s13, -65776 +; PIC-NEXT: lea %s13, -272 ; PIC-NEXT: and %s13, %s13, (32)0 ; PIC-NEXT: lea.sl %s11, -1(%s13, %s11) ; PIC-NEXT: brge.l.t %s11, %s8, .LBB4_2 @@ -332,11 +435,17 @@ ; PIC-NEXT: monc ; PIC-NEXT: or %s0, 0, %s62 ; PIC-NEXT: .LBB4_2: -; PIC-NEXT: or %s1, 0, %s0 ; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: lea %s1, data@got_lo +; PIC-NEXT: and %s1, %s1, (32)0 +; PIC-NEXT: lea.sl %s1, data@got_hi(, %s1) +; PIC-NEXT: ld %s1, (%s1, %s15) +; PIC-NEXT: ld1b.zx %s2, (, %s1) +; PIC-NEXT: or %s1, 0, %s0 +; PIC-NEXT: st1b %s2, 240(, %s11) ; PIC-NEXT: lea %s12, fun@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; PIC-NEXT: sic %s16 @@ -349,26 +458,31 @@ ; PIC-NEXT: ld %s10, 8(, %s11) ; PIC-NEXT: ld %s9, (, %s11) ; PIC-NEXT: b.l.t (, %s10) - %2 = alloca [65536 x i8], align 1 - %3 = getelementptr inbounds [65536 x i8], [65536 x i8]* %2, i64 0, i64 0 - call void @llvm.lifetime.start.p0i8(i64 65536, i8* nonnull %3) - %4 = call i8* @fun(i8* nonnull %3, i8* %0) - call void @llvm.lifetime.end.p0i8(i64 65536, i8* nonnull %3) - ret i8* %4 + %2 = alloca [32 x i8], align 1 + %3 = getelementptr inbounds [32 x i8], [32 x i8]* %2, i64 0, i64 0 + call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %3) + %4 = load i8, i8* @data, align 1 + store i8 %4, i8* %3, align 1 + %5 = call i8* @fun(i8* nonnull %3, i8* %0) + call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %3) + ret i8* %5 } ; Function Attrs: nounwind -define i8* @test_frame4294967296(i8* %0) { -; CHECK-LABEL: test_frame4294967296: +define i8* @test_align32_var(i32 signext %0, i8* nocapture readnone %1) { +; CHECK-LABEL: test_align32_var: ; CHECK: # %bb.0: ; CHECK-NEXT: st %s9, (, %s11) ; CHECK-NEXT: st %s10, 8(, %s11) ; CHECK-NEXT: st %s15, 24(, %s11) ; 
CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) ; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s13, -240 +; CHECK-NEXT: lea %s13, -288 ; CHECK-NEXT: and %s13, %s13, (32)0 -; CHECK-NEXT: lea.sl %s11, -2(%s13, %s11) +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: or %s17, 0, %s11 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB5_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ld %s61, 24(, %s14) @@ -380,29 +494,47 @@ ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB5_2: -; CHECK-NEXT: or %s1, 0, %s0 +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) +; CHECK-NEXT: or %s1, -32, (0)1 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: lea %s1, data@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s1, data@hi(, %s1) +; CHECK-NEXT: ld1b.zx %s2, (, %s1) +; CHECK-NEXT: lea %s0, 31(, %s0) +; CHECK-NEXT: and %s1, -32, %s0 +; CHECK-NEXT: st1b %s2, (, %s1) ; CHECK-NEXT: lea %s0, fun@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, fun@hi(, %s0) -; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: lea %s0, 256(, %s17) ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) ; CHECK-NEXT: ld %s16, 32(, %s11) ; CHECK-NEXT: ld %s15, 24(, %s11) ; CHECK-NEXT: ld %s10, 8(, %s11) ; CHECK-NEXT: ld %s9, (, %s11) ; CHECK-NEXT: b.l.t (, %s10) ; -; PIC-LABEL: test_frame4294967296: +; PIC-LABEL: test_align32_var: ; PIC: # %bb.0: ; PIC-NEXT: st %s9, (, %s11) ; PIC-NEXT: st %s10, 8(, %s11) ; PIC-NEXT: st %s15, 24(, %s11) ; PIC-NEXT: st %s16, 32(, %s11) +; PIC-NEXT: st %s17, 40(, %s11) ; PIC-NEXT: or %s9, 0, %s11 -; PIC-NEXT: lea %s13, -240 +; PIC-NEXT: lea %s13, -288 ; PIC-NEXT: and %s13, %s13, (32)0 -; PIC-NEXT: lea.sl %s11, -2(%s13, %s11) +; PIC-NEXT: lea.sl %s11, 
-1(%s13, %s11) +; PIC-NEXT: and %s11, %s11, (59)1 +; PIC-NEXT: or %s17, 0, %s11 ; PIC-NEXT: brge.l.t %s11, %s8, .LBB5_2 ; PIC-NEXT: # %bb.1: ; PIC-NEXT: ld %s61, 24(, %s14) @@ -414,27 +546,48 @@ ; PIC-NEXT: monc ; PIC-NEXT: or %s0, 0, %s62 ; PIC-NEXT: .LBB5_2: -; PIC-NEXT: or %s1, 0, %s0 ; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: lea %s0, 15(, %s0) +; PIC-NEXT: and %s0, -16, %s0 +; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) +; PIC-NEXT: and %s12, %s12, (32)0 +; PIC-NEXT: sic %s16 +; PIC-NEXT: lea.sl %s12, __ve_grow_stack_align@plt_hi(%s16, %s12) +; PIC-NEXT: or %s1, -32, (0)1 +; PIC-NEXT: bsic %s10, (, %s12) +; PIC-NEXT: lea %s0, data@got_lo +; PIC-NEXT: and %s0, %s0, (32)0 +; PIC-NEXT: lea.sl %s0, data@got_hi(, %s0) +; PIC-NEXT: ld %s0, (%s0, %s15) +; PIC-NEXT: lea %s1, 240(, %s11) +; PIC-NEXT: ld1b.zx %s0, (, %s0) +; PIC-NEXT: lea %s1, 31(, %s1) +; PIC-NEXT: and %s1, -32, %s1 +; PIC-NEXT: st1b %s0, (, %s1) ; PIC-NEXT: lea %s12, fun@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s12, fun@plt_hi(%s16, %s12) -; PIC-NEXT: lea %s0, 240(, %s11) +; PIC-NEXT: lea %s0, 256(, %s17) ; PIC-NEXT: bsic %s10, (, %s12) ; PIC-NEXT: or %s11, 0, %s9 +; PIC-NEXT: ld %s17, 40(, %s11) ; PIC-NEXT: ld %s16, 32(, %s11) ; PIC-NEXT: ld %s15, 24(, %s11) ; PIC-NEXT: ld %s10, 8(, %s11) ; PIC-NEXT: ld %s9, (, %s11) ; PIC-NEXT: b.l.t (, %s10) - %2 = alloca [4294967296 x i8], align 1 - %3 = getelementptr inbounds [4294967296 x i8], [4294967296 x i8]* %2, i64 0, i64 0 - call void @llvm.lifetime.start.p0i8(i64 4294967296, i8* nonnull %3) - %4 = call i8* @fun(i8* nonnull %3, i8* %0) - call void @llvm.lifetime.end.p0i8(i64 4294967296, i8* nonnull %3) - ret i8* %4 + %3 = alloca [32 x i8], align 32 + %4 = getelementptr inbounds [32 x i8], [32 x i8]* %3, i64 0, i64 0 + call void @llvm.lifetime.start.p0i8(i64 
32, i8* nonnull %4) + %5 = sext i32 %0 to i64 + %6 = alloca i8, i64 %5, align 32 + %7 = load i8, i8* @data, align 1 + store i8 %7, i8* %6, align 32 + %8 = call i8* @fun(i8* nonnull %4, i8* nonnull %6) + call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %4) + ret i8* %8 } diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll --- a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll +++ b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll @@ -1,30 +1,32 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=ve | FileCheck %s -; RUN: llc < %s -mtriple=ve -relocation-model=pic | FileCheck %s --check-prefix=PIC +; RUN: llc < %s -mtriple=ve -relocation-model=pic \ +; RUN: | FileCheck %s --check-prefix=PIC -;; Check stack frame allocation of a function which doesn't call other functions +;;; Check stack frame allocation of a function which does not call other +;;; functions under the following conditions and combinations of them: +;;; - access variable or not +;;; - no stack object, a stack object using BP, or a stack object not using BP +;;; - isPositionIndependent or not @data = external global i8, align 1 ; Function Attrs: norecurse nounwind readnone -define signext i32 @test_frame0(i32 signext %0) { +define i8* @test_frame0(i8* nocapture readnone %0, i8* readnone returned %1) { ; CHECK-LABEL: test_frame0: ; CHECK: # %bb.0: -; CHECK-NEXT: adds.w.sx %s0, 3, %s0 -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: or %s0, 0, %s1 ; CHECK-NEXT: b.l.t (, %s10) ; ; PIC-LABEL: test_frame0: ; PIC: # %bb.0: -; PIC-NEXT: adds.w.sx %s0, 3, %s0 -; PIC-NEXT: adds.w.sx %s0, %s0, (0)1 +; PIC-NEXT: or %s0, 0, %s1 ; PIC-NEXT: b.l.t (, %s10) - %2 = add nsw i32 %0, 3 - ret i32 %2 + ret i8* %1 } -; Function Attrs: nounwind -define i8* @test_frame32(i8* %0) { +; Function Attrs: nofree nounwind +define nonnull i8* @test_frame32(i8* nocapture readonly %0) { ; CHECK-LABEL: test_frame32: 
; CHECK: # %bb.0: ; CHECK-NEXT: st %s9, (, %s11) @@ -46,9 +48,6 @@ ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: lea %s0, data@lo -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, data@hi(, %s0) ; CHECK-NEXT: ld1b.zx %s1, (, %s0) ; CHECK-NEXT: lea %s0, 176(, %s11) ; CHECK-NEXT: st1b %s1, 176(, %s11) @@ -80,14 +79,6 @@ ; PIC-NEXT: monc ; PIC-NEXT: or %s0, 0, %s62 ; PIC-NEXT: .LBB1_2: -; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) -; PIC-NEXT: and %s15, %s15, (32)0 -; PIC-NEXT: sic %s16 -; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) -; PIC-NEXT: lea %s0, data@got_lo -; PIC-NEXT: and %s0, %s0, (32)0 -; PIC-NEXT: lea.sl %s0, data@got_hi(, %s0) -; PIC-NEXT: ld %s0, (%s0, %s15) ; PIC-NEXT: ld1b.zx %s1, (, %s0) ; PIC-NEXT: lea %s0, 176(, %s11) ; PIC-NEXT: st1b %s1, 176(, %s11) @@ -99,31 +90,34 @@ ; PIC-NEXT: b.l.t (, %s10) %2 = alloca [32 x i8], align 1 %3 = getelementptr inbounds [32 x i8], [32 x i8]* %2, i64 0, i64 0 - %4 = load i8, i8* @data, align 1 - store i8 %4, i8* %3, align 1 + call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %3) + %4 = load i8, i8* %0, align 1 + store volatile i8 %4, i8* %3, align 1 + call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %3) ret i8* %3 } ; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) -declare i8* @fun(i8*, i8*) - ; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) -; Function Attrs: nounwind -define i8* @test_frame64(i8* %0) { -; CHECK-LABEL: test_frame64: +; Function Attrs: nofree nounwind +define noalias nonnull i8* @test_align32(i32 signext %0, i8* nocapture readonly %1) { +; CHECK-LABEL: test_align32: ; CHECK: # %bb.0: ; CHECK-NEXT: st %s9, (, %s11) ; CHECK-NEXT: st %s10, 8(, %s11) ; CHECK-NEXT: st %s15, 24(, %s11) ; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) ; 
CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s13, -240 +; CHECK-NEXT: lea %s13, -288 ; CHECK-NEXT: and %s13, %s13, (32)0 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: or %s17, 0, %s11 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB2_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ld %s61, 24(, %s14) @@ -135,29 +129,41 @@ ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: lea %s0, data@lo -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, data@hi(, %s0) -; CHECK-NEXT: ld1b.zx %s1, (, %s0) -; CHECK-NEXT: lea %s0, 176(, %s11) -; CHECK-NEXT: st1b %s1, 176(, %s11) +; CHECK-NEXT: or %s2, 0, %s1 +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) +; CHECK-NEXT: or %s1, -32, (0)1 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld1b.zx %s1, (, %s2) +; CHECK-NEXT: lea %s0, 31(, %s0) +; CHECK-NEXT: and %s2, -32, %s0 +; CHECK-NEXT: lea %s0, 256(, %s17) +; CHECK-NEXT: st1b %s1, (, %s2) ; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) ; CHECK-NEXT: ld %s16, 32(, %s11) ; CHECK-NEXT: ld %s15, 24(, %s11) ; CHECK-NEXT: ld %s10, 8(, %s11) ; CHECK-NEXT: ld %s9, (, %s11) ; CHECK-NEXT: b.l.t (, %s10) ; -; PIC-LABEL: test_frame64: +; PIC-LABEL: test_align32: ; PIC: # %bb.0: ; PIC-NEXT: st %s9, (, %s11) ; PIC-NEXT: st %s10, 8(, %s11) ; PIC-NEXT: st %s15, 24(, %s11) ; PIC-NEXT: st %s16, 32(, %s11) +; PIC-NEXT: st %s17, 40(, %s11) ; PIC-NEXT: or %s9, 0, %s11 -; PIC-NEXT: lea %s13, -240 +; PIC-NEXT: lea %s13, -288 ; PIC-NEXT: and %s13, %s13, (32)0 ; PIC-NEXT: lea.sl %s11, -1(%s13, %s11) +; PIC-NEXT: and %s11, %s11, (59)1 +; PIC-NEXT: or %s17, 0, %s11 ; PIC-NEXT: brge.l.t %s11, %s8, .LBB2_2 ; PIC-NEXT: # %bb.1: ; PIC-NEXT: ld %s61, 24(, %s14) @@ -169,74 +175,62 @@ ; PIC-NEXT: monc ; PIC-NEXT: or %s0, 0, 
%s62 ; PIC-NEXT: .LBB2_2: +; PIC-NEXT: or %s2, 0, %s1 ; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) -; PIC-NEXT: lea %s0, data@got_lo -; PIC-NEXT: and %s0, %s0, (32)0 -; PIC-NEXT: lea.sl %s0, data@got_hi(, %s0) -; PIC-NEXT: ld %s0, (%s0, %s15) -; PIC-NEXT: ld1b.zx %s1, (, %s0) -; PIC-NEXT: lea %s0, 176(, %s11) -; PIC-NEXT: st1b %s1, 176(, %s11) +; PIC-NEXT: lea %s0, 15(, %s0) +; PIC-NEXT: and %s0, -16, %s0 +; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) +; PIC-NEXT: and %s12, %s12, (32)0 +; PIC-NEXT: sic %s16 +; PIC-NEXT: lea.sl %s12, __ve_grow_stack_align@plt_hi(%s16, %s12) +; PIC-NEXT: or %s1, -32, (0)1 +; PIC-NEXT: bsic %s10, (, %s12) +; PIC-NEXT: lea %s0, 240(, %s11) +; PIC-NEXT: ld1b.zx %s1, (, %s2) +; PIC-NEXT: lea %s0, 31(, %s0) +; PIC-NEXT: and %s2, -32, %s0 +; PIC-NEXT: lea %s0, 256(, %s17) +; PIC-NEXT: st1b %s1, (, %s2) ; PIC-NEXT: or %s11, 0, %s9 +; PIC-NEXT: ld %s17, 40(, %s11) ; PIC-NEXT: ld %s16, 32(, %s11) ; PIC-NEXT: ld %s15, 24(, %s11) ; PIC-NEXT: ld %s10, 8(, %s11) ; PIC-NEXT: ld %s9, (, %s11) ; PIC-NEXT: b.l.t (, %s10) - %2 = alloca [64 x i8], align 1 - %3 = getelementptr inbounds [64 x i8], [64 x i8]* %2, i64 0, i64 0 - %4 = load i8, i8* @data, align 1 - store i8 %4, i8* %3, align 1 - ret i8* %3 + %3 = alloca [32 x i8], align 32 + %4 = getelementptr inbounds [32 x i8], [32 x i8]* %3, i64 0, i64 0 + call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %4) + %5 = sext i32 %0 to i64 + %6 = alloca i8, i64 %5, align 32 + %7 = load i8, i8* %1, align 1 + store volatile i8 %7, i8* %6, align 32 + call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %4) + ret i8* %4 } -; Function Attrs: nounwind -define i8* @test_frame128(i8* %0) { -; CHECK-LABEL: test_frame128: +; Function Attrs: nofree norecurse nounwind +define i8* @test_frame0_var(i8* returned %0, i8* nocapture readnone %1) { +; CHECK-LABEL: test_frame0_var: ; 
CHECK: # %bb.0: -; CHECK-NEXT: st %s9, (, %s11) -; CHECK-NEXT: st %s10, 8(, %s11) -; CHECK-NEXT: st %s15, 24(, %s11) -; CHECK-NEXT: st %s16, 32(, %s11) -; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s13, -304 -; CHECK-NEXT: and %s13, %s13, (32)0 -; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) -; CHECK-NEXT: brge.l.t %s11, %s8, .LBB3_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: ld %s61, 24(, %s14) -; CHECK-NEXT: or %s62, 0, %s0 -; CHECK-NEXT: lea %s63, 315 -; CHECK-NEXT: shm.l %s63, (%s61) -; CHECK-NEXT: shm.l %s8, 8(%s61) -; CHECK-NEXT: shm.l %s11, 16(%s61) -; CHECK-NEXT: monc -; CHECK-NEXT: or %s0, 0, %s62 -; CHECK-NEXT: .LBB3_2: -; CHECK-NEXT: lea %s0, data@lo -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, data@hi(, %s0) -; CHECK-NEXT: ld1b.zx %s1, (, %s0) -; CHECK-NEXT: lea %s0, 176(, %s11) -; CHECK-NEXT: st1b %s1, 176(, %s11) -; CHECK-NEXT: or %s11, 0, %s9 -; CHECK-NEXT: ld %s16, 32(, %s11) -; CHECK-NEXT: ld %s15, 24(, %s11) -; CHECK-NEXT: ld %s10, 8(, %s11) -; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: lea %s1, data@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s1, data@hi(, %s1) +; CHECK-NEXT: ld1b.zx %s1, (, %s1) +; CHECK-NEXT: st1b %s1, (, %s0) ; CHECK-NEXT: b.l.t (, %s10) ; -; PIC-LABEL: test_frame128: +; PIC-LABEL: test_frame0_var: ; PIC: # %bb.0: ; PIC-NEXT: st %s9, (, %s11) ; PIC-NEXT: st %s10, 8(, %s11) ; PIC-NEXT: st %s15, 24(, %s11) ; PIC-NEXT: st %s16, 32(, %s11) ; PIC-NEXT: or %s9, 0, %s11 -; PIC-NEXT: lea %s13, -304 +; PIC-NEXT: lea %s13, -176 ; PIC-NEXT: and %s13, %s13, (32)0 ; PIC-NEXT: lea.sl %s11, -1(%s13, %s11) ; PIC-NEXT: brge.l.t %s11, %s8, .LBB3_2 @@ -254,36 +248,33 @@ ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) -; PIC-NEXT: lea %s0, data@got_lo -; PIC-NEXT: and %s0, %s0, (32)0 -; PIC-NEXT: lea.sl %s0, data@got_hi(, %s0) -; PIC-NEXT: ld %s0, (%s0, %s15) -; PIC-NEXT: ld1b.zx %s1, (, %s0) -; PIC-NEXT: lea %s0, 176(, %s11) -; PIC-NEXT: st1b 
%s1, 176(, %s11) +; PIC-NEXT: lea %s1, data@got_lo +; PIC-NEXT: and %s1, %s1, (32)0 +; PIC-NEXT: lea.sl %s1, data@got_hi(, %s1) +; PIC-NEXT: ld %s1, (%s1, %s15) +; PIC-NEXT: ld1b.zx %s1, (, %s1) +; PIC-NEXT: st1b %s1, (, %s0) ; PIC-NEXT: or %s11, 0, %s9 ; PIC-NEXT: ld %s16, 32(, %s11) ; PIC-NEXT: ld %s15, 24(, %s11) ; PIC-NEXT: ld %s10, 8(, %s11) ; PIC-NEXT: ld %s9, (, %s11) ; PIC-NEXT: b.l.t (, %s10) - %2 = alloca [128 x i8], align 1 - %3 = getelementptr inbounds [128 x i8], [128 x i8]* %2, i64 0, i64 0 - %4 = load i8, i8* @data, align 1 - store i8 %4, i8* %3, align 1 - ret i8* %3 + %3 = load i8, i8* @data, align 1 + store i8 %3, i8* %0, align 1 + ret i8* %0 } -; Function Attrs: nounwind -define i8* @test_frame65536(i8* %0) { -; CHECK-LABEL: test_frame65536: +; Function Attrs: nofree nounwind +define nonnull i8* @test_frame32_var(i8* nocapture readnone %0) { +; CHECK-LABEL: test_frame32_var: ; CHECK: # %bb.0: ; CHECK-NEXT: st %s9, (, %s11) ; CHECK-NEXT: st %s10, 8(, %s11) ; CHECK-NEXT: st %s15, 24(, %s11) ; CHECK-NEXT: st %s16, 32(, %s11) ; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s13, -65712 +; CHECK-NEXT: lea %s13, -208 ; CHECK-NEXT: and %s13, %s13, (32)0 ; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2 @@ -310,14 +301,14 @@ ; CHECK-NEXT: ld %s9, (, %s11) ; CHECK-NEXT: b.l.t (, %s10) ; -; PIC-LABEL: test_frame65536: +; PIC-LABEL: test_frame32_var: ; PIC: # %bb.0: ; PIC-NEXT: st %s9, (, %s11) ; PIC-NEXT: st %s10, 8(, %s11) ; PIC-NEXT: st %s15, 24(, %s11) ; PIC-NEXT: st %s16, 32(, %s11) ; PIC-NEXT: or %s9, 0, %s11 -; PIC-NEXT: lea %s13, -65712 +; PIC-NEXT: lea %s13, -208 ; PIC-NEXT: and %s13, %s13, (32)0 ; PIC-NEXT: lea.sl %s11, -1(%s13, %s11) ; PIC-NEXT: brge.l.t %s11, %s8, .LBB4_2 @@ -348,25 +339,30 @@ ; PIC-NEXT: ld %s10, 8(, %s11) ; PIC-NEXT: ld %s9, (, %s11) ; PIC-NEXT: b.l.t (, %s10) - %2 = alloca [65536 x i8], align 1 - %3 = getelementptr inbounds [65536 x i8], [65536 x i8]* %2, i64 0, i64 0 + %2 = alloca [32 x i8], 
align 1 + %3 = getelementptr inbounds [32 x i8], [32 x i8]* %2, i64 0, i64 0 + call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %3) %4 = load i8, i8* @data, align 1 - store i8 %4, i8* %3, align 1 + store volatile i8 %4, i8* %3, align 1 + call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %3) ret i8* %3 } -; Function Attrs: nounwind -define i8* @test_frame4294967296(i8* %0) { -; CHECK-LABEL: test_frame4294967296: +; Function Attrs: nofree nounwind +define noalias nonnull i8* @test_align32_var(i32 signext %0, i8* nocapture readonly %1) { +; CHECK-LABEL: test_align32_var: ; CHECK: # %bb.0: ; CHECK-NEXT: st %s9, (, %s11) ; CHECK-NEXT: st %s10, 8(, %s11) ; CHECK-NEXT: st %s15, 24(, %s11) ; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: st %s17, 40(, %s11) ; CHECK-NEXT: or %s9, 0, %s11 -; CHECK-NEXT: lea %s13, -176 +; CHECK-NEXT: lea %s13, -288 ; CHECK-NEXT: and %s13, %s13, (32)0 -; CHECK-NEXT: lea.sl %s11, -2(%s13, %s11) +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: and %s11, %s11, (59)1 +; CHECK-NEXT: or %s17, 0, %s11 ; CHECK-NEXT: brge.l.t %s11, %s8, .LBB5_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: ld %s61, 24(, %s14) @@ -378,29 +374,41 @@ ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB5_2: -; CHECK-NEXT: lea %s0, data@lo -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, data@hi(, %s0) -; CHECK-NEXT: ld1b.zx %s1, (, %s0) -; CHECK-NEXT: lea %s0, 176(, %s11) -; CHECK-NEXT: st1b %s1, 176(, %s11) +; CHECK-NEXT: or %s2, 0, %s1 +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) +; CHECK-NEXT: or %s1, -32, (0)1 +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: lea %s0, 240(, %s11) +; CHECK-NEXT: ld1b.zx %s1, (, %s2) +; CHECK-NEXT: lea %s0, 31(, %s0) +; CHECK-NEXT: and %s2, -32, %s0 +; CHECK-NEXT: lea %s0, 256(, %s17) +; CHECK-NEXT: st1b %s1, (, %s2) ; CHECK-NEXT: or 
%s11, 0, %s9 +; CHECK-NEXT: ld %s17, 40(, %s11) ; CHECK-NEXT: ld %s16, 32(, %s11) ; CHECK-NEXT: ld %s15, 24(, %s11) ; CHECK-NEXT: ld %s10, 8(, %s11) ; CHECK-NEXT: ld %s9, (, %s11) ; CHECK-NEXT: b.l.t (, %s10) ; -; PIC-LABEL: test_frame4294967296: +; PIC-LABEL: test_align32_var: ; PIC: # %bb.0: ; PIC-NEXT: st %s9, (, %s11) ; PIC-NEXT: st %s10, 8(, %s11) ; PIC-NEXT: st %s15, 24(, %s11) ; PIC-NEXT: st %s16, 32(, %s11) +; PIC-NEXT: st %s17, 40(, %s11) ; PIC-NEXT: or %s9, 0, %s11 -; PIC-NEXT: lea %s13, -176 +; PIC-NEXT: lea %s13, -288 ; PIC-NEXT: and %s13, %s13, (32)0 -; PIC-NEXT: lea.sl %s11, -2(%s13, %s11) +; PIC-NEXT: lea.sl %s11, -1(%s13, %s11) +; PIC-NEXT: and %s11, %s11, (59)1 +; PIC-NEXT: or %s17, 0, %s11 ; PIC-NEXT: brge.l.t %s11, %s8, .LBB5_2 ; PIC-NEXT: # %bb.1: ; PIC-NEXT: ld %s61, 24(, %s14) @@ -412,26 +420,39 @@ ; PIC-NEXT: monc ; PIC-NEXT: or %s0, 0, %s62 ; PIC-NEXT: .LBB5_2: +; PIC-NEXT: or %s2, 0, %s1 ; PIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24) ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) -; PIC-NEXT: lea %s0, data@got_lo -; PIC-NEXT: and %s0, %s0, (32)0 -; PIC-NEXT: lea.sl %s0, data@got_hi(, %s0) -; PIC-NEXT: ld %s0, (%s0, %s15) -; PIC-NEXT: ld1b.zx %s1, (, %s0) -; PIC-NEXT: lea %s0, 176(, %s11) -; PIC-NEXT: st1b %s1, 176(, %s11) +; PIC-NEXT: lea %s0, 15(, %s0) +; PIC-NEXT: and %s0, -16, %s0 +; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) +; PIC-NEXT: and %s12, %s12, (32)0 +; PIC-NEXT: sic %s16 +; PIC-NEXT: lea.sl %s12, __ve_grow_stack_align@plt_hi(%s16, %s12) +; PIC-NEXT: or %s1, -32, (0)1 +; PIC-NEXT: bsic %s10, (, %s12) +; PIC-NEXT: lea %s0, 240(, %s11) +; PIC-NEXT: ld1b.zx %s1, (, %s2) +; PIC-NEXT: lea %s0, 31(, %s0) +; PIC-NEXT: and %s2, -32, %s0 +; PIC-NEXT: lea %s0, 256(, %s17) +; PIC-NEXT: st1b %s1, (, %s2) ; PIC-NEXT: or %s11, 0, %s9 +; PIC-NEXT: ld %s17, 40(, %s11) ; PIC-NEXT: ld %s16, 32(, %s11) ; PIC-NEXT: ld %s15, 24(, %s11) ; PIC-NEXT: ld %s10, 
8(, %s11) ; PIC-NEXT: ld %s9, (, %s11) ; PIC-NEXT: b.l.t (, %s10) - %2 = alloca [4294967296 x i8], align 1 - %3 = getelementptr inbounds [4294967296 x i8], [4294967296 x i8]* %2, i64 0, i64 0 - %4 = load i8, i8* @data, align 1 - store i8 %4, i8* %3, align 1 - ret i8* %3 + %3 = alloca [32 x i8], align 32 + %4 = getelementptr inbounds [32 x i8], [32 x i8]* %3, i64 0, i64 0 + call void @llvm.lifetime.start.p0i8(i64 32, i8* nonnull %4) + %5 = sext i32 %0 to i64 + %6 = alloca i8, i64 %5, align 32 + %7 = load i8, i8* %1, align 1 + store volatile i8 %7, i8* %6, align 32 + call void @llvm.lifetime.end.p0i8(i64 32, i8* nonnull %4) + ret i8* %4 } diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_size.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_size.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/Scalar/stackframe_size.ll @@ -0,0 +1,289 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=ve | FileCheck %s + +;;; Check stack frame allocation especially about the new SP calculation +;;; using multiple sizes as a test of emitSPAdjustment(). 
+ +;; No alloca in this function: the expected output is only the two adds.w.sx instructions and the final b.l.t, without the st/lea/ld frame sequence that the functions with an alloca expect. +; Function Attrs: norecurse nounwind readnone +define signext i32 @test_frame0(i32 signext %0) { +; CHECK-LABEL: test_frame0: +; CHECK: # %bb.0: +; CHECK-NEXT: adds.w.sx %s0, 3, %s0 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: b.l.t (, %s10) + %2 = add nsw i32 %0, 3 + ret i32 %2 +} + +;; [8 x i8] alloca: the expected code loads -192 into %s13 for the %s11 adjustment and addresses the buffer at 184(, %s11). +; Function Attrs: nounwind +define i8* @test_frame8(i8* %0) { +; CHECK-LABEL: test_frame8: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -192 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 184(, %s11) +; CHECK-NEXT: st1b %s1, 184(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca [8 x i8], align 1 + %3 = getelementptr inbounds [8 x i8], [8 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; [16 x i8] alloca: the expected code loads -192 into %s13 for the %s11 adjustment and addresses the buffer at 176(, %s11). +; Function Attrs: nounwind +define i8* @test_frame16(i8* %0) { +; CHECK-LABEL: test_frame16: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -192 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(,
%s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 176(, %s11) +; CHECK-NEXT: st1b %s1, 176(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca [16 x i8], align 1 + %3 = getelementptr inbounds [16 x i8], [16 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; [32 x i8] alloca: the expected code loads -208 into %s13 for the %s11 adjustment and addresses the buffer at 176(, %s11). +; Function Attrs: nounwind +define i8* @test_frame32(i8* %0) { +; CHECK-LABEL: test_frame32: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -208 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB3_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 176(, %s11) +; CHECK-NEXT: st1b %s1, 176(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca [32 x i8], align 1 + %3 = getelementptr inbounds [32 x i8], [32 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; [64 x i8] alloca: the expected code loads -240 into %s13 for the %s11 adjustment and addresses the buffer at 176(, %s11). +; Function Attrs: nounwind +define i8*
@test_frame64(i8* %0) { +; CHECK-LABEL: test_frame64: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -240 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 176(, %s11) +; CHECK-NEXT: st1b %s1, 176(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca [64 x i8], align 1 + %3 = getelementptr inbounds [64 x i8], [64 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; [128 x i8] alloca: the expected code loads -304 into %s13 for the %s11 adjustment and addresses the buffer at 176(, %s11). +; Function Attrs: nounwind +define i8* @test_frame128(i8* %0) { +; CHECK-LABEL: test_frame128: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -304 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 176(, %s11) +; CHECK-NEXT:
st1b %s1, 176(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca [128 x i8], align 1 + %3 = getelementptr inbounds [128 x i8], [128 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; [65536 x i8] alloca: the expected code loads -65712 into %s13 for the %s11 adjustment and addresses the buffer at 176(, %s11). +; Function Attrs: nounwind +define i8* @test_frame65536(i8* %0) { +; CHECK-LABEL: test_frame65536: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -65712 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB6_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 176(, %s11) +; CHECK-NEXT: st1b %s1, 176(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca [65536 x i8], align 1 + %3 = getelementptr inbounds [65536 x i8], [65536 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +} + +;; [4294967296 x i8] alloca: the expected code loads -176 into %s13 and uses a lea.sl displacement of -2 (the smaller frames in this group use -1); the buffer is addressed at 176(, %s11). +; Function Attrs: nounwind +define i8* @test_frame4294967296(i8* %0) { +; CHECK-LABEL: test_frame4294967296: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (, %s11) +; CHECK-NEXT: st %s10, 8(, %s11) +; CHECK-NEXT: st %s15, 24(, %s11) +; CHECK-NEXT: st %s16, 32(, %s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -176 +; CHECK-NEXT: and %s13, %s13,
(32)0 +; CHECK-NEXT: lea.sl %s11, -2(%s13, %s11) +; CHECK-NEXT: brge.l.t %s11, %s8, .LBB7_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(, %s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB7_2: +; CHECK-NEXT: ld1b.zx %s1, (, %s0) +; CHECK-NEXT: lea %s0, 176(, %s11) +; CHECK-NEXT: st1b %s1, 176(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(, %s11) +; CHECK-NEXT: ld %s15, 24(, %s11) +; CHECK-NEXT: ld %s10, 8(, %s11) +; CHECK-NEXT: ld %s9, (, %s11) +; CHECK-NEXT: b.l.t (, %s10) + %2 = alloca [4294967296 x i8], align 1 + %3 = getelementptr inbounds [4294967296 x i8], [4294967296 x i8]* %2, i64 0, i64 0 + %4 = load i8, i8* %0, align 1 + store i8 %4, i8* %3, align 1 + ret i8* %3 +}