; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s

; Struct with a padding byte before each interesting member, used to probe
; the v7k (watchOS) ABI field alignments. The expected offsets checked below
; (8, 24, 40, 64) imply: i64/double/<2 x float> are 8-byte aligned and
; <4 x float> is 16-byte aligned.
%struct = type { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> }
; i64 struct member must land at offset 8 (8-byte alignment after the leading i8).
; Note: original test had a "CHECL" typo here, which made FileCheck skip the check.
define i32 @test_i64_align() {
; CHECK-LABEL: test_i64_align:
; CHECK: movs r0, #8
  ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32)
}
| 10 | + |
; double struct member must land at offset 24 (8-byte alignment).
; Note: original test had a "CHECL" typo here, which made FileCheck skip the check.
define i32 @test_f64_align() {
; CHECK-LABEL: test_f64_align:
; CHECK: movs r0, #24
  ret i32 ptrtoint(double* getelementptr(%struct, %struct* null, i32 0, i32 3) to i32)
}
| 16 | + |
; <2 x float> struct member must land at offset 40 (8-byte alignment).
; Note: original test had a "CHECL" typo here, which made FileCheck skip the check.
define i32 @test_v2f32_align() {
; CHECK-LABEL: test_v2f32_align:
; CHECK: movs r0, #40
  ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32)
}
| 22 | + |
; <4 x float> struct member must land at offset 64 (16-byte alignment).
; Note: original test had a "CHECL" typo here, which made FileCheck skip the check.
define i32 @test_v4f32_align() {
; CHECK-LABEL: test_v4f32_align:
; CHECK: movs r0, #64
  ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32)
}
| 28 | + |
; Key point here is that an extra register has to be saved so that the DPRs end
; up in an aligned location (as prologue/epilogue inserter had calculated):
; r5 is pushed alongside r6/r7/lr purely for alignment, so no separate
; "sub sp" / "add sp" adjustment is needed around the vpush/vpop.
define void @test_dpr_unwind_align() {
; CHECK-LABEL: test_dpr_unwind_align:
; CHECK: push {r5, r6, r7, lr}
; CHECK-NOT: sub sp
; CHECK: vpush {d8, d9}
; [...]
; CHECK: bl _test_i64_align
; CHECK-NOT: add sp,
; CHECK: vpop {d8, d9}
; CHECK-NOT: add sp,
; CHECK: pop {r5, r6, r7, pc}

  call void asm sideeffect "", "~{r6},~{d8},~{d9}"()

  ; Whatever
  call i32 @test_i64_align()
  ret void
}
| 49 | + |
; This time, there's no viable way to tack CS-registers onto the list: a real SP
; adjustment needs to be performed to put d8 and d9 where they should be
; (hence the explicit "sub sp, #4" before the vpush and "add sp, #4" after
; the vpop).
define void @test_dpr_unwind_align_manually() {
; CHECK-LABEL: test_dpr_unwind_align_manually:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK-NOT: sub sp
; CHECK: push.w {r8, r11}
; CHECK: sub sp, #4
; CHECK: vpush {d8, d9}
; [...]
; CHECK: bl _test_i64_align
; CHECK-NOT: add sp,
; CHECK: vpop {d8, d9}
; CHECK: add sp, #4
; CHECK: pop.w {r8, r11}
; CHECK: pop {r4, r5, r6, r7, pc}

  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{d8},~{d9}"()

  ; Whatever
  call i32 @test_i64_align()
  ret void
}
| 73 | + |
; If there's only a CS1 area, the sub should be in the right place: the
; alignment adjustment ("sub sp, #4") goes between the GPR push and the vpush,
; and the local-area adjustment ("sub sp, #8") goes after the vpush.
define void @test_dpr_unwind_align_just_cs1() {
; CHECK-LABEL: test_dpr_unwind_align_just_cs1:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK: sub sp, #4
; CHECK: vpush {d8, d9}
; CHECK: sub sp, #8
; [...]
; CHECK: bl _test_i64_align
; CHECK: add sp, #8
; CHECK: vpop {d8, d9}
; CHECK: add sp, #4
; CHECK: pop {r4, r5, r6, r7, pc}

  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{d8},~{d9}"()

  ; Whatever
  call i32 @test_i64_align()
  ret void
}
| 94 | + |
; If there are no DPRs, we shouldn't try to align the stack in stages anyway:
; a single combined "sub sp, #12" / "add sp, #12" suffices.
define void @test_dpr_unwind_align_no_dprs() {
; CHECK-LABEL: test_dpr_unwind_align_no_dprs:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK: sub sp, #12
; [...]
; CHECK: bl _test_i64_align
; CHECK: add sp, #12
; CHECK: pop {r4, r5, r6, r7, pc}

  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"()

  ; Whatever
  call i32 @test_i64_align()
  ret void
}
| 111 | + |
; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on
; the stack: the :128 alignment annotation on the vld1 is the key check.
define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) {
; CHECK-LABEL: test_v128_stack_pass:
; CHECK: add r[[ADDR:[0-9]+]], sp, #16
; CHECK: vld1.64 {d0, d1}, [r[[ADDR]]:128]

  ret <4 x float> %in
}
| 121 | + |
declare void @varargs(i32, ...)

; When varargs are enabled, we go down a different route. Still want 128-bit
; alignment though (checked via the :128 annotation on the vst1).
define void @test_v128_stack_pass_varargs(<4 x float> %in) {
; CHECK-LABEL: test_v128_stack_pass_varargs:
; CHECK: add r[[ADDR:[0-9]+]], sp, #16
; CHECK: vst1.64 {d0, d1}, [r[[ADDR]]:128]

  call void(i32, ...) @varargs(i32 undef, [3 x i32] undef, float undef, <4 x float> %in)
  ret void
}
| 134 | + |
; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP, give
; a single pointer), 64-bit quantities must be passed in an even-odd register
; pair: here %r2_r3 occupies r2/r3 (skipping r1), and the following i32 %sp
; goes on the stack.
define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) {
; CHECK-LABEL: test_64bit_gpr_align:
; CHECK: ldr [[RHS:r[0-9]+]], [sp]
; CHECK: adds r0, [[RHS]], r2
; CHECK: adc r1, r3, #0

  %ext = zext i32 %sp to i64
  %sum = add i64 %ext, %r2_r3
  ret i64 %sum
}