diff --git a/llvm/lib/Target/AVR/AVRCallingConv.td b/llvm/lib/Target/AVR/AVRCallingConv.td --- a/llvm/lib/Target/AVR/AVRCallingConv.td +++ b/llvm/lib/Target/AVR/AVRCallingConv.td @@ -27,6 +27,8 @@ // Calling convention for variadic functions. def ArgCC_AVR_Vararg : CallingConv<[ + // i8 are always passed through the stack with a byte slot and byte alignment. + CCIfType<[i8], CCAssignToStack<1, 1>>, // i16 are always passed through the stack with an alignment of 1. CCAssignToStack<2, 1> ]>; diff --git a/llvm/test/CodeGen/AVR/calling-conv/c/basic.ll b/llvm/test/CodeGen/AVR/calling-conv/c/basic.ll --- a/llvm/test/CodeGen/AVR/calling-conv/c/basic.ll +++ b/llvm/test/CodeGen/AVR/calling-conv/c/basic.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=avr | FileCheck %s +; RUN: llc -mtriple=avr < %s | FileCheck %s ; CHECK-LABEL: ret_void_args_i8 define void @ret_void_args_i8(i8 %a) { @@ -97,3 +97,75 @@ store volatile i16 %c, i16* inttoptr (i64 4 to i16*) ret void } + +; NOTE: %a (i8) and %b (i8) each cost two registers. +define i8 @foo0(i8 %a, i8 %b) { +; CHECK-LABEL: foo0: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub r24, r22 +; CHECK-NEXT: ret + %c = sub i8 %a, %b + ret i8 %c +} + +; NOTE: %a (i16) and %b (i16) each cost two registers. +define i16 @foo1(i16 %a, i16 %b) { +; CHECK-LABEL: foo1: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub r24, r22 +; CHECK-NEXT: sbc r25, r23 +; CHECK-NEXT: ret + %c = sub i16 %a, %b + ret i16 %c +} + +; NOTE: %a (i32) and %b (i32) each cost four registers. +define i32 @foo2(i32 %a, i32 %b) { +; CHECK-LABEL: foo2: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub r22, r18 +; CHECK-NEXT: sbc r23, r19 +; CHECK-NEXT: sbc r24, r20 +; CHECK-NEXT: sbc r25, r21 +; CHECK-NEXT: ret + %c = sub i32 %a, %b + ret i32 %c +} + +; NOTE: Each argument costs four registers, and a total of 16 registers is used. 
+define i32 @foo3(i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: foo3: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub r22, r10 +; CHECK-NEXT: sbc r23, r11 +; CHECK-NEXT: sbc r24, r12 +; CHECK-NEXT: sbc r25, r13 +; CHECK-NEXT: ret + %e = sub nsw i32 %a, %d + ret i32 %e +} + +; NOTE: Each argument (except %e) costs four registers, and a total of 16 registers +; NOTE: are used. Though 2 registers are still vacant, %e has +; NOTE: to be dropped to the stack. +define i32 @foo4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { +; CHECK-LABEL: foo4: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r28 +; CHECK-NEXT: push r29 +; CHECK-NEXT: in r28, 61 +; CHECK-NEXT: in r29, 62 +; CHECK-NEXT: ldd r18, Y+5 +; CHECK-NEXT: ldd r19, Y+6 +; CHECK-NEXT: ldd r20, Y+7 +; CHECK-NEXT: ldd r21, Y+8 +; CHECK-NEXT: sub r22, r18 +; CHECK-NEXT: sbc r23, r19 +; CHECK-NEXT: sbc r24, r20 +; CHECK-NEXT: sbc r25, r21 +; CHECK-NEXT: pop r29 +; CHECK-NEXT: pop r28 +; CHECK-NEXT: ret + %f = sub nsw i32 %a, %e + ret i32 %f +} diff --git a/llvm/test/CodeGen/AVR/calling-conv/c/basic_aggr.ll b/llvm/test/CodeGen/AVR/calling-conv/c/basic_aggr.ll --- a/llvm/test/CodeGen/AVR/calling-conv/c/basic_aggr.ll +++ b/llvm/test/CodeGen/AVR/calling-conv/c/basic_aggr.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=avr | FileCheck %s +; RUN: llc -mtriple=avr < %s | FileCheck %s ; CHECK-LABEL: ret_void_args_struct_i8_i32 define void @ret_void_args_struct_i8_i32({ i8, i32 } %a) { @@ -82,3 +82,94 @@ ret void } +; NOTE: The %0 (8-byte array) costs 8 registers and %1 (10-byte array) +; NOTE: costs 10 registers. +define i8 @foo0([8 x i8] %0, [10 x i8] %1) { +; CHECK-LABEL: foo0: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub r18, r8 +; CHECK-NEXT: mov r24, r18 +; CHECK-NEXT: ret + %3 = extractvalue [8 x i8] %0, 0 + %4 = extractvalue [10 x i8] %1, 0 + %5 = sub i8 %3, %4 + ret i8 %5 +} + +; NOTE: The %0 (7-byte array) costs 8 registers and %1 (9-byte array) +; NOTE: costs 10 registers. 
+define i8 @foo1([7 x i8] %0, [9 x i8] %1) { +; CHECK-LABEL: foo1: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub r18, r8 +; CHECK-NEXT: mov r24, r18 +; CHECK-NEXT: ret + %3 = extractvalue [7 x i8] %0, 0 + %4 = extractvalue [9 x i8] %1, 0 + %5 = sub i8 %3, %4 + ret i8 %5 +} + +; NOTE: Each argument (6-byte array) costs 6 registers. +define i8 @foo2([6 x i8] %0, [6 x i8] %1, [6 x i8] %2) { +; CHECK-LABEL: foo2: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub r20, r14 +; CHECK-NEXT: add r20, r8 +; CHECK-NEXT: mov r24, r20 +; CHECK-NEXT: ret + %4 = extractvalue [6 x i8] %0, 0 + %5 = extractvalue [6 x i8] %1, 0 + %6 = extractvalue [6 x i8] %2, 0 + %7 = sub i8 %4, %5 + %8 = add i8 %7, %6 + ret i8 %8 +} + +; NOTE: The %0 (9-byte array) costs 10 registers. Though +; NOTE: 8 registers are vacant, the %1 (9-byte array) has to be dropped +; NOTE: to the stack. +define i8 @foo3([9 x i8] %0, [9 x i8] %1) { +; CHECK-LABEL: foo3: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r16 +; CHECK-NEXT: push r28 +; CHECK-NEXT: push r29 +; CHECK-NEXT: in r28, 61 +; CHECK-NEXT: in r29, 62 +; CHECK-NEXT: ldd r24, Y+6 +; CHECK-NEXT: sub r16, r24 +; CHECK-NEXT: mov r24, r16 +; CHECK-NEXT: pop r29 +; CHECK-NEXT: pop r28 +; CHECK-NEXT: pop r16 +; CHECK-NEXT: ret + %3 = extractvalue [9 x i8] %0, 0 + %4 = extractvalue [9 x i8] %1, 0 + %5 = sub i8 %3, %4 + ret i8 %5 +} + +; NOTE: Both %0 and %1 are 7-byte arrays, and cost a total of 16 registers. +; NOTE: Though 2 registers are vacant, the %2 (7-byte array) has to +; NOTE: be dropped to the stack. 
+define i8 @foo4([7 x i8] %0, [7 x i8] %1, [7 x i8] %2) { +; CHECK-LABEL: foo4: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r28 +; CHECK-NEXT: push r29 +; CHECK-NEXT: in r28, 61 +; CHECK-NEXT: in r29, 62 +; CHECK-NEXT: sub r18, r10 +; CHECK-NEXT: ldd r24, Y+5 +; CHECK-NEXT: add r24, r18 +; CHECK-NEXT: pop r29 +; CHECK-NEXT: pop r28 +; CHECK-NEXT: ret + %4 = extractvalue [7 x i8] %0, 0 + %5 = extractvalue [7 x i8] %1, 0 + %6 = extractvalue [7 x i8] %2, 0 + %7 = sub i8 %4, %5 + %8 = add i8 %7, %6 + ret i8 %8 +} diff --git a/llvm/test/CodeGen/AVR/calling-conv/c/stack.ll b/llvm/test/CodeGen/AVR/calling-conv/c/stack.ll --- a/llvm/test/CodeGen/AVR/calling-conv/c/stack.ll +++ b/llvm/test/CodeGen/AVR/calling-conv/c/stack.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=avr | FileCheck %s +; RUN: llc -mtriple=avr < %s | FileCheck %s ; CHECK-LABEL: ret_void_args_i64_i64_i32 define void @ret_void_args_i64_i64_i32(i64 %a, i64 %b, i32 %c) { @@ -30,3 +30,112 @@ store volatile i32 %c, i32* inttoptr (i64 4 to i32*) ret void } + +; NOTE: All arguments are passed via the stack for varargs functions. +; NOTE: %a and %b each occupy a 1-byte stack slot. +define i8 @foo0(i8 %a, i8 %b, ...) { +; CHECK-LABEL: foo0: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r28 +; CHECK-NEXT: push r29 +; CHECK-NEXT: in r28, 61 +; CHECK-NEXT: in r29, 62 +; CHECK-NEXT: ldd r25, Y+6 +; CHECK-NEXT: ldd r24, Y+5 +; CHECK-NEXT: sub r24, r25 +; CHECK-NEXT: pop r29 +; CHECK-NEXT: pop r28 +; CHECK-NEXT: ret + %c = sub i8 %a, %b + ret i8 %c +} + +; NOTE: All arguments are passed via the stack since the argument %a is too large. 
+define i8 @foo1([19 x i8] %a, i8 %b) { +; CHECK-LABEL: foo1: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r28 +; CHECK-NEXT: push r29 +; CHECK-NEXT: in r28, 61 +; CHECK-NEXT: in r29, 62 +; CHECK-NEXT: ldd r25, Y+24 +; CHECK-NEXT: ldd r24, Y+5 +; CHECK-NEXT: sub r24, r25 +; CHECK-NEXT: pop r29 +; CHECK-NEXT: pop r28 +; CHECK-NEXT: ret + %c = extractvalue [19 x i8] %a, 0 + %d = sub i8 %c, %b + ret i8 %d +} + +; NOTE: The argument %b is passed via the stack, since the argument %a costs +; NOTE: a total of 18 registers though it is a 17-byte array. +define i8 @foo2([17 x i8] %a, i8 %b) { +; CHECK-LABEL: foo2: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r8 +; CHECK-NEXT: push r28 +; CHECK-NEXT: push r29 +; CHECK-NEXT: in r28, 61 +; CHECK-NEXT: in r29, 62 +; CHECK-NEXT: ldd r24, Y+6 +; CHECK-NEXT: sub r8, r24 +; CHECK-NEXT: mov r24, r8 +; CHECK-NEXT: pop r29 +; CHECK-NEXT: pop r28 +; CHECK-NEXT: pop r8 +; CHECK-NEXT: ret + %c = extractvalue [17 x i8] %a, 0 + %d = sub i8 %c, %b + ret i8 %d +} + +; NOTE: Though %a costs 16 registers and 2 registers are vacant (out of a +; NOTE: total of 18 registers), the 4-byte %b has to be dropped to the +; NOTE: stack. +define i32 @foo3([4 x i32] %a, i32 %b) { +; CHECK-LABEL: foo3: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r28 +; CHECK-NEXT: push r29 +; CHECK-NEXT: in r28, 61 +; CHECK-NEXT: in r29, 62 +; CHECK-NEXT: ldd r22, Y+5 +; CHECK-NEXT: ldd r23, Y+6 +; CHECK-NEXT: ldd r24, Y+7 +; CHECK-NEXT: ldd r25, Y+8 +; CHECK-NEXT: sub r22, r10 +; CHECK-NEXT: sbc r23, r11 +; CHECK-NEXT: sbc r24, r12 +; CHECK-NEXT: sbc r25, r13 +; CHECK-NEXT: pop r29 +; CHECK-NEXT: pop r28 +; CHECK-NEXT: ret + %c = extractvalue [4 x i32] %a, 0 + %d = sub nsw i32 %b, %c + ret i32 %d +} + +; NOTE: Both %1 and %2 are passed via the stack, and each has a 1-byte slot. 
+define i8 @foo4([17 x i8] %0, i8 %1, i8 %2) { +; CHECK-LABEL: foo4: +; CHECK: ; %bb.0: +; CHECK-NEXT: push r8 +; CHECK-NEXT: push r28 +; CHECK-NEXT: push r29 +; CHECK-NEXT: in r28, 61 +; CHECK-NEXT: in r29, 62 +; CHECK-NEXT: ldd r24, Y+6 +; CHECK-NEXT: sub r8, r24 +; CHECK-NEXT: ldd r24, Y+7 +; CHECK-NEXT: add r24, r8 +; CHECK-NEXT: pop r29 +; CHECK-NEXT: pop r28 +; CHECK-NEXT: pop r8 +; CHECK-NEXT: ret + %4 = extractvalue [17 x i8] %0, 0 + %5 = sub i8 %4, %1 + %6 = add i8 %5, %2 + ret i8 %6 +}