diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6396,12 +6396,14 @@ report_fatal_error( "variadic arguments for vector types are unimplemented for AIX"); - if (unsigned VReg = State.AllocateReg(VR)) + if (unsigned VReg = State.AllocateReg(VR)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); - else { - report_fatal_error( - "passing vector parameters to the stack is unimplemented for AIX"); + return false; } + + const unsigned VecSize = 16; + const unsigned Offset = State.AllocateStack(VecSize, Align(VecSize)); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } } @@ -6549,10 +6551,6 @@ CCValAssign &VA = ArgLocs[I++]; MVT LocVT = VA.getLocVT(); ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags; - if (VA.isMemLoc() && VA.getValVT().isVector()) - report_fatal_error( - "passing vector parameters to the stack is unimplemented for AIX"); - // For compatibility with the AIX XL compiler, the float args in the // parameter save area are initialized even if the argument is available // in register. 
The caller is required to initialize both the register @@ -6903,10 +6901,6 @@ const MVT LocVT = VA.getLocVT(); const MVT ValVT = VA.getValVT(); - if (VA.isMemLoc() && VA.getValVT().isVector()) - report_fatal_error( - "passing vector parameters to the stack is unimplemented for AIX"); - switch (VA.getLocInfo()) { default: report_fatal_error("Unexpected argument extension type."); diff --git a/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-callee.ll b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-callee.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-callee.ll @@ -0,0 +1,68 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \ +; RUN: -vec-extabi -mtriple powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=32BIT + +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec -vec-extabi \ +; RUN: -stop-after=machine-cp -mtriple powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=MIR32 + +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \ +; RUN: -vec-extabi -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=64BIT + +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec -vec-extabi \ +; RUN: -stop-after=machine-cp -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=MIR64 + +%struct.Test = type { double, double, double, double } + +define double @test(i32 signext %r3, i32 signext %r4, double %fpr1, double %fpr2, <2 x double> %v2, <2 x double> %v3, <2 x double> %v4, <2 x double> %v5, <2 x double> %v6, <2 x double> %v7, <2 x double> %v8, <2 x double> %v9, <2 x double> %v10, <2 x double> %v11, <2 x double> %v12, <2 x double> %v13, <2 x double> %vSpill, double %fpr3, double %fpr4, double %fpr5, double %fpr6, double %fpr7, double %fpr8, double %fpr9, double %fpr10, double %fpr11, double %fpr12, double %fpr13, i32 signext %gprSpill, %struct.Test* nocapture readonly byval(%struct.Test) align 4 %t) { +entry: + %vecext = 
extractelement <2 x double> %vSpill, i32 0 + %x = getelementptr inbounds %struct.Test, %struct.Test* %t, i32 0, i32 0 + %0 = load double, double* %x, align 4 + %add = fadd double %vecext, %0 + ret double %add +} + +; 32BIT-LABEL: .test: +; 32BIT-DAG: lfd {{[0-9]+}}, 48(1) +; 32BIT-DAG: lfd {{[0-9]+}}, 156(1) + +; MIR32: name: test +; MIR32: fixedStack: +; MIR32: - { id: 0, type: default, offset: 156, size: 32, alignment: 4, stack-id: default, +; MIR32: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true, +; MIR32: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +; MIR32: - { id: 1, type: default, offset: 152, size: 4, alignment: 8, stack-id: default, +; MIR32: isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, +; MIR32: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +; MIR32: - { id: 2, type: default, offset: 48, size: 16, alignment: 16, stack-id: default, +; MIR32: isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, +; MIR32: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + +; MIR32: renamable $[[GPR1:r[0-9]+]] = ADDI %fixed-stack.2, 0 +; MIR32: renamable $[[GPR2:r[0-9]+]] = ADDI %fixed-stack.0, 0 +; MIR32: renamable $f{{[0-9]+}} = XFLOADf64 $zero, killed renamable $[[GPR1]] +; MIR32: renamable $f{{[0-9]+}} = XFLOADf64 $zero, killed renamable $[[GPR2]] + +; 64BIT-LABEL: .test: +; 64BIT-DAG: lfd {{[0-9]+}}, 80(1) +; 64BIT-DAG: lfd {{[0-9]+}}, 192(1) + +; MIR64: name: test +; MIR64: fixedStack: +; MIR64: - { id: 0, type: default, offset: 192, size: 32, alignment: 16, stack-id: default, +; MIR64: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true, +; MIR64: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +; MIR64: - { id: 1, type: default, offset: 188, size: 4, alignment: 4, stack-id: 
default, +; MIR64: isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, +; MIR64: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +; MIR64: - { id: 2, type: default, offset: 80, size: 16, alignment: 16, stack-id: default, +; MIR64: isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, +; MIR64: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + +; MIR64: renamable $[[GPR1:x[0-9]+]] = ADDI8 %fixed-stack.2, 0 +; MIR64: renamable $[[GPR2:x[0-9]+]] = ADDI8 %fixed-stack.0, 0 +; MIR64: renamable $f{{[0-9]+}} = XFLOADf64 $zero8, killed renamable $[[GPR1]] +; MIR64: renamable $f{{[0-9]+}} = XFLOADf64 $zero8, killed renamable $[[GPR2]] diff --git a/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills-mir.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec -vec-extabi \ +; RUN: -stop-after=machine-cp -mtriple powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=MIR32 + +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec -vec-extabi \ +; RUN: -stop-after=machine-cp -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=MIR64 + +%struct.Test = type { double, double, double, double } + +@__const.caller.t = private unnamed_addr constant %struct.Test { double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00 }, align 8 + +define double @caller() { +; MIR32-LABEL: name: caller +; MIR32: bb.0.entry: +; MIR32: renamable $r3 = LWZtoc @__const.caller.t, $r2 :: (load 4 from got) +; MIR32: renamable $r4 = LI 31 +; MIR32: renamable $v2 = LVX renamable $r3, killed renamable $r4 +; MIR32: renamable $r4 = LI 16 +; MIR32: renamable $v3 = LVX 
renamable $r3, killed renamable $r4 +; MIR32: renamable $v4 = LVSL $zero, renamable $r3 +; MIR32: renamable $v2 = VPERM renamable $v3, killed renamable $v2, renamable $v4 +; MIR32: renamable $r4 = LI 172 +; MIR32: STXVW4X killed renamable $v2, $r1, killed renamable $r4 :: (store 16 + 16, align 4) +; MIR32: renamable $v2 = LVX $zero, killed renamable $r3 +; MIR32: renamable $v2 = VPERM killed renamable $v2, killed renamable $v3, killed renamable $v4 +; MIR32: renamable $r3 = LI 156 +; MIR32: STXVW4X killed renamable $v2, $r1, killed renamable $r3 :: (store 16, align 4) +; MIR32: ADJCALLSTACKDOWN 188, 0, implicit-def dead $r1, implicit $r1 +; MIR32: renamable $vsl0 = XXLXORz +; MIR32: $f1 = XXLXORdpz +; MIR32: $f2 = XXLXORdpz +; MIR32: $v2 = XXLXORz +; MIR32: $v3 = XXLXORz +; MIR32: $v4 = XXLXORz +; MIR32: $v5 = XXLXORz +; MIR32: $v6 = XXLXORz +; MIR32: $v7 = XXLXORz +; MIR32: $v8 = XXLXORz +; MIR32: $v9 = XXLXORz +; MIR32: $v10 = XXLXORz +; MIR32: $v11 = XXLXORz +; MIR32: $v12 = XXLXORz +; MIR32: $v13 = XXLXORz +; MIR32: $f3 = XXLXORdpz +; MIR32: $f4 = XXLXORdpz +; MIR32: $f5 = XXLXORdpz +; MIR32: $f6 = XXLXORdpz +; MIR32: $f7 = XXLXORdpz +; MIR32: renamable $r3 = LI 136 +; MIR32: $f8 = XXLXORdpz +; MIR32: renamable $r4 = LI 120 +; MIR32: renamable $r5 = LWZtoc %const.0, $r2 :: (load 4 from got) +; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store 16, align 8) +; MIR32: $f9 = XXLXORdpz +; MIR32: renamable $r3 = LI 104 +; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r4 :: (store 16, align 8) +; MIR32: $f10 = XXLXORdpz +; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store 16, align 8) +; MIR32: renamable $r3 = LI 88 +; MIR32: $f11 = XXLXORdpz +; MIR32: STXVW4X renamable $vsl0, $r1, killed renamable $r3 :: (store 16, align 8) +; MIR32: renamable $r3 = LI 72 +; MIR32: renamable $v0 = LXVD2X $zero, killed renamable $r5 :: (load 16 from constant-pool) +; MIR32: $f12 = XXLXORdpz +; MIR32: STXVW4X killed renamable $vsl0, $r1, 
killed renamable $r3 :: (store 16, align 8) +; MIR32: $f13 = XXLXORdpz +; MIR32: renamable $r5 = LI 48 +; MIR32: renamable $r6 = LI 512 +; MIR32: $r3 = LI 128 +; MIR32: $r4 = LI 256 +; MIR32: STXVD2X killed renamable $v0, $r1, killed renamable $r5 :: (store 16) +; MIR32: STW killed renamable $r6, 152, $r1 :: (store 4) +; MIR32: BL_NOP , csr_aix32_altivec, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r4, implicit $f1, implicit $f2, implicit $v2, implicit $v3, implicit $v4, implicit $v5, implicit killed $v6, implicit killed $v7, implicit killed $v8, implicit killed $v9, implicit killed $v10, implicit killed $v11, implicit killed $v12, implicit killed $v13, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $r2, implicit-def $r1, implicit-def $f1 +; MIR32: ADJCALLSTACKUP 188, 0, implicit-def dead $r1, implicit $r1 +; MIR32: BLR implicit $lr, implicit $rm, implicit $f1 + +; MIR64-LABEL: name: caller +; MIR64: bb.0.entry: +; MIR64: renamable $x3 = LDtoc @__const.caller.t, $x2 :: (load 8 from got) +; MIR64: renamable $x4 = LI8 16 +; MIR64: renamable $vsl0 = LXVD2X renamable $x3, killed renamable $x4 :: (load 16 + 16, align 8) +; MIR64: renamable $x4 = LI8 208 +; MIR64: STXVD2X killed renamable $vsl0, $x1, killed renamable $x4 :: (store 16 + 16, align 4) +; MIR64: renamable $vsl0 = LXVD2X $zero8, killed renamable $x3 :: (load 16, align 8) +; MIR64: renamable $x3 = LI8 192 +; MIR64: STXVD2X killed renamable $vsl0, $x1, killed renamable $x3 :: (store 16, align 4) +; MIR64: ADJCALLSTACKDOWN 224, 0, implicit-def dead $r1, implicit $r1 +; MIR64: $f1 = XXLXORdpz +; MIR64: $f2 = XXLXORdpz +; MIR64: $v2 = XXLXORz +; MIR64: $v3 = XXLXORz +; MIR64: $v4 = XXLXORz +; MIR64: $v5 = XXLXORz +; MIR64: $v6 = XXLXORz +; MIR64: $v7 = XXLXORz +; MIR64: $v8 = XXLXORz +; MIR64: $v9 = XXLXORz +; MIR64: $v10 = XXLXORz +; MIR64: $v11 = XXLXORz +; MIR64: $v12 = 
XXLXORz +; MIR64: $v13 = XXLXORz +; MIR64: $f3 = XXLXORdpz +; MIR64: renamable $x3 = LDtocCPT %const.0, $x2 :: (load 8 from got) +; MIR64: $f4 = XXLXORdpz +; MIR64: $f5 = XXLXORdpz +; MIR64: $f6 = XXLXORdpz +; MIR64: renamable $x4 = LDtocCPT %const.1, $x2 :: (load 8 from got) +; MIR64: renamable $vsl0 = LXVD2X $zero8, killed renamable $x3 :: (load 16 from constant-pool) +; MIR64: $f7 = XXLXORdpz +; MIR64: $f8 = XXLXORdpz +; MIR64: renamable $x3 = LI8 160 +; MIR64: $f9 = XXLXORdpz +; MIR64: renamable $x5 = LI8 144 +; MIR64: renamable $vsl13 = LXVD2X $zero8, killed renamable $x4 :: (load 16 from constant-pool) +; MIR64: STXVD2X renamable $vsl0, $x1, killed renamable $x3 :: (store 16, align 8) +; MIR64: $f10 = XXLXORdpz +; MIR64: renamable $x3 = LI8 128 +; MIR64: STXVD2X renamable $vsl0, $x1, killed renamable $x5 :: (store 16, align 8) +; MIR64: $f11 = XXLXORdpz +; MIR64: renamable $x4 = LI8 80 +; MIR64: STXVD2X killed renamable $vsl0, $x1, killed renamable $x3 :: (store 16, align 8) +; MIR64: $f12 = XXLXORdpz +; MIR64: STXVD2X killed renamable $vsl13, $x1, killed renamable $x4 :: (store 16) +; MIR64: $f13 = XXLXORdpz +; MIR64: renamable $x5 = LI8 512 +; MIR64: renamable $x6 = LI8 0 +; MIR64: $x3 = LI8 128 +; MIR64: $x4 = LI8 256 +; MIR64: STD killed renamable $x5, 184, $x1 :: (store 8) +; MIR64: STD killed renamable $x6, 176, $x1 :: (store 8) +; MIR64: BL8_NOP , csr_ppc64_altivec, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $f1, implicit $f2, implicit killed $v2, implicit killed $v3, implicit killed $v4, implicit killed $v5, implicit killed $v6, implicit killed $v7, implicit killed $v8, implicit killed $v9, implicit killed $v10, implicit killed $v11, implicit killed $v12, implicit killed $v13, implicit $f3, implicit $f4, implicit $f5, implicit $f6, implicit $f7, implicit $f8, implicit $f9, implicit $f10, implicit $f11, implicit $f12, implicit $f13, implicit $x2, implicit-def $r1, implicit-def $f1 +; MIR64: ADJCALLSTACKUP 224, 0, 
implicit-def dead $r1, implicit $r1 +; MIR64: BLR8 implicit $lr8, implicit $rm, implicit $f1 + entry: + %call = tail call double @callee(i32 signext 128, i32 signext 256, double 0.000000e+00, double 0.000000e+00, <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, i32 signext 512, %struct.Test* nonnull byval(%struct.Test) align 4 @__const.caller.t) + ret double %call +} + +declare double @callee(i32 signext, i32 signext, double, double, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, double, double, double, double, double, double, double, double, double, double, double, i32 signext, %struct.Test* byval(%struct.Test) align 8) diff --git a/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vec-arg-spills.ll @@ -0,0 +1,149 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \ +; RUN: -vec-extabi -mtriple powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=32BIT + +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \ +; RUN: -vec-extabi -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefix=64BIT +%struct.Test = type { double, double, double, double } + +@__const.caller.t = private unnamed_addr constant %struct.Test { double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00 }, align 8 + +define 
double @caller() { +; 32BIT-LABEL: caller: +; 32BIT: # %bb.0: # %entry +; 32BIT-NEXT: mflr 0 +; 32BIT-NEXT: stw 0, 8(1) +; 32BIT-NEXT: stwu 1, -192(1) +; 32BIT-NEXT: lwz 3, L..C0(2) +; 32BIT-NEXT: li 4, 31 +; 32BIT-NEXT: xxlxor 0, 0, 0 +; 32BIT-NEXT: lwz 5, L..C1(2) +; 32BIT-NEXT: li 6, 512 +; 32BIT-NEXT: xxlxor 1, 1, 1 +; 32BIT-NEXT: xxlxor 2, 2, 2 +; 32BIT-NEXT: lvx 2, 3, 4 +; 32BIT-NEXT: li 4, 16 +; 32BIT-NEXT: lvsl 4, 0, 3 +; 32BIT-NEXT: xxlxor 37, 37, 37 +; 32BIT-NEXT: lvx 3, 3, 4 +; 32BIT-NEXT: li 4, 172 +; 32BIT-NEXT: lxvd2x 32, 0, 5 +; 32BIT-NEXT: xxlxor 38, 38, 38 +; 32BIT-NEXT: xxlxor 39, 39, 39 +; 32BIT-NEXT: li 5, 48 +; 32BIT-NEXT: vperm 2, 3, 2, 4 +; 32BIT-NEXT: xxlxor 40, 40, 40 +; 32BIT-NEXT: xxlxor 41, 41, 41 +; 32BIT-NEXT: xxlxor 42, 42, 42 +; 32BIT-NEXT: xxlxor 43, 43, 43 +; 32BIT-NEXT: xxlxor 44, 44, 44 +; 32BIT-NEXT: stxvw4x 34, 1, 4 +; 32BIT-NEXT: li 4, 120 +; 32BIT-NEXT: xxlxor 45, 45, 45 +; 32BIT-NEXT: lvx 2, 0, 3 +; 32BIT-NEXT: li 3, 156 +; 32BIT-NEXT: xxlxor 3, 3, 3 +; 32BIT-NEXT: xxlxor 4, 4, 4 +; 32BIT-NEXT: vperm 2, 2, 3, 4 +; 32BIT-NEXT: xxlxor 35, 35, 35 +; 32BIT-NEXT: xxlxor 36, 36, 36 +; 32BIT-NEXT: xxlxor 5, 5, 5 +; 32BIT-NEXT: xxlxor 6, 6, 6 +; 32BIT-NEXT: xxlxor 7, 7, 7 +; 32BIT-NEXT: stxvw4x 34, 1, 3 +; 32BIT-NEXT: li 3, 136 +; 32BIT-NEXT: xxlxor 34, 34, 34 +; 32BIT-NEXT: stxvw4x 0, 1, 3 +; 32BIT-NEXT: li 3, 104 +; 32BIT-NEXT: stxvw4x 0, 1, 4 +; 32BIT-NEXT: li 4, 256 +; 32BIT-NEXT: stxvw4x 0, 1, 3 +; 32BIT-NEXT: li 3, 88 +; 32BIT-NEXT: xxlxor 8, 8, 8 +; 32BIT-NEXT: xxlxor 9, 9, 9 +; 32BIT-NEXT: stxvw4x 0, 1, 3 +; 32BIT-NEXT: li 3, 72 +; 32BIT-NEXT: xxlxor 10, 10, 10 +; 32BIT-NEXT: stxvw4x 0, 1, 3 +; 32BIT-NEXT: li 3, 128 +; 32BIT-NEXT: xxlxor 11, 11, 11 +; 32BIT-NEXT: stxvd2x 32, 1, 5 +; 32BIT-NEXT: stw 6, 152(1) +; 32BIT-NEXT: xxlxor 12, 12, 12 +; 32BIT-NEXT: xxlxor 13, 13, 13 +; 32BIT-NEXT: bl .callee[PR] +; 32BIT-NEXT: nop +; 32BIT-NEXT: addi 1, 1, 192 +; 32BIT-NEXT: lwz 0, 8(1) +; 32BIT-NEXT: mtlr 0 +; 32BIT-NEXT: blr + +; 
64BIT-LABEL: caller: +; 64BIT: # %bb.0: # %entry +; 64BIT-NEXT: mflr 0 +; 64BIT-NEXT: std 0, 16(1) +; 64BIT-NEXT: stdu 1, -224(1) +; 64BIT-NEXT: ld 3, L..C0(2) +; 64BIT-NEXT: li 4, 16 +; 64BIT-NEXT: li 5, 144 +; 64BIT-NEXT: xxlxor 1, 1, 1 +; 64BIT-NEXT: li 6, 0 +; 64BIT-NEXT: xxlxor 2, 2, 2 +; 64BIT-NEXT: xxlxor 34, 34, 34 +; 64BIT-NEXT: lxvd2x 0, 3, 4 +; 64BIT-NEXT: li 4, 208 +; 64BIT-NEXT: xxlxor 35, 35, 35 +; 64BIT-NEXT: xxlxor 36, 36, 36 +; 64BIT-NEXT: xxlxor 37, 37, 37 +; 64BIT-NEXT: stxvd2x 0, 1, 4 +; 64BIT-NEXT: ld 4, L..C1(2) +; 64BIT-NEXT: xxlxor 38, 38, 38 +; 64BIT-NEXT: lxvd2x 0, 0, 3 +; 64BIT-NEXT: li 3, 192 +; 64BIT-NEXT: xxlxor 39, 39, 39 +; 64BIT-NEXT: xxlxor 40, 40, 40 +; 64BIT-NEXT: lxvd2x 13, 0, 4 +; 64BIT-NEXT: li 4, 80 +; 64BIT-NEXT: xxlxor 41, 41, 41 +; 64BIT-NEXT: stxvd2x 0, 1, 3 +; 64BIT-NEXT: ld 3, L..C2(2) +; 64BIT-NEXT: xxlxor 42, 42, 42 +; 64BIT-NEXT: xxlxor 43, 43, 43 +; 64BIT-NEXT: xxlxor 44, 44, 44 +; 64BIT-NEXT: lxvd2x 0, 0, 3 +; 64BIT-NEXT: li 3, 160 +; 64BIT-NEXT: xxlxor 45, 45, 45 +; 64BIT-NEXT: xxlxor 3, 3, 3 +; 64BIT-NEXT: xxlxor 4, 4, 4 +; 64BIT-NEXT: stxvd2x 0, 1, 3 +; 64BIT-NEXT: li 3, 128 +; 64BIT-NEXT: xxlxor 5, 5, 5 +; 64BIT-NEXT: xxlxor 6, 6, 6 +; 64BIT-NEXT: stxvd2x 0, 1, 5 +; 64BIT-NEXT: li 5, 512 +; 64BIT-NEXT: xxlxor 7, 7, 7 +; 64BIT-NEXT: stxvd2x 0, 1, 3 +; 64BIT-NEXT: xxlxor 8, 8, 8 +; 64BIT-NEXT: stxvd2x 13, 1, 4 +; 64BIT-NEXT: li 4, 256 +; 64BIT-NEXT: std 5, 184(1) +; 64BIT-NEXT: xxlxor 9, 9, 9 +; 64BIT-NEXT: std 6, 176(1) +; 64BIT-NEXT: xxlxor 10, 10, 10 +; 64BIT-NEXT: xxlxor 11, 11, 11 +; 64BIT-NEXT: xxlxor 12, 12, 12 +; 64BIT-NEXT: xxlxor 13, 13, 13 +; 64BIT-NEXT: bl .callee[PR] +; 64BIT-NEXT: nop +; 64BIT-NEXT: addi 1, 1, 224 +; 64BIT-NEXT: ld 0, 16(1) +; 64BIT-NEXT: mtlr 0 +; 64BIT-NEXT: blr + entry: + %call = tail call double @callee(i32 signext 128, i32 signext 256, double 0.000000e+00, double 0.000000e+00, <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> 
, <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , <2 x double> , double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, i32 signext 512, %struct.Test* nonnull byval(%struct.Test) align 4 @__const.caller.t) + ret double %call +} + +declare double @callee(i32 signext, i32 signext, double, double, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, <2 x double>, double, double, double, double, double, double, double, double, double, double, double, i32 signext, %struct.Test* byval(%struct.Test) align 8) diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-stack-caller.ll b/llvm/test/CodeGen/PowerPC/aix-vector-stack-caller.ll --- a/llvm/test/CodeGen/PowerPC/aix-vector-stack-caller.ll +++ b/llvm/test/CodeGen/PowerPC/aix-vector-stack-caller.ll @@ -1,17 +1,98 @@ -; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \ -; RUN: -vec-extabi -mtriple powerpc-ibm-aix-xcoff 2>&1 | \ -; RUN: FileCheck %s --check-prefix=AIX-ERROR +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \ +; RUN: -vec-extabi -mtriple powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefixes=32BIT,LITERAL -; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \ -; RUN: -vec-extabi -mtriple powerpc64-ibm-aix-xcoff 2>&1 | \ -; RUN: FileCheck %s --check-prefix=AIX-ERROR +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \ +; RUN: -vec-extabi -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck %s --check-prefixes=64BIT,LITERAL define dso_local i32 @vec_caller() { +; LITERAL: L..CPI0_0: +; LITERAL-NEXT: .vbyte 4, 53 +; LITERAL-NEXT: 
.vbyte 4, 54 +; LITERAL-NEXT: .vbyte 4, 55 +; LITERAL-NEXT: .vbyte 4, 56 +; LITERAL-NEXT: L..CPI0_1: +; LITERAL-NEXT: .vbyte 4, 49 +; LITERAL-NEXT: .vbyte 4, 50 +; LITERAL-NEXT: .vbyte 4, 51 +; LITERAL-NEXT: .vbyte 4, 52 + +; 32BIT-LABEL: vec_caller: +; 32BIT: # %bb.0: # %entry +; 32BIT-NEXT: mflr 0 +; 32BIT-NEXT: stw 0, 8(1) +; 32BIT-NEXT: stwu 1, -64(1) +; 32BIT-NEXT: lwz 3, L..C0(2) +; 32BIT-NEXT: lwz 4, L..C1(2) +; 32BIT-NEXT: xxlxor 34, 34, 34 +; 32BIT-NEXT: xxlxor 35, 35, 35 +; 32BIT-NEXT: xxlxor 36, 36, 36 +; 32BIT-NEXT: lxvw4x 0, 0, 3 +; 32BIT-NEXT: lxvw4x 1, 0, 4 +; 32BIT-NEXT: xxlxor 37, 37, 37 +; 32BIT-NEXT: li 3, 48 +; 32BIT-NEXT: xxlxor 38, 38, 38 +; 32BIT-NEXT: li 4, 32 +; 32BIT-NEXT: xxlxor 39, 39, 39 +; 32BIT-NEXT: xxlxor 40, 40, 40 +; 32BIT-NEXT: stxvw4x 0, 1, 3 +; 32BIT-NEXT: xxlxor 41, 41, 41 +; 32BIT-NEXT: stxvw4x 1, 1, 4 +; 32BIT-NEXT: xxlxor 42, 42, 42 +; 32BIT-NEXT: xxlxor 43, 43, 43 +; 32BIT-NEXT: xxlxor 44, 44, 44 +; 32BIT-NEXT: xxlxor 45, 45, 45 +; 32BIT-NEXT: bl .vec_callee_stack[PR] +; 32BIT-NEXT: nop +; 32BIT-NEXT: addi 1, 1, 64 +; 32BIT-NEXT: lwz 0, 8(1) +; 32BIT-NEXT: mtlr 0 +; 32BIT-NEXT: blr + + +; 64BIT-LABEL: vec_caller: +; 64BIT: # %bb.0: # %entry +; 64BIT-NEXT: mflr 0 +; 64BIT-NEXT: std 0, 16(1) +; 64BIT-NEXT: stdu 1, -112(1) +; 64BIT-NEXT: ld 3, L..C0(2) +; 64BIT-NEXT: ld 4, L..C1(2) +; 64BIT-NEXT: xxlxor 34, 34, 34 +; 64BIT-NEXT: xxlxor 35, 35, 35 +; 64BIT-NEXT: xxlxor 36, 36, 36 +; 64BIT-NEXT: lxvw4x 0, 0, 3 +; 64BIT-NEXT: lxvw4x 1, 0, 4 +; 64BIT-NEXT: xxlxor 37, 37, 37 +; 64BIT-NEXT: li 3, 64 +; 64BIT-NEXT: xxlxor 38, 38, 38 +; 64BIT-NEXT: li 4, 48 +; 64BIT-NEXT: xxlxor 39, 39, 39 +; 64BIT-NEXT: xxlxor 40, 40, 40 +; 64BIT-NEXT: stxvw4x 0, 1, 3 +; 64BIT-NEXT: xxlxor 41, 41, 41 +; 64BIT-NEXT: stxvw4x 1, 1, 4 +; 64BIT-NEXT: xxlxor 42, 42, 42 +; 64BIT-NEXT: xxlxor 43, 43, 43 +; 64BIT-NEXT: xxlxor 44, 44, 44 +; 64BIT-NEXT: xxlxor 45, 45, 45 +; 64BIT-NEXT: bl .vec_callee_stack[PR] +; 64BIT-NEXT: nop +; 64BIT-NEXT: addi 1, 1, 112 
+; 64BIT-NEXT: ld 0, 16(1) +; 64BIT-NEXT: mtlr 0 +; 64BIT-NEXT: blr + +; LITERAL: .toc +; LITERAL: L..C0: +; LITERAL-NEXT: .tc L..CPI0_0[TC],L..CPI0_0 +; LITERAL-NEXT: L..C1: +; LITERAL-NEXT: .tc L..CPI0_1[TC],L..CPI0_1 + entry: - %call = call i32 bitcast (i32 (...)* @vec_callee_stack to i32 (<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)*)(<4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> ) - ret i32 0 + %call = call i32 bitcast (i32 (...)* @vec_callee_stack to i32 (<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>)*)(<4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> ) + ret i32 %call } declare i32 @vec_callee_stack(...) 
-
-; AIX-ERROR: LLVM ERROR: passing vector parameters to the stack is unimplemented for AIX
diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-stack.ll b/llvm/test/CodeGen/PowerPC/aix-vector-stack.ll
--- a/llvm/test/CodeGen/PowerPC/aix-vector-stack.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vector-stack.ll
@@ -1,27 +1,24 @@
-; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
-; RUN: -vec-extabi -mtriple powerpc-ibm-aix-xcoff 2>&1 | \
-; RUN: FileCheck %s --check-prefix=AIX-ERROR
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN: -vec-extabi -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN: FileCheck %s --check-prefix=32BIT
 
-; RUN: not --crash llc < %s -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
-; RUN: -vec-extabi -mtriple powerpc64-ibm-aix-xcoff 2>&1 | \
-; RUN: FileCheck %s --check-prefix=AIX-ERROR
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN: -vec-extabi -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN: FileCheck %s --check-prefix=64BIT
 
-define dso_local <4 x i32> @vec_callee_stack(<4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, <4 x i32> %vec4, <4 x i32> %vec5, <4 x i32> %vec6, <4 x i32> %vec7, <4 x i32> %vec8, <4 x i32> %vec9, <4 x i32> %vec10, <4 x i32> %vec11, <4 x i32> %vec12, <4 x i32> %vec13, <4 x i32> %vec14) {
+define dso_local <4 x i32> @vec_callee_stack(<4 x i32> %vr2, <4 x i32> %vr3, <4 x i32> %vr4, <4 x i32> %vr5, <4 x i32> %vr6, <4 x i32> %vr7, <4 x i32> %vr8, <4 x i32> %vr9, <4 x i32> %vr10, <4 x i32> %vr11, <4 x i32> %vr12, <4 x i32> %vr13, <4 x i32> %vSpill1, <4 x i32> %vSpill2) {
 entry:
-  %add = add <4 x i32> %vec1, %vec2
-  %add1 = add <4 x i32> %add, %vec3
-  %add2 = add <4 x i32> %add1, %vec4
-  %add3 = add <4 x i32> %add2, %vec5
-  %add4 = add <4 x i32> %add3, %vec6
-  %add5 = add <4 x i32> %add4, %vec7
-  %add6 = add <4 x i32> %add5, %vec8
-  %add7 = add <4 x i32> %add6, %vec9
-  %add8 = add <4 x i32> %add7, %vec10
-  %add9 = add <4 x i32> %add8, %vec11
-  %add10 = add <4 x i32> %add9, %vec12
-  %add11 = add <4 x i32> %add10, %vec13
-  %add12 = add <4 x i32> %add11, %vec14
-  ret <4 x i32> %add12
+  ret <4 x i32> %vSpill2
 }
 
-; AIX-ERROR: LLVM ERROR: passing vector parameters to the stack is unimplemented for AIX
+; The first twelve vector arguments are passed in v2-v13; %vSpill1 and %vSpill2
+; are passed on the stack. Check that the callee reloads %vSpill2 from its
+; stack slot (offset 48 in 32-bit mode, 64 in 64-bit mode) into v2 (vs34).
+
+; 32BIT-LABEL: .vec_callee_stack:
+; 32BIT: addi [[SCRATCH:[0-9]+]], 1, 48
+; 32BIT-NEXT: lxvw4x 34, 0, [[SCRATCH]]
+
+; 64BIT-LABEL: .vec_callee_stack:
+; 64BIT: addi [[SCRATCH:[0-9]+]], 1, 64
+; 64BIT-NEXT: lxvw4x 34, 0, [[SCRATCH]]