diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7004,24 +7004,12 @@
                                .Options.EnableAIXExtendedAltivecABI)
     report_fatal_error("the default Altivec AIX ABI is not yet supported");
 
-  if (ValVT.isVector() && State.getMachineFunction()
-                              .getTarget()
-                              .Options.EnableAIXExtendedAltivecABI)
-    report_fatal_error("the extended Altivec AIX ABI is not yet supported");
-
-  assert((!ValVT.isInteger() ||
-          (ValVT.getFixedSizeInBits() <= RegVT.getFixedSizeInBits())) &&
-         "Integer argument exceeds register size: should have been legalized");
-
   if (ValVT == MVT::f128)
     report_fatal_error("f128 is unimplemented on AIX.");
 
   if (ArgFlags.isNest())
     report_fatal_error("Nest arguments are unimplemented.");
 
-  if (ValVT.isVector() || LocVT.isVector())
-    report_fatal_error("Vector arguments are unimplemented on AIX.");
-
   static const MCPhysReg GPR_32[] = {// 32-bit registers.
                                      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                      PPC::R7, PPC::R8, PPC::R9, PPC::R10};
@@ -7029,6 +7017,11 @@
                                      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                      PPC::X7, PPC::X8, PPC::X9, PPC::X10};
 
+  static const MCPhysReg VR[] = {// Vector registers.
+                                 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
+                                 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
+                                 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
+
   if (ArgFlags.isByVal()) {
     if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
       report_fatal_error("Pass-by-value arguments with alignment greater than "
@@ -7124,6 +7117,24 @@
 
     return false;
   }
+  case MVT::v4f32:
+  case MVT::v4i32:
+  case MVT::v8i16:
+  case MVT::v16i8:
+  case MVT::v2i64:
+  case MVT::v2f64:
+  case MVT::v1i128: {
+    if (State.isVarArg())
+      report_fatal_error(
+          "variadic arguments for vector types are unimplemented for AIX");
+
+    const unsigned Offset = State.AllocateStack(16, Align(16));
+    if (unsigned VReg = State.AllocateReg(VR))
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
+    else
+      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
   }
   return true;
 }
@@ -7144,6 +7155,14 @@
     return &PPC::F4RCRegClass;
   case MVT::f64:
     return &PPC::F8RCRegClass;
+  case MVT::v4f32:
+  case MVT::v4i32:
+  case MVT::v8i16:
+  case MVT::v16i8:
+  case MVT::v2i64:
+  case MVT::v2f64:
+  case MVT::v1i128:
+    return &PPC::VRRCRegClass;
   }
 }
 
@@ -7457,8 +7476,6 @@
 
   const PPCSubtarget &Subtarget =
       static_cast<const PPCSubtarget &>(DAG.getSubtarget());
-  if (Subtarget.hasAltivec())
-    report_fatal_error("Altivec support is unimplemented on AIX.");
 
   MachineFunction &MF = DAG.getMachineFunction();
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -7748,10 +7765,6 @@
 
     SDValue Arg = OutVals[RealResIdx];
 
-    if (Subtarget.isAIXABI() &&
-        (VA.getLocVT().isVector() || VA.getValVT().isVector()))
-      report_fatal_error("Returning vector types not yet supported on AIX.");
-
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -231,8 +231,10 @@
   }
 
   if (Subtarget.isAIXABI()) {
-    assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
-    return TM.isPPC64() ? CSR_PPC64_RegMask : CSR_AIX32_RegMask;
+    return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
+                                                  : CSR_PPC64_RegMask)
+                        : (Subtarget.hasAltivec() ? CSR_AIX32_Altivec_RegMask
+                                                  : CSR_AIX32_RegMask);
   }
 
   if (CC == CallingConv::Cold) {
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll b/llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll
--- a/llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll
@@ -1,12 +1,7 @@
-; RUN: not --crash llc < %s -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s
-; RUN: not --crash llc < %s -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s
-
-; This test expects a compiler diagnostic for an AIX limitation on Altivec
-; support. When the Altivec limitation diagnostic is removed, this test
-; should compile clean and fail in order to alert the author to validate the
-; instructions emitted to initialize the GPR for the double vararg.
-; The mfvsrwz and mfvsrd instructions should be used to initialize the GPR for
-; the double vararg without going through memory.
+; RUN: llc < %s -mtriple powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr8 2>&1 | \
+; RUN:   FileCheck %s --check-prefix=ASM64
+; RUN: llc < %s -mtriple powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr8 2>&1 | \
+; RUN:   FileCheck %s --check-prefix=ASM32
 
 @f1 = global float 0.000000e+00, align 4
 
@@ -20,4 +15,15 @@
 
 declare void @test_vararg(i32, ...)
 
-; CHECK: LLVM ERROR: Altivec support is unimplemented on AIX.
+
+; ASM64: xscvdpspn
+; ASM64: mffprd
+; ASM64: xxsldwi
+; ASM64: mffprwz
+
+
+; ASM32: lfsx
+; ASM32: fmr
+; ASM32: stfs
+; ASM32: lwz
+; ASM32: stfd
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-ext-vec-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-ext-vec-abi.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-ext-vec-abi.ll
@@ -0,0 +1,164 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN:     -vec-extabi -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck --check-prefixes=ASM32 %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \
+; RUN:     -vec-extabi -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck --check-prefixes=ASM64 %s
+
+define dso_local <4 x i32> @vec_callee(<4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, <4 x i32> %vec4, <4 x i32> %vec5, <4 x i32> %vec6, <4 x i32> %vec7, <4 x i32> %vec8, <4 x i32> %vec9, <4 x i32> %vec10, <4 x i32> %vec11, <4 x i32> %vec12, <4 x i32> %vec13, <4 x i32> %vec14) {
+entry:
+  %add = add <4 x i32> %vec1, %vec2
+  %add1 = add <4 x i32> %add, %vec3
+  %add2 = add <4 x i32> %add1, %vec4
+  %add3 = add <4 x i32> %add2, %vec5
+  %add4 = add <4 x i32> %add3, %vec6
+  %add5 = add <4 x i32> %add4, %vec7
+  %add6 = add <4 x i32> %add5, %vec8
+  %add7 = add <4 x i32> %add6, %vec9
+  %add8 = add <4 x i32> %add7, %vec10
+  %add9 = add <4 x i32> %add8, %vec11
+  %add10 = add <4 x i32> %add9, %vec12
+  %add11 = add <4 x i32> %add10, %vec13
+  %add12 = add <4 x i32> %add11, %vec14
+  ret <4 x i32> %add12
+}
+
+; ASM32-LABEL: .vec_callee:
+
+; ASM32: # %bb.0: # %entry
+; ASM32-DAG: vadduwm 2, 2, 3
+; ASM32-DAG: addi 3, 1, 224
+; ASM32-DAG: lxvw4x {{[0-9]+}}, 0, 3
+; ASM32-DAG: addi 3, 1, 240
+; ASM32-DAG: vadduwm 2, 2, 4
+; ASM32-DAG: lxvw4x {{[0-9]+}}, 0, 3
+; ASM32-DAG: vadduwm 2, 2, 5
+; ASM32-DAG: vadduwm 2, 2, 6
+; ASM32-DAG: vadduwm 2, 2, 7
+; ASM32-DAG: vadduwm 2, 2, 8
+; ASM32-DAG: vadduwm 2, 2, 9
+; ASM32-DAG: vadduwm 2, 2, 10
+; ASM32-DAG: vadduwm 2, 2, 11
+; ASM32-DAG: vadduwm 2, 2, 12
+; ASM32-DAG: vadduwm 2, 2, 13
+; ASM32-DAG: vadduwm 2, 2, {{[0-9]+}}
+; ASM32-DAG: vadduwm 2, 2, {{[0-9]+}}
+; ASM32: blr
+
+; ASM64-LABEL: .vec_callee:
+
+; ASM64: # %bb.0: # %entry
+; ASM64-NEXT: vadduwm 2, 2, 3
+; ASM64-NEXT: addi 3, 1, 240
+; ASM64-NEXT: lxvw4x {{[0-9]+}}, 0, 3
+; ASM64-NEXT: addi 3, 1, 256
+; ASM64-NEXT: vadduwm 2, 2, 4
+; ASM64-NEXT: lxvw4x {{[0-9]+}}, 0, 3
+; ASM64-NEXT: vadduwm 2, 2, 5
+; ASM64-NEXT: vadduwm 2, 2, 6
+; ASM64-NEXT: vadduwm 2, 2, 7
+; ASM64-NEXT: vadduwm 2, 2, 8
+; ASM64-NEXT: vadduwm 2, 2, 9
+; ASM64-NEXT: vadduwm 2, 2, 10
+; ASM64-NEXT: vadduwm 2, 2, 11
+; ASM64-NEXT: vadduwm 2, 2, 12
+; ASM64-NEXT: vadduwm 2, 2, 13
+; ASM64-NEXT: vadduwm 2, 2, {{[0-9]+}}
+; ASM64-NEXT: vadduwm 2, 2, {{[0-9]+}}
+; ASM64-NEXT: blr
+
+define dso_local i32 @vec_caller() {
+entry:
+  %call = call <4 x i32> @vec_callee(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 5, i32 5, i32 5, i32 5>, <4 x i32> <i32 6, i32 6, i32 6, i32 6>, <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32> <i32 8, i32 8, i32 8, i32 8>, <4 x i32> <i32 9, i32 9, i32 9, i32 9>, <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i32> <i32 11, i32 11, i32 11, i32 11>, <4 x i32> <i32 12, i32 12, i32 12, i32 12>, <4 x i32> <i32 13, i32 13, i32 13, i32 13>, <4 x i32> <i32 14, i32 14, i32 14, i32 14>)
+  ret i32 0
+}
+
+; ASM32-LABEL: .vec_caller:
+
+; ASM32: # %bb.0: # %entry
+; ASM32-DAG: mflr 0
+; ASM32-DAG: stw 0, 8(1)
+; ASM32-DAG: stwu 1, -256(1)
+; ASM32-DAG: lwz 3, L..C0(2)
+; ASM32-DAG: lxvw4x 34, 0, 3
+; ASM32-DAG: lwz 4, L..C1(2)
+; ASM32-DAG: lxvw4x 35, 0, 4
+; ASM32-DAG: lwz 5, L..C2(2)
+; ASM32-DAG: lxvw4x 36, 0, 5
+; ASM32-DAG: lwz 6, L..C3(2)
+; ASM32-DAG: lxvw4x 37, 0, 6
+; ASM32-DAG: lwz 7, L..C4(2)
+; ASM32-DAG: lxvw4x 38, 0, 7
+; ASM32-DAG: lwz 8, L..C5(2)
+; ASM32-DAG: lxvw4x 39, 0, 8
+; ASM32-DAG: lwz 3, L..C6(2)
+; ASM32-DAG: lxvw4x 40, 0, 3
+; ASM32-DAG: lwz 4, L..C7(2)
+; ASM32-DAG: lxvw4x 41, 0, 4
+; ASM32-DAG: lwz 5, L..C8(2)
+; ASM32-DAG: lxvw4x 42, 0, 5
+; ASM32-DAG: lwz 6, L..C9(2)
+; ASM32-DAG: lxvw4x 43, 0, 6
+; ASM32-DAG: lwz 7, L..C10(2)
+; ASM32-DAG: lxvw4x 44, 0, 7
+; ASM32-DAG: lwz 8, L..C11(2)
+; ASM32-DAG: lxvw4x 45, 0, 8
+; ASM32-DAG: lwz 3, L..C12(2)
+; ASM32-DAG: lxvw4x 0, 0, 3
+; ASM32-DAG: lwz 4, L..C13(2)
+; ASM32-DAG: lxvw4x 1, 0, 4
+; ASM32-DAG: li 3, 240
+; ASM32-DAG: li 4, 224
+; ASM32-DAG: stxvw4x 0, 1, 3
+; ASM32-DAG: stxvw4x 1, 1, 4
+; ASM32-DAG: bl .vec_callee
+; ASM32-DAG: li 3, 0
+; ASM32-DAG: addi 1, 1, 256
+; ASM32-DAG: lwz 0, 8(1)
+; ASM32-DAG: mtlr 0
+; ASM32-DAG: blr
+
+; ASM64-LABEL: .vec_caller:
+
+; ASM64: # %bb.0: # %entry
+; ASM64-DAG: mflr 0
+; ASM64-DAG: std 0, 16(1)
+; ASM64-DAG: stdu 1, -272(1)
+; ASM64-DAG: ld 3, L..C0(2)
+; ASM64-DAG: lxvw4x 34, 0, 3
+; ASM64-DAG: ld 4, L..C1(2)
+; ASM64-DAG: lxvw4x 35, 0, 4
+; ASM64-DAG: ld 5, L..C2(2)
+; ASM64-DAG: lxvw4x 36, 0, 5
+; ASM64-DAG: ld 6, L..C3(2)
+; ASM64-DAG: lxvw4x 37, 0, 6
+; ASM64-DAG: ld 7, L..C4(2)
+; ASM64-DAG: lxvw4x 38, 0, 7
+; ASM64-DAG: ld 8, L..C5(2)
+; ASM64-DAG: lxvw4x 39, 0, 8
+; ASM64-DAG: ld 3, L..C6(2)
+; ASM64-DAG: lxvw4x 40, 0, 3
+; ASM64-DAG: ld 4, L..C7(2)
+; ASM64-DAG: lxvw4x 41, 0, 4
+; ASM64-DAG: ld 5, L..C8(2)
+; ASM64-DAG: lxvw4x 42, 0, 5
+; ASM64-DAG: ld 6, L..C9(2)
+; ASM64-DAG: lxvw4x 43, 0, 6
+; ASM64-DAG: ld 7, L..C10(2)
+; ASM64-DAG: lxvw4x 44, 0, 7
+; ASM64-DAG: ld 8, L..C11(2)
+; ASM64-DAG: lxvw4x 45, 0, 8
+; ASM64-DAG: ld 3, L..C12(2)
+; ASM64-DAG: lxvw4x 0, 0, 3
+; ASM64-DAG: ld 4, L..C13(2)
+; ASM64-DAG: lxvw4x 1, 0, 4
+; ASM64-DAG: li 3, 256
+; ASM64-DAG: li 4, 240
+; ASM64-DAG: stxvw4x 0, 1, 3
+; ASM64-DAG: stxvw4x 1, 1, 4
+; ASM64-DAG: bl .vec_callee
+; ASM64-DAG: li 3, 0
+; ASM64-DAG: addi 1, 1, 272
+; ASM64-DAG: ld 0, 16(1)
+; ASM64-DAG: mtlr 0
+; ASM64: blr
diff --git a/llvm/test/CodeGen/PowerPC/aix-vec-abi.ll b/llvm/test/CodeGen/PowerPC/aix-vec-abi.ll
--- a/llvm/test/CodeGen/PowerPC/aix-vec-abi.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec-abi.ll
@@ -1,12 +1,8 @@
 ; RUN: not --crash llc < %s -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s --check-prefix=DFLTERROR
 ; RUN: not --crash llc < %s -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s --check-prefix=DFLTERROR
-; RUN: not --crash llc < %s -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 -vec-extabi 2>&1 | FileCheck %s --check-prefix=VEXTERROR
-; RUN: not --crash llc < %s -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 -vec-extabi 2>&1 | FileCheck %s --check-prefix=VEXTERROR
-
 
 define void @vec_callee(<4 x i32> %vec1) {
   ret void
 }
 
 ; DFLTERROR: LLVM ERROR: the default Altivec AIX ABI is not yet supported
-; VEXTERROR: LLVM ERROR: the extended Altivec AIX ABI is not yet supported
diff --git a/llvm/test/CodeGen/PowerPC/aix-vector-return.ll b/llvm/test/CodeGen/PowerPC/aix-vector-return.ll
deleted file mode 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vector-return.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: not --crash llc --verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff \
-; RUN:   -mattr=+altivec 2>&1 < %s | FileCheck %s
-
-; RUN: not --crash llc --verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff \
-; RUN:   -mattr=+altivec 2>&1 < %s | FileCheck %s
-
-; CHECK: LLVM ERROR: Returning vector types not yet supported on AIX.
-
-define dso_local <4 x i32> @test() local_unnamed_addr #0 {
-  entry:
-  ret <4 x i32> zeroinitializer
-}
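
Note for reviewers, not part of the patch: a minimal standalone sketch of how
the new calling-convention path can be exercised, assuming the patch as
applied. The function name, RUN line, and expected assembly below are
illustrative only; per the VR register list added to CC_AIX, the first vector
argument and the <4 x i32> return value should both be assigned to v2, so the
add can execute in place with no register copies.

; RUN: llc < %s -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr7 \
; RUN:   -mattr=+altivec -vec-extabi | FileCheck %s

define dso_local <4 x i32> @single_vec_arg(<4 x i32> %v) {
entry:
  ; %v arrives in v2 and the result is returned in v2, so the only vector
  ; instruction expected in the body is the in-place vadduwm.
  %sum = add <4 x i32> %v, %v
  ret <4 x i32> %sum
}

; CHECK-LABEL: .single_vec_arg:
; CHECK: vadduwm 2, 2, 2
; CHECK: blr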