diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -291,6 +291,8 @@
                                         F27, F28, F29, F30, F31, CR2, CR3, CR4
                                        )>;
 
+def CSR_AIX32_Altivec: CalleeSavedRegs<(add CSR_AIX32, CSR_Altivec)>;
+
 // Common CalleeSavedRegs for SVR4 and AIX.
 def CSR_PPC64   : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
                                         X21, X22, X23, X24, X25, X26, X27, X28,
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -223,13 +223,17 @@
       CALLEE_SAVED_GPRS32,
       // Add AIX's extra CSR.
       {PPC::R13, -76},
-      // TODO: Update when we add vector support for AIX.
+      // VRSAVE save area offset.
+      {PPC::VRSAVE, -4},
+      CALLEE_SAVED_VRS
   };
 
   static const SpillSlot AIXOffsets64[] = {
       CALLEE_SAVED_FPRS,
       CALLEE_SAVED_GPRS64,
-      // TODO: Update when we add vector support for AIX.
+      // VRSAVE save area offset.
+      {PPC::VRSAVE, -4},
+      CALLEE_SAVED_VRS
   };
 
   if (Subtarget.is64BitELFABI()) {
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -6794,9 +6794,10 @@
   const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
 
-  assert((!ValVT.isInteger() ||
-          (ValVT.getSizeInBits() <= RegVT.getSizeInBits())) &&
-         "Integer argument exceeds register size: should have been legalized");
+  if (ValVT.isInteger() && !ValVT.isVector())
+    assert(
+        (ValVT.getSizeInBits() <= RegVT.getSizeInBits()) &&
+        "Integer argument exceeds register size: should have been legalized");
 
   if (ValVT == MVT::f128)
     report_fatal_error("f128 is unimplemented on AIX.");
@@ -6804,9 +6805,6 @@
   if (ArgFlags.isNest())
     report_fatal_error("Nest arguments are unimplemented.");
 
-  if (ValVT.isVector() || LocVT.isVector())
-    report_fatal_error("Vector arguments are unimplemented on AIX.");
-
   static const MCPhysReg GPR_32[] = {// 32-bit registers.
                                      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                      PPC::R7, PPC::R8, PPC::R9, PPC::R10};
@@ -6814,6 +6812,11 @@
                                      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                      PPC::X7, PPC::X8, PPC::X9, PPC::X10};
 
+  static const MCPhysReg VR[] = {// Vector registers.
+                                 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
+                                 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
+                                 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
+
   if (ArgFlags.isByVal()) {
     if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
       report_fatal_error("Pass-by-value arguments with alignment greater than "
@@ -6909,6 +6912,24 @@
     return false;
   }
+  case MVT::v4f32:
+  case MVT::v4i32:
+  case MVT::v8i16:
+  case MVT::v16i8:
+  case MVT::v2f64:
+  case MVT::v2i64:
+  case MVT::v1i128: {
+    if (State.isVarArg())
+      report_fatal_error(
+          "variadic arguments for vector are not yet implemented for AIX");
+
+    const unsigned Offset = State.AllocateStack(16, Align(16));
+    if (unsigned VReg = State.AllocateReg(VR))
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
+    else
+      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
   }
 
   return true;
 }
@@ -6929,6 +6950,14 @@
     return &PPC::F4RCRegClass;
   case MVT::f64:
     return &PPC::F8RCRegClass;
+  case MVT::v4f32:
+  case MVT::v4i32:
+  case MVT::v8i16:
+  case MVT::v16i8:
+  case MVT::v2f64:
+  case MVT::v2i64:
+  case MVT::v1i128:
+    return &PPC::VRRCRegClass;
   }
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -188,7 +188,8 @@
   }
   // 32-bit targets.
   if (Subtarget.isAIXABI())
-    return CSR_AIX32_SaveList;
+    return (Subtarget.hasAltivec()) ? CSR_AIX32_Altivec_SaveList
+                                    : CSR_AIX32_SaveList;
   if (Subtarget.hasAltivec())
     return CSR_SVR432_Altivec_SaveList;
   else if (Subtarget.hasSPE())
@@ -209,8 +210,11 @@
   }
 
   if (Subtarget.isAIXABI()) {
-    assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
-    return TM.isPPC64() ? CSR_PPC64_RegMask : CSR_AIX32_RegMask;
+    return TM.isPPC64()
+               ? (Subtarget.hasAltivec() ? CSR_64_AllRegs_Altivec_RegMask
+                                         : CSR_PPC64_RegMask)
+               : (Subtarget.hasAltivec() ? CSR_AIX32_Altivec_RegMask
+                                         : CSR_AIX32_RegMask);
   }
 
   if (CC == CallingConv::Cold) {
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -308,9 +308,6 @@
   bool hasFusion() const { return HasFusion; }
   bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
   bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
-  bool needsSwapsForVSXMemOps() const {
-    return hasVSX() && isLittleEndian() && !hasP9Vector();
-  }
 
   POPCNTDKind hasPOPCNTD() const { return HasPOPCNTD; }
 
@@ -328,6 +325,9 @@
   bool is32BitELFABI() const { return isSVR4ABI() && !isPPC64(); }
   bool isUsingPCRelativeCalls() const;
 
+  bool needsSwapsForVSXMemOps() const {
+    return hasVSX() && (isLittleEndian() || isAIXABI()) && !hasP9Vector();
+  }
   /// Originally, this function return hasISEL(). Now we always enable it,
   /// but may expand the ISEL instruction later.
   bool enableEarlyIfConversion() const override { return true; }
diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-vector.ll b/llvm/test/CodeGen/PowerPC/aix-cc-vector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-cc-vector.ll
@@ -0,0 +1,254 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=altivec \
+; RUN:   -mtriple powerpc-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck --check-prefixes=CHECKASM,ASM32 %s
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=altivec \
+; RUN:   -mtriple powerpc64-ibm-aix-xcoff < %s | \
+; RUN:   FileCheck --check-prefixes=CHECKASM,ASM64 %s
+
+define dso_local <4 x i32> @int(<4 x i32> %int0, <4 x i32> %int1, <4 x i32> %int2, <4 x i32> %int3, <4 x i32> %int4, <4 x i32> %int5, <4 x i32> %int6, <4 x i32> %int7, <4 x i32> %int8, <4 x i32> %int9, <4 x i32> %int10, <4 x i32> %int11, <4 x i32> %int12, <4 x i32> %int13, <4 x i32> %int14) {
+entry:
+  %add = add <4 x i32> %int0, %int1
+  %add1 = add <4 x i32> %add, %int2
+  %add2 = add <4 x i32> %add1, %int3
+  %add3 = add <4 x i32> %add2, %int4
+  %add4 = add <4 x i32> %add3, %int5
+  %add5 = add <4 x i32> %add4, %int6
+  %add6 = add <4 x i32> %add5, %int7
+  %add7 = add <4 x i32> %add6, %int8
+  %add8 = add <4 x i32> %add7, %int9
+  %add9 = add <4 x i32> %add8, %int10
+  %add10 = add <4 x i32> %add9, %int11
+  %add11 = add <4 x i32> %add10, %int12
+  %add12 = add <4 x i32> %add11, %int13
+  %add13 = add <4 x i32> %add12, %int14
+  ret <4 x i32> %add13
+}
+
+; CHECKASM-LABEL: .int:
+
+; ASM32-DAG: addi [[REG1:[0-9]+]], 1, 224
+; ASM32-DAG: lxvw4x 35, 0, [[REG1]]
+; ASM32-DAG: addi [[REG2:[0-9]+]], 1, 240
+; ASM32-DAG: lxvw4x 36, 0, [[REG2]]
+; ASM32-DAG: addi [[REG3:[0-9]+]], 1, 256
+; ASM32-DAG: lxvw4x 35, 0, [[REG3]]
+; ASM32-DAG: vadduwm 2, 2, 3
+; ASM32-DAG: vadduwm 2, 2, 4
+; ASM32-DAG: vadduwm 2, 2, 5
+; ASM32-DAG: vadduwm 2, 2, 6
+; ASM32-DAG: vadduwm 2, 2, 7
+; ASM32-DAG: vadduwm 2, 2, 8
+; ASM32-DAG: vadduwm 2, 2, 9
+; ASM32-DAG: vadduwm 2, 2, 10
+; ASM32-DAG: vadduwm 2, 2, 11
+; ASM32-DAG: vadduwm 2, 2, 12
+; ASM32-DAG: vadduwm 2, 2, 13
+; ASM32-DAG: vadduwm 2, 2, 3
+; ASM32-DAG: vadduwm 2, 2, 4
+; ASM32-DAG: vadduwm 2, 2, 3
+
+; ASM64-DAG: addi [[REG1:[0-9]+]], 1, 240
+; ASM64-DAG: lxvw4x 35, 0, [[REG1]]
+; ASM64-DAG: addi [[REG2:[0-9]+]], 1, 256
+; ASM64-DAG: lxvw4x 36, 0, [[REG2]]
+; ASM64-DAG: addi [[REG3:[0-9]+]], 1, 272
+; ASM64-DAG: lxvw4x 35, 0, [[REG3]]
+; ASM64-DAG: vadduwm 2, 2, 3
+; ASM64-DAG: vadduwm 2, 2, 4
+; ASM64-DAG: vadduwm 2, 2, 5
+; ASM64-DAG: vadduwm 2, 2, 6
+; ASM64-DAG: vadduwm 2, 2, 7
+; ASM64-DAG: vadduwm 2, 2, 8
+; ASM64-DAG: vadduwm 2, 2, 9
+; ASM64-DAG: vadduwm 2, 2, 10
+; ASM64-DAG: vadduwm 2, 2, 11
+; ASM64-DAG: vadduwm 2, 2, 12
+; ASM64-DAG: vadduwm 2, 2, 13
+; ASM64-DAG: vadduwm 2, 2, 3
+; ASM64-DAG: vadduwm 2, 2, 4
+; ASM64-DAG: vadduwm 2, 2, 3
+; ASM64-DAG: blr
+
+define dso_local <2 x i64> @longint(<2 x i64> %ll0, <2 x i64> %ll1, <2 x i64> %ll2, <2 x i64> %ll3, <2 x i64> %ll4, <2 x i64> %ll5, <2 x i64> %ll6, <2 x i64> %ll7, <2 x i64> %ll8, <2 x i64> %ll9, <2 x i64> %ll10, <2 x i64> %ll11, <2 x i64> %ll12, <2 x i64> %ll13, <2 x i64> %ll14) {
+entry:
+  %add = add <2 x i64> %ll0, %ll1
+  %add1 = add <2 x i64> %add, %ll2
+  %add2 = add <2 x i64> %add1, %ll3
+  %add3 = add <2 x i64> %add2, %ll4
+  %add4 = add <2 x i64> %add3, %ll5
+  %add5 = add <2 x i64> %add4, %ll6
+  %add6 = add <2 x i64> %add5, %ll7
+  %add7 = add <2 x i64> %add6, %ll8
+  %add8 = add <2 x i64> %add7, %ll9
+  %add9 = add <2 x i64> %add8, %ll10
+  %add10 = add <2 x i64> %add9, %ll11
+  %add11 = add <2 x i64> %add10, %ll12
+  %add12 = add <2 x i64> %add11, %ll13
+  %add13 = add <2 x i64> %add12, %ll14
+  ret <2 x i64> %add13
+}
+
+; CHECKASM-LABEL: .longint:
+
+; ASM32-DAG: stwu 1, -336(1)
+; ASM32-DAG: addi 3, 1, 112
+; ASM32-DAG: addi 4, 1, 96
+; ASM32-DAG: addi 8, 1, 128
+; ASM32-DAG: stxvw4x 35, 0, 3
+; ASM32-DAG: stxvw4x 34, 0, 4
+; ASM32-DAG: stxvw4x 36, 0, 8
+; ASM32-DAG: stxvw4x 37, 0, 9
+; ASM32-DAG: stxvw4x 38, 0, 7
+; ASM32-DAG: stxvw4x 39, 0, 8
+; ASM32-DAG: stxvw4x 40, 0, 7
+; ASM32-DAG: stxvw4x 41, 0, 8
+; ASM32-DAG: stxvw4x 42, 0, 7
+; ASM32-DAG: stxvw4x 43, 0, 8
+; ASM32-DAG: stxvw4x 44, 0, 7
+; ASM32-DAG: stxvw4x 45, 0, 8
+
+; ASM64-DAG: stdu 1, -368(1)
+; ASM64-DAG: xxswapd 1, 34
+; ASM64-DAG: xxswapd 0, 35
+; ASM64-DAG: xxswapd 2, 36
+; ASM64-DAG: xxswapd 0, 37
+; ASM64-DAG: xxswapd 1, 38
+; ASM64-DAG: xxswapd 0, 39
+; ASM64-DAG: xxswapd 1, 40
+; ASM64-DAG: xxswapd 0, 41
+; ASM64-DAG: xxswapd 1, 42
+; ASM64-DAG: xxswapd 2, 43
+; ASM64-DAG: xxswapd 3, 44
+; ASM64-DAG: xxswapd 2, 45
+; ASM64-DAG: xxswapd 34, 0
+; ASM64-DAG: addi 3, 1, 64
+; ASM64-DAG: blr
+
+define dso_local <4 x float> @float(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2, <4 x float> %vf3, <4 x float> %vf4, <4 x float> %vf5, <4 x float> %vf6, <4 x float> %vf7, <4 x float> %vf8, <4 x float> %vf9, <4 x float> %vf10, <4 x float> %vf11, <4 x float> %vf12, <4 x float> %vf13, <4 x float> %vf14) {
+entry:
+  %add = fadd <4 x float> %vf0, %vf1
+  %add1 = fadd <4 x float> %add, %vf2
+  %add2 = fadd <4 x float> %add1, %vf3
+  %add3 = fadd <4 x float> %add2, %vf4
+  %add4 = fadd <4 x float> %add3, %vf5
+  %add5 = fadd <4 x float> %add4, %vf6
+  %add6 = fadd <4 x float> %add5, %vf7
+  %add7 = fadd <4 x float> %add6, %vf8
+  %add8 = fadd <4 x float> %add7, %vf9
+  %add9 = fadd <4 x float> %add8, %vf10
+  %add10 = fadd <4 x float> %add9, %vf11
+  %add11 = fadd <4 x float> %add10, %vf12
+  %add12 = fadd <4 x float> %add11, %vf13
+  %add13 = fadd <4 x float> %add12, %vf14
+  ret <4 x float> %add13
+}
+
+; CHECKASM-LABEL: .float:
+
+; ASM32-DAG: addi 3, 1, 224
+; ASM32-DAG: xvaddsp 0, 34, 35
+; ASM32-DAG: lxvw4x 1, 0, 3
+; ASM32-DAG: addi 3, 1, 240
+; ASM32-DAG: xvaddsp 0, 0, 36
+; ASM32-DAG: xvaddsp 0, 0, 37
+; ASM32-DAG: xvaddsp 0, 0, 38
+; ASM32-DAG: xvaddsp 0, 0, 39
+; ASM32-DAG: xvaddsp 0, 0, 40
+; ASM32-DAG: xvaddsp 0, 0, 41
+; ASM32-DAG: xvaddsp 0, 0, 42
+; ASM32-DAG: xvaddsp 0, 0, 43
+; ASM32-DAG: xvaddsp 0, 0, 44
+; ASM32-DAG: xvaddsp 0, 0, 45
+; ASM32-DAG: xvaddsp 0, 0, 1
+; ASM32-DAG: lxvw4x 1, 0, 3
+; ASM32-DAG: addi 3, 1, 256
+; ASM32-DAG: xvaddsp 0, 0, 1
+; ASM32-DAG: lxvw4x 1, 0, 3
+; ASM32-DAG: xvaddsp 34, 0, 1
+; ASM32-DAG: blr
+
+; ASM64-DAG: xvaddsp 0, 34, 35
+; ASM64-DAG: addi 3, 1, 240
+; ASM64-DAG: lxvw4x 1, 0, 3
+; ASM64-DAG: addi 3, 1, 256
+; ASM64-DAG: xvaddsp 0, 0, 36
+; ASM64-DAG: xvaddsp 0, 0, 37
+; ASM64-DAG: xvaddsp 0, 0, 38
+; ASM64-DAG: xvaddsp 0, 0, 39
+; ASM64-DAG: xvaddsp 0, 0, 40
+; ASM64-DAG: xvaddsp 0, 0, 41
+; ASM64-DAG: xvaddsp 0, 0, 42
+; ASM64-DAG: xvaddsp 0, 0, 43
+; ASM64-DAG: xvaddsp 0, 0, 44
+; ASM64-DAG: xvaddsp 0, 0, 45
+; ASM64-DAG: xvaddsp 0, 0, 1
+; ASM64-DAG: lxvw4x 1, 0, 3
+; ASM64-DAG: addi 3, 1, 272
+; ASM64-DAG: xvaddsp 0, 0, 1
+; ASM64-DAG: lxvw4x 1, 0, 3
+; ASM64-DAG: xvaddsp 34, 0, 1
+; ASM64-DAG: blr
+
+define dso_local <2 x double> @double(<2 x double> %double0, <2 x double> %double1, <2 x double> %double2, <2 x double> %double3, <2 x double> %double4, <2 x double> %double5, <2 x double> %double6, <2 x double> %double7, <2 x double> %double8, <2 x double> %double9, <2 x double> %double10, <2 x double> %double11, <2 x double> %double12, <2 x double> %double13, <2 x double> %double14) {
+entry:
+  %add = fadd <2 x double> %double0, %double1
+  %add1 = fadd <2 x double> %add, %double2
+  %add2 = fadd <2 x double> %add1, %double3
+  %add3 = fadd <2 x double> %add2, %double4
+  %add4 = fadd <2 x double> %add3, %double5
+  %add5 = fadd <2 x double> %add4, %double6
+  %add6 = fadd <2 x double> %add5, %double7
+  %add7 = fadd <2 x double> %add6, %double8
+  %add8 = fadd <2 x double> %add7, %double9
+  %add9 = fadd <2 x double> %add8, %double10
+  %add10 = fadd <2 x double> %add9, %double11
+  %add11 = fadd <2 x double> %add10, %double12
+  %add12 = fadd <2 x double> %add11, %double13
+  %add13 = fadd <2 x double> %add12, %double14
+  ret <2 x double> %add13
+}
+
+; CHECKASM-LABEL: .double:
+
+; ASM32-DAG: xvadddp 0, 34, 35
+; ASM32-DAG: addi 3, 1, 224
+; ASM32-DAG: lxvd2x 1, 0, 3
+; ASM32-DAG: addi 3, 1, 240
+; ASM32-DAG: lxvd2x 2, 0, 3
+; ASM32-DAG: addi 3, 1, 256
+; ASM32-DAG: xxswapd 1, 1
+; ASM32-DAG: xvadddp 0, 0, 36
+; ASM32-DAG: xvadddp 0, 0, 37
+; ASM32-DAG: xvadddp 0, 0, 38
+; ASM32-DAG: xvadddp 0, 0, 39
+; ASM32-DAG: xvadddp 0, 0, 40
+; ASM32-DAG: xvadddp 0, 0, 41
+; ASM32-DAG: xvadddp 0, 0, 42
+; ASM32-DAG: xvadddp 0, 0, 43
+; ASM32-DAG: xvadddp 0, 0, 44
+; ASM32-DAG: xvadddp 0, 0, 45
+; ASM32-DAG: xvadddp 0, 0, 1
+; ASM32-DAG: xxswapd 1, 2
+; ASM32-DAG: lxvd2x 2, 0, 3
+; ASM32-DAG: xvadddp 0, 0, 1
+; ASM32-DAG: xxswapd 1, 2
+; ASM32-DAG: xvadddp 34, 0, 1
+; ASM32-DAG: blr
+
+; ASM64-DAG: xvadddp 0, 34, 35
+; ASM64-DAG: addi 3, 1, 240
+; ASM64-DAG: lxvd2x 1, 0, 3
+; ASM64-DAG: addi 3, 1, 256
+; ASM64-DAG: xvadddp 0, 0, 36
+; ASM64-DAG: xvadddp 0, 0, 37
+; ASM64-DAG: xvadddp 0, 0, 38
+; ASM64-DAG: xvadddp 0, 0, 39
+; ASM64-DAG: xvadddp 0, 0, 40
+; ASM64-DAG: xvadddp 0, 0, 41
+; ASM64-DAG: xvadddp 0, 0, 42
+; ASM64-DAG: xvadddp 0, 0, 43
+; ASM64-DAG: xvadddp 0, 0, 44
+; ASM64-DAG: xvadddp 0, 0, 45
+; ASM64-DAG: xvadddp 0, 0, 1
+; ASM64-DAG: lxvd2x 2, 0, 3
+; ASM64-DAG: addi 3, 1, 272
+; ASM64-DAG: xvadddp 0, 0, 1
+; ASM64-DAG: lxvd2x 2, 0, 3
+; ASM64-DAG: xvadddp 34, 0, 1
+; ASM64-DAG: blr