Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6922,9 +6922,10 @@ const Align PtrAlign = IsPPC64 ? Align(8) : Align(4); const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; - assert((!ValVT.isInteger() || - (ValVT.getSizeInBits() <= RegVT.getSizeInBits())) && - "Integer argument exceeds register size: should have been legalized"); + // Integer vectors are exempt: they go in vector registers or on the stack. + assert((!ValVT.isInteger() || ValVT.isVector() || + (ValVT.getSizeInBits() <= RegVT.getSizeInBits())) && + "Integer argument exceeds register size: should have been legalized"); if (ValVT == MVT::f128) report_fatal_error("f128 is unimplemented on AIX."); @@ -6932,9 +6933,6 @@ if (ArgFlags.isNest()) report_fatal_error("Nest arguments are unimplemented."); - if (ValVT.isVector() || LocVT.isVector()) - report_fatal_error("Vector arguments are unimplemented on AIX."); - static const MCPhysReg GPR_32[] = {// 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10}; @@ -6942,6 +6940,11 @@ PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10}; + static const MCPhysReg VR[] = {// Vector registers.
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, + PPC::V6, PPC::V7, PPC::V8, PPC::V9, + PPC::V10, PPC::V11, PPC::V12, PPC::V13}; + if (ArgFlags.isByVal()) { if (ArgFlags.getNonZeroByValAlign() > PtrAlign) report_fatal_error("Pass-by-value arguments with alignment greater than " @@ -7037,6 +7040,24 @@ return false; } + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + case MVT::v2f64: + case MVT::v2i64: + case MVT::v1i128: { + if (State.isVarArg()) + report_fatal_error( + "variadic arguments for vector types are unimplemented for AIX"); + + const unsigned Offset = State.AllocateStack(16, Align(16)); + if (unsigned VReg = State.AllocateReg(VR)) + State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); + else + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return false; + } } return true; } @@ -7057,6 +7078,14 @@ return &PPC::F4RCRegClass; case MVT::f64: return &PPC::F8RCRegClass; + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + case MVT::v2f64: + case MVT::v2i64: + case MVT::v1i128: + return &PPC::VRRCRegClass; } } @@ -7370,8 +7399,6 @@ const PPCSubtarget& Subtarget = static_cast<const PPCSubtarget&>(DAG.getSubtarget()); - if (Subtarget.hasAltivec()) - report_fatal_error("Altivec support is unimplemented on AIX."); MachineFunction &MF = DAG.getMachineFunction(); SmallVector<CCValAssign, 16> ArgLocs; @@ -7660,10 +7687,6 @@ SDValue Arg = OutVals[RealResIdx]; - if (Subtarget.isAIXABI() && - (VA.getLocVT().isVector() || VA.getValVT().isVector())) - report_fatal_error("Returning vector types not yet supported on AIX."); - switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; Index: llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll +++ llvm/test/CodeGen/PowerPC/aix-cc-altivec.ll @@ -1,12 +1,7 @@ -; RUN: not --crash llc < %s -mtriple powerpc64-ibm-aix-xcoff
-mcpu=pwr8 2>&1 | FileCheck %s -; RUN: not --crash llc < %s -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 2>&1 | FileCheck %s - -; This test expects a compiler diagnostic for an AIX limitation on Altivec -; support. When the Altivec limitation diagnostic is removed, this test -; should compile clean and fail in order to alert the author to validate the -; instructions emitted to initialize the GPR for the double vararg. -; The mfvsrwz and mfvsrd instructions should be used to initialize the GPR for -; the double vararg without going through memory. +; RUN: llc < %s -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr8 2>&1 | \ +; RUN: FileCheck %s --check-prefix=ASM64 +; RUN: llc < %s -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr8 2>&1 | \ +; RUN: FileCheck %s --check-prefix=ASM32 @f1 = global float 0.000000e+00, align 4 @@ -20,4 +15,15 @@ declare void @test_vararg(i32, ...) -; CHECK: LLVM ERROR: Altivec support is unimplemented on AIX. + +; ASM64: xscvdpspn +; ASM64: mffprd +; ASM64: xxsldwi +; ASM64: mffprwz + + +; ASM32: lfsx +; ASM32: fmr +; ASM32: stfs +; ASM32: lwz +; ASM32: stfd Index: llvm/test/CodeGen/PowerPC/aix-cc-ext-vec-abi.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/aix-cc-ext-vec-abi.ll @@ -0,0 +1,164 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \ +; RUN: -mtriple powerpc-ibm-aix-xcoff < %s | \ +; RUN: FileCheck --check-prefixes=ASM32 %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+altivec \ +; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s | \ +; RUN: FileCheck --check-prefixes=ASM64 %s + +define dso_local <4 x i32> @vec_callee(<4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3, <4 x i32> %vec4, <4 x i32> %vec5, <4 x i32> %vec6, <4 x i32> %vec7, <4 x i32> %vec8, <4 x i32> %vec9, <4 x i32> %vec10, <4 x i32> %vec11, <4 x i32> %vec12, <4 x i32> %vec13, <4 x i32> %vec14) { +entry: + %add = add <4 x i32> %vec1, %vec2 + %add1 = add <4 x i32> %add, %vec3 + %add2 = add <4 x i32> %add1, %vec4 +
%add3 = add <4 x i32> %add2, %vec5 + %add4 = add <4 x i32> %add3, %vec6 + %add5 = add <4 x i32> %add4, %vec7 + %add6 = add <4 x i32> %add5, %vec8 + %add7 = add <4 x i32> %add6, %vec9 + %add8 = add <4 x i32> %add7, %vec10 + %add9 = add <4 x i32> %add8, %vec11 + %add10 = add <4 x i32> %add9, %vec12 + %add11 = add <4 x i32> %add10, %vec13 + %add12 = add <4 x i32> %add11, %vec14 + ret <4 x i32> %add12 +} + +; ASM32-LABEL: vec_callee: + +; ASM32: # %bb.0: # %entry +; ASM32-DAG: vadduwm 2, 2, 3 +; ASM32-DAG: addi 3, 1, 224 +; ASM32-DAG: lxvw4x {{[0-9]+}}, 0, 3 +; ASM32-DAG: addi 3, 1, 240 +; ASM32-DAG: vadduwm 2, 2, 4 +; ASM32-DAG: lxvw4x {{[0-9]+}}, 0, 3 +; ASM32-DAG: vadduwm 2, 2, 5 +; ASM32-DAG: vadduwm 2, 2, 6 +; ASM32-DAG: vadduwm 2, 2, 7 +; ASM32-DAG: vadduwm 2, 2, 8 +; ASM32-DAG: vadduwm 2, 2, 9 +; ASM32-DAG: vadduwm 2, 2, 10 +; ASM32-DAG: vadduwm 2, 2, 11 +; ASM32-DAG: vadduwm 2, 2, 12 +; ASM32-DAG: vadduwm 2, 2, 13 +; ASM32-DAG: vadduwm 2, 2, {{[0-9]+}} +; ASM32-DAG: vadduwm 2, 2, {{[0-9]+}} +; ASM32: blr + +; ASM64-LABEL: vec_callee: + +; ASM64: # %bb.0: # %entry +; ASM64-NEXT: vadduwm 2, 2, 3 +; ASM64-NEXT: addi 3, 1, 240 +; ASM64-NEXT: lxvw4x {{[0-9]+}}, 0, 3 +; ASM64-NEXT: addi 3, 1, 256 +; ASM64-NEXT: vadduwm 2, 2, 4 +; ASM64-NEXT: lxvw4x {{[0-9]+}}, 0, 3 +; ASM64-NEXT: vadduwm 2, 2, 5 +; ASM64-NEXT: vadduwm 2, 2, 6 +; ASM64-NEXT: vadduwm 2, 2, 7 +; ASM64-NEXT: vadduwm 2, 2, 8 +; ASM64-NEXT: vadduwm 2, 2, 9 +; ASM64-NEXT: vadduwm 2, 2, 10 +; ASM64-NEXT: vadduwm 2, 2, 11 +; ASM64-NEXT: vadduwm 2, 2, 12 +; ASM64-NEXT: vadduwm 2, 2, 13 +; ASM64-NEXT: vadduwm 2, 2, {{[0-9]+}} +; ASM64-NEXT: vadduwm 2, 2, {{[0-9]+}} +; ASM64-NEXT: blr + +define dso_local i32 @vec_caller() { +entry: + %call = call <4 x i32> @vec_callee(<4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> , <4 x i32> ) + ret i32 0 +} + +; ASM32-LABEL: .vec_caller: +; ASM32-DAG: # %bb.0: # %entry +; 
ASM32-DAG: mflr 0 +; ASM32-DAG: stw 0, 8(1) +; ASM32-DAG: stwu 1, -256(1) +; ASM32-DAG: lwz 3, L..C0(2) +; ASM32-DAG: lxvw4x 34, 0, 3 +; ASM32-DAG: lwz 4, L..C1(2) +; ASM32-DAG: lxvw4x 35, 0, 4 +; ASM32-DAG: lwz 5, L..C2(2) +; ASM32-DAG: lxvw4x 36, 0, 5 +; ASM32-DAG: lwz 6, L..C3(2) +; ASM32-DAG: lxvw4x 37, 0, 6 +; ASM32-DAG: lwz 7, L..C4(2) +; ASM32-DAG: lxvw4x 38, 0, 7 +; ASM32-DAG: lwz 8, L..C5(2) +; ASM32-DAG: lxvw4x 39, 0, 8 +; ASM32-DAG: lwz 3, L..C6(2) +; ASM32-DAG: lxvw4x 40, 0, 3 +; ASM32-DAG: lwz 4, L..C7(2) +; ASM32-DAG: lxvw4x 41, 0, 4 +; ASM32-DAG: lwz 5, L..C8(2) +; ASM32-DAG: lxvw4x 42, 0, 5 +; ASM32-DAG: lwz 6, L..C9(2) +; ASM32-DAG: lxvw4x 43, 0, 6 +; ASM32-DAG: lwz 7, L..C10(2) +; ASM32-DAG: lxvw4x 44, 0, 7 +; ASM32-DAG: lwz 8, L..C11(2) +; ASM32-DAG: lxvw4x 45, 0, 8 +; ASM32-DAG: lwz 3, L..C12(2) +; ASM32-DAG: lxvw4x 0, 0, 3 +; ASM32-DAG: lwz 4, L..C13(2) +; ASM32-DAG: lxvw4x 1, 0, 4 +; ASM32-DAG: li 3, 240 +; ASM32-DAG: li 4, 224 +; ASM32-DAG: stxvw4x 0, 1, 3 +; ASM32-DAG: stxvw4x 1, 1, 4 +; ASM32-DAG: bl .vec_callee +; ASM32-DAG: li 3, 0 +; ASM32-DAG: addi 1, 1, 256 +; ASM32-DAG: lwz 0, 8(1) +; ASM32-DAG: mtlr 0 +; ASM32-DAG: blr + +; ASM64-LABEL: .vec_caller: + +; ASM64: # %bb.0: # %entry +; ASM64-DAG: mflr 0 +; ASM64-DAG: std 0, 16(1) +; ASM64-DAG: stdu 1, -272(1) +; ASM64-DAG: ld 3, L..C0(2) +; ASM64-DAG: lxvw4x 34, 0, 3 +; ASM64-DAG: ld 4, L..C1(2) +; ASM64-DAG: lxvw4x 35, 0, 4 +; ASM64-DAG: ld 5, L..C2(2) +; ASM64-DAG: lxvw4x 36, 0, 5 +; ASM64-DAG: ld 6, L..C3(2) +; ASM64-DAG: lxvw4x 37, 0, 6 +; ASM64-DAG: ld 7, L..C4(2) +; ASM64-DAG: lxvw4x 38, 0, 7 +; ASM64-DAG: ld 8, L..C5(2) +; ASM64-DAG: lxvw4x 39, 0, 8 +; ASM64-DAG: ld 3, L..C6(2) +; ASM64-DAG: lxvw4x 40, 0, 3 +; ASM64-DAG: ld 4, L..C7(2) +; ASM64-DAG: lxvw4x 41, 0, 4 +; ASM64-DAG: ld 5, L..C8(2) +; ASM64-DAG: lxvw4x 42, 0, 5 +; ASM64-DAG: ld 6, L..C9(2) +; ASM64-DAG: lxvw4x 43, 0, 6 +; ASM64-DAG: ld 7, L..C10(2) +; ASM64-DAG: lxvw4x 44, 0, 7 +; ASM64-DAG: ld 8, L..C11(2) +; 
ASM64-DAG: lxvw4x 45, 0, 8 +; ASM64-DAG: ld 3, L..C12(2) +; ASM64-DAG: lxvw4x 0, 0, 3 +; ASM64-DAG: ld 4, L..C13(2) +; ASM64-DAG: lxvw4x 1, 0, 4 +; ASM64-DAG: li 3, 256 +; ASM64-DAG: li 4, 240 +; ASM64-DAG: stxvw4x 0, 1, 3 +; ASM64-DAG: stxvw4x 1, 1, 4 +; ASM64-DAG: bl .vec_callee +; ASM64-DAG: li 3, 0 +; ASM64-DAG: addi 1, 1, 272 +; ASM64-DAG: ld 0, 16(1) +; ASM64-DAG: mtlr 0 +; ASM64: blr Index: llvm/test/CodeGen/PowerPC/aix-vector-return.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix-vector-return.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: not --crash llc --verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff \ -; RUN: -mattr=+altivec 2>&1 < %s | FileCheck %s - -; RUN: not --crash llc --verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff \ -; RUN: -mattr=+altivec 2>&1 < %s | FileCheck %s - -; CHECK: LLVM ERROR: Returning vector types not yet supported on AIX. - -define dso_local <4 x i32> @test() local_unnamed_addr #0 { - entry: - ret <4 x i32> zeroinitializer -}