Index: include/llvm/CodeGen/CallingConvLower.h =================================================================== --- include/llvm/CodeGen/CallingConvLower.h +++ include/llvm/CodeGen/CallingConvLower.h @@ -112,6 +112,23 @@ return Ret; } + // There is no need to differentiate between a pending CCValAssign and other + // kinds, as they are stored in a different list. + static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, + LocInfo HTP) { + return getReg(ValNo, ValVT, 0, LocVT, HTP); + } + + void convertToReg(unsigned RegNo) { + Loc = RegNo; + isMem = false; + } + + void convertToMem(unsigned Offset) { + Loc = Offset; + isMem = true; + } + unsigned getValNo() const { return ValNo; } MVT getValVT() const { return ValVT; } @@ -164,6 +181,7 @@ unsigned StackOffset; SmallVector UsedRegs; + SmallVector PendingLocs; // ByValInfo and SmallVector ByValRegs: // @@ -317,6 +335,31 @@ return Reg; } + /// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive + /// entries of Regs. If this is not possible (including when RegsRequired exceeds NumRegs), return zero. Otherwise, return the first + /// register of the block that was allocated, marking the entire block as allocated. The loop bound is written additively so it cannot wrap around in unsigned arithmetic. + unsigned AllocateRegBlock(const uint16_t *Regs, unsigned NumRegs, unsigned RegsRequired) { + for (unsigned StartIdx = 0; StartIdx + RegsRequired <= NumRegs; ++StartIdx) { + bool BlockAvailable = true; + // Check for already-allocated regs in this block + for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx) { + if (isAllocated(Regs[StartIdx + BlockIdx])) { + BlockAvailable = false; + break; + } + } + if (BlockAvailable) { + // Mark the entire block as allocated + for (unsigned BlockIdx = 0; BlockIdx < RegsRequired; ++BlockIdx) { + MarkAllocated(Regs[StartIdx + BlockIdx]); + } + return Regs[StartIdx]; + } + } + // No block was available + return 0; + } + /// Version of AllocateReg with list of registers to be shadowed. 
unsigned AllocateReg(const MCPhysReg *Regs, const MCPhysReg *ShadowRegs, unsigned NumRegs) { @@ -411,6 +454,11 @@ ParmContext getCallOrPrologue() const { return CallOrPrologue; } + // Get list of pending assignments + SmallVectorImpl &getPendingLocs() { + return PendingLocs; + } + private: /// MarkAllocated - Mark a register and all of its aliases as allocated. void MarkAllocated(unsigned Reg); Index: include/llvm/Target/TargetCallingConv.h =================================================================== --- include/llvm/Target/TargetCallingConv.h +++ include/llvm/Target/TargetCallingConv.h @@ -47,8 +47,12 @@ static const uint64_t InAllocaOffs = 12; static const uint64_t OrigAlign = 0x1FULL<<27; static const uint64_t OrigAlignOffs = 27; - static const uint64_t ByValSize = 0xffffffffULL<<32; ///< Struct size + static const uint64_t ByValSize = 0x3fffffffULL<<32; ///< Struct size static const uint64_t ByValSizeOffs = 32; + static const uint64_t InConsecutiveRegsLast = 0x1ULL<<62; ///< Last member of a consecutive-register block + static const uint64_t InConsecutiveRegsLastOffs = 62; + static const uint64_t InConsecutiveRegs = 0x1ULL<<63; ///< Member of a block needing consecutive registers + static const uint64_t InConsecutiveRegsOffs = 63; static const uint64_t One = 1ULL; ///< 1 of this type, for shifts @@ -80,6 +84,12 @@ bool isReturned() const { return Flags & Returned; } void setReturned() { Flags |= One << ReturnedOffs; } + bool isInConsecutiveRegs() const { return Flags & InConsecutiveRegs; } + void setInConsecutiveRegs() { Flags |= One << InConsecutiveRegsOffs; } + + bool isInConsecutiveRegsLast() const { return Flags & InConsecutiveRegsLast; } + void setInConsecutiveRegsLast() { Flags |= One << InConsecutiveRegsLastOffs; } + unsigned getByValAlign() const { return (unsigned) ((One << ((Flags & ByValAlign) >> ByValAlignOffs)) / 2); Index: include/llvm/Target/TargetCallingConv.td =================================================================== --- include/llvm/Target/TargetCallingConv.td +++ 
include/llvm/Target/TargetCallingConv.td @@ -42,6 +42,11 @@ class CCIfByVal : CCIf<"ArgFlags.isByVal()", A> { } +/// CCIfConsecutiveRegs - If the current argument has InConsecutiveRegs +/// parameter attribute, apply Action A. +class CCIfConsecutiveRegs : CCIf<"ArgFlags.isInConsecutiveRegs()", A> { +} + /// CCIfCC - Match if the current calling convention is 'CC'. class CCIfCC : CCIf {} Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -2233,6 +2233,15 @@ return VT.bitsLT(MinVT) ? MinVT : VT; } + /// For some targets, an LLVM struct type must be broken down into multiple + /// simple types, but the calling convention specifies that the entire struct + /// must be passed in a block of consecutive registers. + virtual bool + functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, + bool isVarArg) const { + return false; + } + /// Returns a 0 terminated array of registers that can be safely used as /// scratch registers. 
virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const { Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7128,8 +7128,13 @@ for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); - for (unsigned Value = 0, NumValues = ValueVTs.size(); - Value != NumValues; ++Value) { + Type *FinalType = Args[i].Ty; + if (Args[i].isByVal) + FinalType = cast(Args[i].Ty)->getElementType(); + bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( + FinalType, CLI.CallConv, CLI.IsVarArg); + for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; + ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), @@ -7171,6 +7176,11 @@ } if (Args[i].isNest) Flags.setNest(); + if (NeedsRegBlock) { + Flags.setInConsecutiveRegs(); + if (Value == NumValues - 1) + Flags.setInConsecutiveRegsLast(); + } Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); @@ -7356,6 +7366,11 @@ ComputeValueVTs(*TLI, I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); unsigned PartBase = 0; + Type *FinalType = I->getType(); + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + FinalType = cast(FinalType)->getElementType(); + bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( + FinalType, F.getCallingConv(), F.isVarArg()); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; @@ -7397,6 +7412,11 @@ } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); + if (NeedsRegBlock) { + Flags.setInConsecutiveRegs(); + if (Value == NumValues - 1) + Flags.setInConsecutiveRegsLast(); + } 
Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); Index: lib/Target/ARM/ARMCallingConv.h =================================================================== --- lib/Target/ARM/ARMCallingConv.h +++ lib/Target/ARM/ARMCallingConv.h @@ -160,6 +160,96 @@ State); } +static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; +static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; +static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; + +// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA +// has InConsecutiveRegs set, and that the last member also has +// InConsecutiveRegsLast set. We must process all members of the HA before +// we can allocate it, as we need to know the total number of registers that +// will be needed in order to (attempt to) allocate a contiguous block. +static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + SmallVectorImpl &PendingHAMembers = State.getPendingLocs(); + + // AAPCS HFAs must have 1-4 elements, all of the same type + assert(PendingHAMembers.size() < 4); + if (PendingHAMembers.size() > 0) + assert(PendingHAMembers[0].getLocVT() == LocVT); + + // Add the argument to the list to be allocated once we know the size of the + // HA + PendingHAMembers.push_back( + CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); + + if (ArgFlags.isInConsecutiveRegsLast()) { + assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 && + "Homogeneous aggregates must have between 1 and 4 members"); + + // Try to allocate a contiguous block of registers, each of the correct + // size to hold one member. 
+ const uint16_t *RegList; + unsigned NumRegs; + switch (LocVT.SimpleTy) { + case MVT::f32: + RegList = SRegList; + NumRegs = 16; + break; + case MVT::f64: + RegList = DRegList; + NumRegs = 8; + break; + case MVT::v2f64: + RegList = QRegList; + NumRegs = 4; + break; + default: + llvm_unreachable("Unexpected member type for HA"); + break; + } + + unsigned RegResult = + State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size()); + + if (RegResult) { + for (SmallVectorImpl::iterator It = PendingHAMembers.begin(); + It != PendingHAMembers.end(); ++It) { + It->convertToReg(RegResult); + State.addLoc(*It); + ++RegResult; + } + PendingHAMembers.clear(); + return true; + } + + // Register allocation failed, fall back to the stack + + // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp) + for (unsigned regNo = 0; regNo < 16; ++regNo) + State.AllocateReg(SRegList[regNo]); + + unsigned Size = LocVT.getSizeInBits() / 8; + unsigned Align = LocVT.SimpleTy == MVT::v2f64 ? 8 : Size; + + for (auto It : PendingHAMembers) { + It.convertToMem(State.AllocateStack(Size, Align)); + State.addLoc(It); + } + + // All pending members have now been allocated + PendingHAMembers.clear(); + } + + // This will be allocated by the last member of the HA + return true; +} + } // End llvm namespace #endif Index: lib/Target/ARM/ARMCallingConv.td =================================================================== --- lib/Target/ARM/ARMCallingConv.td +++ lib/Target/ARM/ARMCallingConv.td @@ -174,6 +174,9 @@ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + // HFAs are passed in a contiguous block of registers, or on the stack + CCIfConsecutiveRegs>, + CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, Index: lib/Target/ARM/ARMISelLowering.h 
=================================================================== --- lib/Target/ARM/ARMISelLowering.h +++ lib/Target/ARM/ARMISelLowering.h @@ -384,6 +384,11 @@ bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; + /// \brief Returns true if an argument of type Ty needs to be passed in a + /// contiguous block of registers in calling convention CallConv. + bool functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, @@ -424,6 +429,8 @@ SDValue &Root, SelectionDAG &DAG, SDLoc dl) const; + CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, @@ -576,7 +583,6 @@ OtherModImm }; - namespace ARM { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" @@ -1211,40 +1212,58 @@ #include "ARMGenCallingConv.inc" -/// CCAssignFnForNode - Selects the correct CCAssignFn for a the -/// given CallingConvention value. 
-CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, - bool Return, - bool isVarArg) const { +/// getEffectiveCallingConv - Get the effective calling convention, taking into +/// account presence of floating point hardware and calling convention +/// limitations, such as support for variadic functions. +CallingConv::ID +ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - if (Subtarget->hasVFP2() && !isVarArg) { - if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); - // For AAPCS ABI targets, just use VFP variant of the calling convention. - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - } - // Fallthrough - case CallingConv::C: { - // Use target triple & subtarget features to do actual dispatch. + case CallingConv::ARM_AAPCS: + case CallingConv::ARM_APCS: + case CallingConv::GHC: + return CC; + case CallingConv::ARM_AAPCS_VFP: + return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; + case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + return CallingConv::ARM_APCS; else if (Subtarget->hasVFP2() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; + case CallingConv::Fast: + if (!Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && !isVarArg) + return CallingConv::Fast; + return CallingConv::ARM_APCS; + } else if (Subtarget->hasVFP2() && !isVarArg) + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; } - case CallingConv::ARM_AAPCS_VFP: - if (!isVarArg) - return (Return ? 
RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - // Fallthrough - case CallingConv::ARM_AAPCS: - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); +} + +/// CCAssignFnForNode - Selects the correct CCAssignFn for the given +/// CallingConvention. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, + bool Return, + bool isVarArg) const { + switch (getEffectiveCallingConv(CC, isVarArg)) { + default: + llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + case CallingConv::Fast: + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); } @@ -10604,3 +10623,77 @@ Val, Strex->getFunctionType()->getParamType(0)), Addr); } + +enum HABaseType { + HA_UNKNOWN = 0, + HA_FLOAT, + HA_DOUBLE, + HA_VECT64, + HA_VECT128 +}; + +static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, + uint64_t &Members) { + if (const StructType *ST = dyn_cast(Ty)) { + for (unsigned i = 0; i < ST->getNumElements(); ++i) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) + return false; + Members += SubMembers; + } + } else if (const ArrayType *AT = dyn_cast(Ty)) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) + return false; + Members += SubMembers * AT->getNumElements(); + } else if (Ty->isFloatTy()) { + if (Base != HA_UNKNOWN && Base != HA_FLOAT) + return false; + Members = 1; + Base = HA_FLOAT; + } else if (Ty->isDoubleTy()) { + if (Base != HA_UNKNOWN && Base != HA_DOUBLE) + return false; + Members = 1; + Base = HA_DOUBLE; + } else if (const VectorType *VT = dyn_cast(Ty)) { + Members = 1; + switch (Base) { + case HA_FLOAT: + case HA_DOUBLE: + return false; 
+ case HA_VECT64: + return VT->getBitWidth() == 64; + case HA_VECT128: + return VT->getBitWidth() == 128; + case HA_UNKNOWN: + switch (VT->getBitWidth()) { + case 64: + Base = HA_VECT64; + return true; + case 128: + Base = HA_VECT128; + return true; + default: + return false; + } + } + } + + return (Members > 0 && Members <= 4); +} + +/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. +bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { + if (getEffectiveCallingConv(CallConv, isVarArg) == + CallingConv::ARM_AAPCS_VFP) { + HABaseType Base = HA_UNKNOWN; + uint64_t Members = 0; + bool result = isHomogeneousAggregate(Ty, Base, Members); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + return result; + } else { + return false; + } +} Index: test/CodeGen/ARM/aapcs-hfa.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/aapcs-hfa.ll @@ -0,0 +1,165 @@ +; RUN: llc < %s -float-abi=hard -debug-only arm-isel 2>&1 | FileCheck %s +; RUN: llc < %s -float-abi=soft -debug-only arm-isel 2>&1 | FileCheck %s --check-prefix=SOFT +; The -debug-only output checked below is only produced by assertion-enabled builds. +; REQUIRES: asserts + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7-none--eabi" + +; SOFT-NOT: isHA + +; CHECK: isHA: 1 { float } +define void @f0b({ float } %a) { + ret void +} + +; CHECK: isHA: 1 { float, float } +define void @f1({ float, float } %a) { + ret void +} + +; CHECK: isHA: 1 { float, float, float } +define void @f1b({ float, float, float } %a) { + ret void +} + +; CHECK: isHA: 1 { float, float, float, float } +define void @f1c({ float, float, float, float } %a) { + ret void +} + +; CHECK: isHA: 0 { float, float, float, float, float } +define void @f2({ float, float, float, float, float } %a) { + ret void +} + +; CHECK: isHA: 1 { double } +define void @f3({ double } %a) { + ret void +} + +; CHECK: isHA: 1 { double, double, double, double } +define void 
@f4({ double, double, double, double } %a) { + ret void +} + +; CHECK: isHA: 0 { double, double, double, double, double } +define void @f5({ double, double, double, double, double } %a) { + ret void +} + +; CHECK: isHA: 0 { i32, i32 } +define void @f5b({ i32, i32 } %a) { + ret void +} + +; CHECK: isHA: 1 { [1 x float] } +define void @f6({ [1 x float] } %a) { + ret void +} + +; CHECK: isHA: 1 { [4 x float] } +define void @f7({ [4 x float] } %a) { + ret void +} + +; CHECK: isHA: 0 { [5 x float] } +define void @f8({ [5 x float] } %a) { + ret void +} + +; CHECK: isHA: 1 [1 x float] +define void @f6b([1 x float] %a) { + ret void +} + +; CHECK: isHA: 1 [4 x float] +define void @f7b([4 x float] %a) { + ret void +} + +; CHECK: isHA: 0 [5 x float] +define void @f8b([5 x float] %a) { + ret void +} + +; CHECK: isHA: 1 { [2 x float], [2 x float] } +define void @f9({ [2 x float], [2 x float] } %a) { + ret void +} + +; CHECK: isHA: 1 { [1 x float], [3 x float] } +define void @f9b({ [1 x float], [3 x float] } %a) { + ret void +} + +; CHECK: isHA: 0 { [3 x float], [3 x float] } +define void @f10({ [3 x float], [3 x float] } %a) { + ret void +} + +; CHECK: isHA: 1 { <2 x float> } +define void @f11({ <2 x float> } %a) { + ret void +} + +; CHECK: isHA: 0 { <3 x float> } +define void @f12({ <3 x float> } %a) { + ret void +} + +; CHECK: isHA: 1 { <4 x float> } +define void @f13({ <4 x float> } %a) { + ret void +} + +; CHECK: isHA: 1 { <2 x float>, <2 x float> } +define void @f15({ <2 x float>, <2 x float> } %a) { + ret void +} + +; CHECK: isHA: 0 { <2 x float>, float } +define void @f15b({ <2 x float>, float } %a) { + ret void +} + +; CHECK: isHA: 0 { <2 x float>, [2 x float] } +define void @f15c({ <2 x float>, [2 x float] } %a) { + ret void +} + +; CHECK: isHA: 0 { <2 x float>, <4 x float> } +define void @f16({ <2 x float>, <4 x float> } %a) { + ret void +} + +; CHECK: isHA: 1 { <2 x double> } +define void @f17({ <2 x double> } %a) { + ret void +} + +; CHECK: isHA: 1 { <2 x i32> } 
+define void @f18({ <2 x i32> } %a) { + ret void +} + +; CHECK: isHA: 1 { <2 x i64>, <4 x i32> } +define void @f19({ <2 x i64>, <4 x i32> } %a) { + ret void +} + +; CHECK: isHA: 1 { [4 x <4 x float>] } +define void @f20({ [4 x <4 x float>] } %a) { + ret void +} + +; CHECK: isHA: 0 { [5 x <4 x float>] } +define void @f21({ [5 x <4 x float>] } %a) { + ret void +} + +; CHECK-NOT: isHA +define void @f22({ float } %a, ...) { + ret void +} + Index: test/CodeGen/ARM/hfa-in-contiguous-registers.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/hfa-in-contiguous-registers.ll @@ -0,0 +1,94 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" +target triple = "armv7-none--gnueabihf" + +%struct.s = type { float, float } +%union.t = type { [4 x float] } + +; Equivalent C code: +; struct s { float a; float b; }; +; float foo(float a, double b, struct s c) { return c.a; } +; Argument allocation: +; a -> s0 +; b -> d1 +; c -> s4, s5 +; s1 is unused +; return in s0 +define float @test1(float %a, double %b, %struct.s %c) { +entry: +; CHECK-LABEL: test1 +; CHECK: vmov.f32 s0, s4 +; CHECK-NOT: vmov.f32 s0, s1 + + %result = extractvalue %struct.s %c, 0 + ret float %result +} + +; Equivalent C code: +; union t { float a[4]; }; +; float foo(float a, double b, union t c) { return c.a[0]; } +; Argument allocation: +; a -> s0 +; b -> d1 +; c -> s4..s7 +define float @test2(float %a, double %b, %union.t %c) #0 { +entry: +; CHECK-LABEL: test2 +; CHECK: vmov.f32 s0, s4 +; CHECK-NOT: vmov.f32 s0, s1 + + %result = extractvalue %union.t %c, 0, 0 + ret float %result +} + +; Equivalent C code: +; struct s { float a; float b; }; +; float foo(float a, double b, struct s c, float d) { return d; } +; Argument allocation: +; a -> s0 +; b -> d1 +; c -> s4, s5 +; d -> s1 +; return in s0 +define float @test3(float %a, double %b, %struct.s %c, float %d) { +entry: +; CHECK-LABEL: test3 +; CHECK: vmov.f32 s0, 
s1 +; CHECK-NOT: vmov.f32 s0, s5 + + ret float %d +} + +; Equivalent C code: +; struct s { float a; float b; }; +; float foo(struct s a, struct s b) { return b.b; } +; Argument allocation: +; a -> s0, s1 +; b -> s2, s3 +; return in s0 +define float @test4(%struct.s %a, %struct.s %b) { +entry: +; CHECK-LABEL: test4 +; CHECK: vmov.f32 s0, s3 + + %result = extractvalue %struct.s %b, 1 + ret float %result +} + +; Equivalent C code: +; struct s { float a; float b; }; +; float foo(struct s a, float b, struct s c) { return c.a; } +; Argument allocation: +; a -> s0, s1 +; b -> s2 +; c -> s3, s4 +; return in s0 +define float @test5(%struct.s %a, float %b, %struct.s %c) { +entry: +; CHECK-LABEL: test5 +; CHECK: vmov.f32 s0, s3 + + %result = extractvalue %struct.s %c, 0 + ret float %result +}