diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1101,16 +1101,7 @@
     /// contiguous block of registers in calling convention CallConv.
     bool functionArgumentNeedsConsecutiveRegisters(
         Type *Ty, CallingConv::ID CallConv, bool isVarArg,
-        const DataLayout &DL) const override {
-      // We support any array type as "consecutive" block in the parameter
-      // save area. The element type defines the alignment requirement and
-      // whether the argument should go in GPRs, FPRs, or VRs if available.
-      //
-      // Note that clang uses this capability both to implement the ELFv2
-      // homogeneous float/vector aggregate ABI, and to avoid having to use
-      // "byval" when passing aggregates that might fully fit in registers.
-      return Ty->isArrayTy();
-    }
+        const DataLayout &DL) const override;
 
     /// If a physical register, this returns the register that receives the
     /// exception address on entry to an EH pad.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -34,6 +34,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -16769,6 +16770,28 @@
   return PPC::createFastISel(FuncInfo, LibInfo);
 }
 
+bool PPCTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+    Type *Ty, CallingConv::ID CallConv, bool isVarArg,
+    const DataLayout &DL) const {
+  // We support any array type, and any struct type whose flattened value types
+  // are all identical (a "splat" struct), as a "consecutive" block in the
+  // parameter save area. The element type defines the alignment requirement
+  // and whether the argument should go in GPRs, FPRs, or VRs if available.
+  //
+  // Note that clang uses this capability both to implement the ELFv2
+  // homogeneous float/vector aggregate ABI, and to avoid having to use
+  // "byval" when passing aggregates that might fully fit in registers.
+  if (Ty->isArrayTy())
+    return true;
+
+  if (!Ty->isStructTy())
+    return false;
+
+  SmallVector<EVT, 8> ValueVTs;
+  ComputeValueVTs(*this, DL, Ty, ValueVTs);
+  return is_splat(ValueVTs);
+}
+
 // 'Inverted' means the FMA opcode after negating one multiplicand.
 // For example, (fma -a b c) = (fnmsub a b c)
 static unsigned invertFMAOpcode(unsigned Opc) {
diff --git a/llvm/test/CodeGen/PowerPC/splat-struct.ll b/llvm/test/CodeGen/PowerPC/splat-struct.ll
--- a/llvm/test/CodeGen/PowerPC/splat-struct.ll
+++ b/llvm/test/CodeGen/PowerPC/splat-struct.ll
@@ -9,20 +9,19 @@
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: mflr r0
 ; CHECK-NEXT: std r0, 16(r1)
-; CHECK-NEXT: stdu r1, -144(r1)
-; CHECK-NEXT: lfs f8, 140(r1)
-; CHECK-NEXT: lfs f7, 136(r1)
-; CHECK-NEXT: li r3, 10
-; CHECK-NEXT: lfs f6, 132(r1)
-; CHECK-NEXT: lfs f5, 128(r1)
-; CHECK-NEXT: std r3, 96(r1)
-; CHECK-NEXT: lfs f4, 124(r1)
-; CHECK-NEXT: lfs f3, 120(r1)
-; CHECK-NEXT: lfs f2, 116(r1)
-; CHECK-NEXT: lfs f1, 112(r1)
+; CHECK-NEXT: stdu r1, -64(r1)
+; CHECK-NEXT: lfs f8, 60(r1)
+; CHECK-NEXT: lfs f7, 56(r1)
+; CHECK-NEXT: li r7, 10
+; CHECK-NEXT: lfs f6, 52(r1)
+; CHECK-NEXT: lfs f5, 48(r1)
+; CHECK-NEXT: lfs f4, 44(r1)
+; CHECK-NEXT: lfs f3, 40(r1)
+; CHECK-NEXT: lfs f2, 36(r1)
+; CHECK-NEXT: lfs f1, 32(r1)
 ; CHECK-NEXT: bl bar
 ; CHECK-NEXT: nop
-; CHECK-NEXT: addi r1, r1, 144
+; CHECK-NEXT: addi r1, r1, 64
 ; CHECK-NEXT: ld r0, 16(r1)
 ; CHECK-NEXT: mtlr r0
 ; CHECK-NEXT: blr