diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td --- a/llvm/lib/Target/VE/VECallingConv.td +++ b/llvm/lib/Target/VE/VECallingConv.td @@ -14,6 +14,9 @@ // Aurora VE //===----------------------------------------------------------------------===// def CC_VE_C_Stack: CallingConv<[ + // F128 values are assigned to the stack in 16-byte aligned units. + CCIfType<[f128], CCAssignToStackWithShadow<16, 16, [SX7]>>, + // All of the rest are assigned to the stack in 8-byte aligned units. CCAssignToStack<0, 8> ]>; @@ -36,6 +39,14 @@ CCIfType<[i64, f64], CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + // long double --> pair of generic 64 bit registers + // + // NOTE: If Q1 is allocated while SX1 is free, LLVM tries to allocate SX1 for + // the following operands, so this shadows SX1 to avoid such behavior. + CCIfType<[f128], + CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3], + [SX0, SX1, SX3, SX5]>>, + // Alternatively, they are assigned to the stack in 8-byte aligned units. CCDelegateTo<CC_VE_C_Stack> ]>; @@ -53,6 +64,9 @@ // +------+------+ CCIfType<[f32], CCBitConvertToType<i64>>, + // F128 values are assigned to the stack in 16-byte aligned units. + CCIfType<[f128], CCAssignToStack<16, 16>>, + CCAssignToStack<0, 8> ]>; @@ -71,6 +85,11 @@ // --> generic 64 bit registers CCIfType<[i64, f64], CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, + + // long double --> pair of generic 64 bit registers + CCIfType<[f128], + CCAssignToRegWithShadow<[Q0, Q1, Q2, Q3], + [SX0, SX1, SX3, SX5]>>, ]>; // Callee-saved registers diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -80,8 +80,11 @@ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const; SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; /// } Custom Lower @@ -97,6 +100,7 @@ SelectionDAG &DAG) const; SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; /// Returns true if the target allows unaligned memory accesses of the diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -554,6 +554,15 @@ return Chain; } +bool VETargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + // VE uses 64-bit addressing, so we need multiple instructions to generate + // an address. Folding an address with an offset increases the number of + // instructions, so we disable it here. Offsets will be folded later in + // the DAG combine if it is worth doing so. + return false; +} + /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. 
@@ -623,6 +632,7 @@ addRegisterClass(MVT::i64, &VE::I64RegClass); addRegisterClass(MVT::f32, &VE::F32RegClass); addRegisterClass(MVT::f64, &VE::I64RegClass); + addRegisterClass(MVT::f128, &VE::F128RegClass); /// Load & Store { for (MVT FPVT : MVT::fp_valuetypes()) { @@ -649,6 +659,7 @@ setOperationAction(ISD::BlockAddress, PtrVT, Custom); setOperationAction(ISD::GlobalAddress, PtrVT, Custom); setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + setOperationAction(ISD::ConstantPool, PtrVT, Custom); /// VAARG handling { setOperationAction(ISD::VASTART, MVT::Other, Custom); @@ -719,6 +730,21 @@ } /// } Conversion + /// Floating-point Ops { + + // VE doesn't have fdiv of f128. + setOperationAction(ISD::FDIV, MVT::f128, Expand); + + // VE doesn't have load/store of f128, so use custom-lowering. + setOperationAction(ISD::LOAD, MVT::f128, Custom); + setOperationAction(ISD::STORE, MVT::f128, Custom); + + for (MVT FPVT : {MVT::f32, MVT::f64}) { + // f32 and f64 use ConstantFP; f128 uses ConstantPool. + setOperationAction(ISD::ConstantFP, FPVT, Legal); + } + /// } Floating-point Ops + setStackPointerRegisterToSaveRestore(VE::SX11); // We have target-specific dag combine patterns for the following nodes: @@ -769,6 +795,10 @@ return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(), 0, TF); + if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) + return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0), + CP->getAlign(), CP->getOffset(), TF); + if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), TF); @@ -853,6 +883,11 @@ return makeAddress(Op, DAG); } +SDValue VETargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + SDValue VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const { @@ -903,6 +938,119 @@ return LowerToTLSGeneralDynamicModel(Op, DAG); } +// Lower an f128 load into two f64 loads. +static SDValue LowerLoadF128(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); + LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode()); + assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type"); + unsigned alignment = LdNode->getAlign().value(); + if (alignment > 8) + alignment = 8; + + SDValue Lo64 = + DAG.getLoad(MVT::f64, dl, LdNode->getChain(), LdNode->getBasePtr(), + LdNode->getPointerInfo(), alignment, + LdNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + EVT addrVT = LdNode->getBasePtr().getValueType(); + SDValue HiPtr = DAG.getNode(ISD::ADD, dl, addrVT, LdNode->getBasePtr(), + DAG.getConstant(8, dl, addrVT)); + SDValue Hi64 = + DAG.getLoad(MVT::f64, dl, LdNode->getChain(), HiPtr, + LdNode->getPointerInfo(), alignment, + LdNode->isVolatile() ? 
MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + + SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, dl, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, dl, MVT::i32); + + // VE stores Hi64 to 8(addr) and Lo64 to 0(addr) + SDNode *InFP128 = + DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f128); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, MVT::f128, + SDValue(InFP128, 0), Hi64, SubRegEven); + InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, MVT::f128, + SDValue(InFP128, 0), Lo64, SubRegOdd); + SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1), + SDValue(Hi64.getNode(), 1)}; + SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); + SDValue Ops[2] = {SDValue(InFP128, 0), OutChain}; + return DAG.getMergeValues(Ops, dl); +} + +SDValue VETargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode()); + + SDValue BasePtr = LdNode->getBasePtr(); + if (isa<FrameIndexSDNode>(BasePtr.getNode())) { + // Do not expand a load instruction with a frame index here because of + // dependency problems. We expand it later in eliminateFrameIndex(). + return Op; + } + + EVT MemVT = LdNode->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerLoadF128(Op, DAG); + + return Op; +} + +// Lower an f128 store into two f64 stores. +static SDValue LowerStoreF128(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); + StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode()); + assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, dl, MVT::i32); + SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, dl, MVT::i32); + + SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i64, + StNode->getValue(), SubRegEven); + SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i64, + StNode->getValue(), SubRegOdd); + + unsigned alignment = StNode->getAlign().value(); + if (alignment > 8) + alignment = 8; + + // VE stores Hi64 to 8(addr) and Lo64 to 0(addr) + SDValue OutChains[2]; + OutChains[0] = + DAG.getStore(StNode->getChain(), dl, SDValue(Lo64, 0), + StNode->getBasePtr(), MachinePointerInfo(), alignment, + StNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + EVT addrVT = StNode->getBasePtr().getValueType(); + SDValue HiPtr = DAG.getNode(ISD::ADD, dl, addrVT, StNode->getBasePtr(), + DAG.getConstant(8, dl, addrVT)); + OutChains[1] = + DAG.getStore(StNode->getChain(), dl, SDValue(Hi64, 0), HiPtr, + MachinePointerInfo(), alignment, + StNode->isVolatile() ? MachineMemOperand::MOVolatile + : MachineMemOperand::MONone); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); +} + +SDValue VETargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode()); + assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type"); + + SDValue BasePtr = StNode->getBasePtr(); + if (isa<FrameIndexSDNode>(BasePtr.getNode())) { + // Do not expand a store instruction with a frame index here because of + // dependency problems. We expand it later in eliminateFrameIndex(). + return Op; + } + + EVT MemVT = StNode->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerStoreF128(Op, DAG); + + // Otherwise, ask LLVM to expand it. 
+ return SDValue(); +} + SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); @@ -935,7 +1083,19 @@ SDValue Chain = VAList.getValue(1); SDValue NextPtr; - if (VT == MVT::f32) { + if (VT == MVT::f128) { + // VE f128 values must be stored with 16-byte alignment. We don't + // know the actual alignment of VAList, so we align it + // dynamically. + int Align = 16; + VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(Align - 1, DL, PtrVT)); + VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList, + DAG.getConstant(-Align, DL, PtrVT)); + // Increment the pointer, VAList, by 16 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL)); + } else if (VT == MVT::f32) { // float --> need special handling like below. // 0 4 // +------+------+ @@ -1034,12 +1194,18 @@ llvm_unreachable("Should not custom lower this!"); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return lowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::LOAD: + return LowerLOAD(Op, DAG); + case ISD::STORE: + return LowerSTORE(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -315,6 +315,34 @@ VE::F32RegClass.contains(Reg); } +static void copyPhysSubRegs(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + MCRegister DestReg, MCRegister SrcReg, bool KillSrc, + const MCInstrDesc &MCID, unsigned int numSubRegs, + const unsigned *subRegIdx, + const TargetRegisterInfo *TRI) { + MachineInstr *MovMI = nullptr; + + for (unsigned i = 0; i != numSubRegs; ++i) { + unsigned SubDest = TRI->getSubReg(DestReg, subRegIdx[i]); + unsigned SubSrc = TRI->getSubReg(SrcReg, subRegIdx[i]); + assert(SubDest && SubSrc && "Bad sub-register"); + + if (MCID.getOpcode() == VE::ORri) { + // Generate an "ORri dest, src, 0" instruction. + MachineInstrBuilder MIB = + BuildMI(MBB, I, DL, MCID, SubDest).addReg(SubSrc).addImm(0); + MovMI = MIB.getInstr(); + } else { + llvm_unreachable("Unexpected reg-to-reg copy instruction"); + } + } + // Add implicit super-register defs and kills to the last MovMI. + MovMI->addRegisterDefined(DestReg, TRI); + if (KillSrc) + MovMI->addRegisterKilled(SrcReg, TRI, true); +} + void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, @@ -324,6 +352,12 @@ BuildMI(MBB, I, DL, get(VE::ORri), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)) .addImm(0); + } else if (VE::F128RegClass.contains(DestReg, SrcReg)) { + // Use two instructions. 
+ const unsigned subRegIdx[] = {VE::sub_even, VE::sub_odd}; + unsigned int numSubRegs = 2; + copyPhysSubRegs(MBB, I, DL, DestReg, SrcReg, KillSrc, get(VE::ORri), + numSubRegs, subRegIdx, &getRegisterInfo()); } else { const TargetRegisterInfo *TRI = &getRegisterInfo(); dbgs() << "Impossible reg-to-reg copy from " << printReg(SrcReg, TRI) @@ -341,7 +375,8 @@ int &FrameIndex) const { if (MI.getOpcode() == VE::LDrii || // I64 MI.getOpcode() == VE::LDLSXrii || // I32 - MI.getOpcode() == VE::LDUrii // F32 + MI.getOpcode() == VE::LDUrii || // F32 + MI.getOpcode() == VE::LDQrii // F128 (pseudo) ) { if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0 && MI.getOperand(3).isImm() && @@ -362,7 +397,8 @@ int &FrameIndex) const { if (MI.getOpcode() == VE::STrii || // I64 MI.getOpcode() == VE::STLrii || // I32 - MI.getOpcode() == VE::STUrii // F32 + MI.getOpcode() == VE::STUrii || // F32 + MI.getOpcode() == VE::STQrii // F128 (pseudo) ) { if (MI.getOperand(0).isFI() && MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0 && MI.getOperand(2).isImm() && @@ -411,6 +447,13 @@ .addImm(0) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO); + } else if (VE::F128RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(VE::STQrii)) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO); } else report_fatal_error("Can't store this register to stack slot"); } @@ -448,6 +491,12 @@ .addImm(0) .addImm(0) .addMemOperand(MMO); + } else if (VE::F128RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(VE::LDQrii), DestReg) + .addFrameIndex(FI) + .addImm(0) + .addImm(0) + .addMemOperand(MMO); } else report_fatal_error("Can't load this register from stack slot"); } diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -898,6 +898,7 @@ //----------------------------------------------------------------------------- // Multiclass for generic RM instructions +let hasSideEffects = 0 in multiclass RMmopc, RegisterClass RC> { def rri : RM; @@ -986,6 +987,13 @@ let cx = 1, DecoderMethod = "DecodeLoadI32" in defm LD1BZX : LOADm<"ld1b.zx", 0x05, I32, i32, zextloadi8>; +// LDQ pseudo instructions +let mayLoad = 1, hasSideEffects = 0 in { + def LDQrii : Pseudo<(outs F128:$dest), (ins MEMrii:$addr), + "# pseudo ldq $dest, $addr", + [(set f128:$dest, (load ADDRrii:$addr))]>; +} + // Multiclass for store instructions. 
let mayStore = 1 in multiclass STOREm opc, RegisterClass RC, ValueType Ty, @@ -1031,6 +1039,13 @@ let DecoderMethod = "DecodeStoreI32" in defm ST1B : STOREm<"st1b", 0x15, I32, i32, truncstorei8>; +// STQ pseudo instructions +let mayStore = 1, hasSideEffects = 0 in { + def STQrii : Pseudo<(outs), (ins MEMrii:$addr, F128:$sx), + "# pseudo stq $sx, $addr", + [(store f128:$sx, ADDRrii:$addr)]>; +} + // Section 8.2.12 - DLDS let DecoderMethod = "DecodeLoadI64" in defm DLD : LOADm<"dld", 0x09, I64, i64, load>; @@ -1307,13 +1322,13 @@ defm FMINS : RRFm<"fmin.s", 0x3E, F32, f32, fminnum, simm7fp, mimmfp32>; // Section 8.7.7 - FAQ (Floating Add Quadruple) -defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128>; +defm FADDQ : RRFm<"fadd.q", 0x6C, F128, f128, fadd>; // Section 8.7.8 - FSQ (Floating Subtract Quadruple) -defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128>; +defm FSUBQ : RRFm<"fsub.q", 0x7C, F128, f128, fsub>; // Section 8.7.9 - FMQ (Floating Subtract Quadruple) -defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128>; +defm FMULQ : RRFm<"fmul.q", 0x6D, F128, f128, fmul>; // Section 8.7.10 - FCQ (Floating Compare Quadruple) defm FCMPQ : RRNCbm<"fcmp.q", 0x7D, I64, f64, F128, f128, null_frag, simm7fp, @@ -1631,6 +1646,14 @@ defm : TRUNC64m; defm : TRUNC64m; +// Address calculation and its optimization +def : Pat<(VEhi tconstpool:$in), (LEASLzii 0, 0, tconstpool:$in)>; +def : Pat<(VElo tconstpool:$in), + (ANDrm (LEAzii 0, 0, tconstpool:$in), !add(32, 64))>; +def : Pat<(add (VEhi tconstpool:$in1), (VElo tconstpool:$in2)), + (LEASLrii (ANDrm (LEAzii 0, 0, tconstpool:$in2), !add(32, 64)), 0, + (tconstpool:$in1))>; + // Address calculation and its optimization def : Pat<(VEhi tglobaladdr:$in), (LEASLzii 0, 0, tglobaladdr:$in)>; def : Pat<(VElo tglobaladdr:$in), diff --git a/llvm/lib/Target/VE/VEMCInstLower.cpp b/llvm/lib/Target/VE/VEMCInstLower.cpp --- a/llvm/lib/Target/VE/VEMCInstLower.cpp +++ b/llvm/lib/Target/VE/VEMCInstLower.cpp @@ -51,6 +51,8 @@ break; return MCOperand::createReg(MO.getReg()); + case MachineOperand::MO_ConstantPoolIndex: + return LowerSymbolOperand(MI, MO, AP.GetCPISymbol(MO.getIndex()), AP); case MachineOperand::MO_ExternalSymbol: return LowerSymbolOperand( MI, MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); diff --git a/llvm/lib/Target/VE/VERegisterInfo.cpp b/llvm/lib/Target/VE/VERegisterInfo.cpp --- a/llvm/lib/Target/VE/VERegisterInfo.cpp +++ b/llvm/lib/Target/VE/VERegisterInfo.cpp @@ -120,6 +120,38 @@ Offset += MI.getOperand(FIOperandNum + 2).getImm(); + if (MI.getOpcode() == VE::STQrii) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + unsigned SrcReg = MI.getOperand(3).getReg(); + unsigned SrcHiReg = getSubReg(SrcReg, VE::sub_even); + unsigned SrcLoReg = getSubReg(SrcReg, VE::sub_odd); + // VE stores HiReg to 8(addr) and LoReg to 0(addr) + MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(VE::STrii)) + .addReg(FrameReg) + .addImm(0) + .addImm(0) + .addReg(SrcLoReg); + replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg); + MI.setDesc(TII.get(VE::STrii)); + MI.getOperand(3).setReg(SrcHiReg); + Offset += 8; + } else if (MI.getOpcode() == VE::LDQrii) { + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned DestHiReg = getSubReg(DestReg, VE::sub_even); + unsigned DestLoReg = getSubReg(DestReg, VE::sub_odd); + // VE loads HiReg from 8(addr) and LoReg from 0(addr) + MachineInstr *StMI = + BuildMI(*MI.getParent(), II, dl, TII.get(VE::LDrii), DestLoReg) + .addReg(FrameReg) + .addImm(0) + 
.addImm(0); + replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg); + MI.setDesc(TII.get(VE::LDrii)); + MI.getOperand(0).setReg(DestHiReg); + Offset += 8; + } + replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg); } diff --git a/llvm/test/CodeGen/VE/call.ll b/llvm/test/CodeGen/VE/call.ll --- a/llvm/test/CodeGen/VE/call.ll +++ b/llvm/test/CodeGen/VE/call.ll @@ -1,5 +1,11 @@ ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s +declare i32 @sample_add(i32, i32) +declare i32 @stack_callee_int(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) +declare i32 @stack_callee_int_szext(i1 signext, i8 zeroext, i32, i32, i32, i32, i32, i32, i16 zeroext, i8 signext) +declare float @stack_callee_float(float, float, float, float, float, float, float, float, float, float) +declare void @test(i64) + define i32 @sample_call() { ; CHECK-LABEL: sample_call: ; CHECK: .LBB{{[0-9]+}}_2: @@ -14,8 +20,6 @@ ret i32 %r } -declare i32 @sample_add(i32, i32) - define i32 @stack_call_int() { ; CHECK-LABEL: stack_call_int: ; CHECK: .LBB{{[0-9]+}}_2: @@ -40,8 +44,6 @@ ret i32 %r } -declare i32 @stack_callee_int(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) - define i32 @stack_call_int_szext() { ; CHECK-LABEL: stack_call_int_szext: ; CHECK: .LBB{{[0-9]+}}_2: @@ -65,8 +67,6 @@ ret i32 %r } -declare i32 @stack_callee_int_szext(i1 signext, i8 zeroext, i32, i32, i32, i32, i32, i32, i16 zeroext, i8 signext) - define float @stack_call_float() { ; CHECK-LABEL: stack_call_float: ; CHECK: .LBB{{[0-9]+}}_2: @@ -91,8 +91,6 @@ ret float %r } -declare float @stack_callee_float(float, float, float, float, float, float, float, float, float, float) - define float @stack_call_float2(float %p0) { ; CHECK-LABEL: stack_call_float2: ; CHECK: .LBB{{[0-9]+}}_2: diff --git a/llvm/test/CodeGen/VE/fp_add.ll b/llvm/test/CodeGen/VE/fp_add.ll --- a/llvm/test/CodeGen/VE/fp_add.ll +++ b/llvm/test/CodeGen/VE/fp_add.ll @@ -18,6 +18,15 @@ ret double %r } +define fp128 @func3(fp128 %a, fp128 %b) { +; CHECK-LABEL: func3: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fadd.q %s0, %s0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fadd fp128 %a, %b + ret fp128 %r +} + define float @func4(float %a) { ; CHECK-LABEL: func4: ; CHECK: .LBB{{[0-9]+}}_2: @@ -38,6 +47,20 @@ ret double %r } +define fp128 @func6(fp128 %a) { +; CHECK-LABEL: func6: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, .LCPI{{[0-9]+}}_0@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, .LCPI{{[0-9]+}}_0@hi(, %s2) +; CHECK-NEXT: ld %s4, 8(, %s2) +; CHECK-NEXT: ld %s5, (, %s2) +; CHECK-NEXT: fadd.q %s0, %s0, %s4 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fadd fp128 %a, 0xL00000000000000004001400000000000 + ret fp128 %r +} + define float @func7(float %a) { ; CHECK-LABEL: func7: ; CHECK: .LBB{{[0-9]+}}_2: @@ -60,6 +83,20 @@ ret double %r } +define fp128 @func9(fp128 %a) { +; CHECK-LABEL: func9: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, .LCPI{{[0-9]+}}_0@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, .LCPI{{[0-9]+}}_0@hi(, %s2) +; CHECK-NEXT: ld %s4, 8(, %s2) +; CHECK-NEXT: ld %s5, (, %s2) +; CHECK-NEXT: fadd.q %s0, %s0, %s4 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fadd fp128 %a, 0xLFFFFFFFFFFFFFFFF7FFEFFFFFFFFFFFF + ret fp128 %r +} + define float @fadds_imm(float %a) { ; CHECK-LABEL: fadds_imm: ; CHECK: .LBB{{[0-9]+}}_2: @@ -77,3 +114,17 @@ %r = fadd double %a, -2.e+00 ret double %r } + +define fp128 @faddq_imm(fp128 %a) { +; CHECK-LABEL: faddq_imm: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, .LCPI{{[0-9]+}}_0@lo +; CHECK-NEXT: and %s2, %s2, (32)0 
+; CHECK-NEXT: lea.sl %s2, .LCPI{{[0-9]+}}_0@hi(, %s2) +; CHECK-NEXT: ld %s4, 8(, %s2) +; CHECK-NEXT: ld %s5, (, %s2) +; CHECK-NEXT: fadd.q %s0, %s0, %s4 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fadd fp128 %a, 0xLA0000000000000000000000000000000 + ret fp128 %r +} diff --git a/llvm/test/CodeGen/VE/fp_div.ll b/llvm/test/CodeGen/VE/fp_div.ll --- a/llvm/test/CodeGen/VE/fp_div.ll +++ b/llvm/test/CodeGen/VE/fp_div.ll @@ -18,6 +18,18 @@ ret double %r } +define fp128 @func3(fp128 %a, fp128 %b) { +; CHECK-LABEL: func3: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s4, __divtf3@lo +; CHECK-NEXT: and %s4, %s4, (32)0 +; CHECK-NEXT: lea.sl %s12, __divtf3@hi(, %s4) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 + %r = fdiv fp128 %a, %b + ret fp128 %r +} + define float @func4(float %a) { ; CHECK-LABEL: func4: ; CHECK: .LBB{{[0-9]+}}_2: @@ -38,6 +50,23 @@ ret double %r } +define fp128 @func6(fp128 %a) { +; CHECK-LABEL: func6: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, .LCPI{{[0-9]+}}_0@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s4, .LCPI{{[0-9]+}}_0@hi(, %s2) +; CHECK-NEXT: ld %s2, 8(, %s4) +; CHECK-NEXT: ld %s3, (, %s4) +; CHECK-NEXT: lea %s4, __divtf3@lo +; CHECK-NEXT: and %s4, %s4, (32)0 +; CHECK-NEXT: lea.sl %s12, __divtf3@hi(, %s4) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 + %r = fdiv fp128 %a, 0xL00000000000000004001400000000000 + ret fp128 %r +} + define float @func7(float %a) { ; CHECK-LABEL: func7: ; CHECK: .LBB{{[0-9]+}}_2: @@ -59,3 +88,21 @@ %r = fdiv double %a, 0x7FEFFFFFFFFFFFFF ret double %r } + +; Function Attrs: norecurse nounwind readnone +define fp128 @func9(fp128 %a) { +; CHECK-LABEL: func9: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, .LCPI{{[0-9]+}}_0@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s4, .LCPI{{[0-9]+}}_0@hi(, %s2) +; CHECK-NEXT: ld %s2, 8(, %s4) +; CHECK-NEXT: ld %s3, (, %s4) +; CHECK-NEXT: lea %s4, __divtf3@lo +; CHECK-NEXT: and %s4, %s4, (32)0 +; CHECK-NEXT: lea.sl %s12, __divtf3@hi(, %s4) +; CHECK-NEXT: bsic %s10, (, %s12) +; CHECK-NEXT: or %s11, 0, %s9 + %r = fdiv fp128 %a, 0xLFFFFFFFFFFFFFFFF7FFEFFFFFFFFFFFF + ret fp128 %r +} diff --git a/llvm/test/CodeGen/VE/fp_mul.ll b/llvm/test/CodeGen/VE/fp_mul.ll --- a/llvm/test/CodeGen/VE/fp_mul.ll +++ b/llvm/test/CodeGen/VE/fp_mul.ll @@ -18,6 +18,15 @@ ret double %r } +define fp128 @func3(fp128 %a, fp128 %b) { +; CHECK-LABEL: func3: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fmul.q %s0, %s0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fmul fp128 %a, %b + ret fp128 %r +} + define float @func4(float %a) { ; CHECK-LABEL: func4: ; CHECK: .LBB{{[0-9]+}}_2: @@ -38,6 +47,20 @@ ret double %r } +define fp128 @func6(fp128 %a) { +; CHECK-LABEL: func6: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, .LCPI{{[0-9]+}}_0@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, .LCPI{{[0-9]+}}_0@hi(, %s2) +; CHECK-NEXT: ld %s4, 8(, %s2) +; CHECK-NEXT: ld %s5, (, %s2) +; CHECK-NEXT: fmul.q %s0, %s0, %s4 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fmul fp128 %a, 0xL00000000000000004001400000000000 + ret fp128 %r +} + define float @func7(float %a) { ; CHECK-LABEL: func7: ; CHECK: .LBB{{[0-9]+}}_2: @@ -60,6 +83,20 @@ ret double %r } +define fp128 @func9(fp128 %a) { +; CHECK-LABEL: func9: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, .LCPI{{[0-9]+}}_0@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, .LCPI{{[0-9]+}}_0@hi(, %s2) +; CHECK-NEXT: ld %s4, 8(, %s2) +; CHECK-NEXT: ld %s5, (, %s2) +; CHECK-NEXT: fmul.q %s0, %s0, %s4 +; 
CHECK-NEXT: or %s11, 0, %s9 + %r = fmul fp128 %a, 0xLFFFFFFFFFFFFFFFF7FFEFFFFFFFFFFFF + ret fp128 %r +} + define float @fmuls_ir(float %a) { ; CHECK-LABEL: fmuls_ir: ; CHECK: .LBB{{[0-9]+}}_2: @@ -95,4 +132,3 @@ %r = fmul float %a, 1.175494210692441075487029444849287348827052428745893333857174530571588870475618904265502351336181163787841796875E-38 ret float %r } - diff --git a/llvm/test/CodeGen/VE/fp_sub.ll b/llvm/test/CodeGen/VE/fp_sub.ll --- a/llvm/test/CodeGen/VE/fp_sub.ll +++ b/llvm/test/CodeGen/VE/fp_sub.ll @@ -18,6 +18,15 @@ ret double %r } +define fp128 @func3(fp128 %a, fp128 %b) { +; CHECK-LABEL: func3: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: fsub.q %s0, %s0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fsub fp128 %a, %b + ret fp128 %r +} + define float @func4(float %a) { ; CHECK-LABEL: func4: ; CHECK: .LBB{{[0-9]+}}_2: @@ -38,6 +47,20 @@ ret double %r } +define fp128 @func6(fp128 %a) { +; CHECK-LABEL: func6: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, .LCPI{{[0-9]+}}_0@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, .LCPI{{[0-9]+}}_0@hi(, %s2) +; CHECK-NEXT: ld %s4, 8(, %s2) +; CHECK-NEXT: ld %s5, (, %s2) +; CHECK-NEXT: fadd.q %s0, %s0, %s4 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fadd fp128 %a, 0xL0000000000000000C001400000000000 + ret fp128 %r +} + define float @func7(float %a) { ; CHECK-LABEL: func7: ; CHECK: .LBB{{[0-9]+}}_2: @@ -60,6 +83,20 @@ ret double %r } +define fp128 @func9(fp128 %a) { +; CHECK-LABEL: func9: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, .LCPI{{[0-9]+}}_0@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, .LCPI{{[0-9]+}}_0@hi(, %s2) +; CHECK-NEXT: ld %s4, 8(, %s2) +; CHECK-NEXT: ld %s5, (, %s2) +; CHECK-NEXT: fadd.q %s0, %s0, %s4 +; CHECK-NEXT: or %s11, 0, %s9 + %r = fadd fp128 %a, 0xLFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFF + ret fp128 %r +} + define float @fsubs_ir(float %a) { ; CHECK-LABEL: fsubs_ir: ; CHECK: .LBB{{[0-9]+}}_2: diff --git a/llvm/test/CodeGen/VE/load.ll b/llvm/test/CodeGen/VE/load.ll --- a/llvm/test/CodeGen/VE/load.ll +++ b/llvm/test/CodeGen/VE/load.ll @@ -1,5 +1,18 @@ ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s +; Function Attrs: norecurse nounwind readonly +define fp128 @loadf128(fp128* nocapture readonly %0) { +; CHECK-LABEL: loadf128: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ld %s2, 8(, %s0) +; CHECK-NEXT: ld %s3, (, %s0) +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: or %s1, 0, %s3 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = load fp128, fp128* %0, align 16 + ret fp128 %2 +} + ; Function Attrs: norecurse nounwind readonly define double @loadf64(double* nocapture readonly %0) { ; CHECK-LABEL: loadf64: @@ -20,6 +33,18 @@ ret float %2 } +; Function Attrs: norecurse nounwind readonly +define i128 @loadi128(i128* nocapture readonly %0) { +; CHECK-LABEL: loadi128: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ld %s2, (, %s0) +; CHECK-NEXT: ld %s1, 8(, %s0) +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %2 = load i128, i128* %0, align 16 + ret i128 %2 +} + ; Function Attrs: norecurse nounwind readonly define i64 @loadi64(i64* nocapture readonly %0) { ; CHECK-LABEL: loadi64: @@ -126,6 +151,18 @@ ret i64 %3 } +; Function Attrs: norecurse nounwind readonly +define fp128 @loadf128stk() { +; CHECK-LABEL: loadf128stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ld %s1, 176(, %s11) +; CHECK-NEXT: ld %s0, 184(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 + %addr = alloca fp128, align 16 + %1 = load fp128, fp128* %addr, align 16 + ret fp128 %1 +} + ; Function Attrs: norecurse nounwind readonly define 
double @loadf64stk() { ; CHECK-LABEL: loadf64stk: @@ -148,6 +185,18 @@ ret float %1 } +; Function Attrs: norecurse nounwind readonly +define i128 @loadi128stk() { +; CHECK-LABEL: loadi128stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ld %s0, 176(, %s11) +; CHECK-NEXT: ld %s1, 184(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 + %addr = alloca i128, align 16 + %1 = load i128, i128* %addr, align 16 + ret i128 %1 +} + ; Function Attrs: norecurse nounwind readonly define i64 @loadi64stk() { ; CHECK-LABEL: loadi64stk: diff --git a/llvm/test/CodeGen/VE/load_gv.ll b/llvm/test/CodeGen/VE/load_gv.ll --- a/llvm/test/CodeGen/VE/load_gv.ll +++ b/llvm/test/CodeGen/VE/load_gv.ll @@ -4,8 +4,24 @@ @vi16 = common dso_local local_unnamed_addr global i16 0, align 2 @vi32 = common dso_local local_unnamed_addr global i32 0, align 4 @vi64 = common dso_local local_unnamed_addr global i64 0, align 8 +@vi128 = common dso_local local_unnamed_addr global i128 0, align 16 @vf32 = common dso_local local_unnamed_addr global float 0.000000e+00, align 4 @vf64 = common dso_local local_unnamed_addr global double 0.000000e+00, align 8 +@vf128 = common dso_local local_unnamed_addr global fp128 0xL00000000000000000000000000000000, align 16 + +; Function Attrs: norecurse nounwind readonly +define fp128 @loadf128com() { +; CHECK-LABEL: loadf128com: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, vf128@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s2, vf128@hi(, %s0) +; CHECK-NEXT: ld %s0, 8(, %s2) +; CHECK-NEXT: ld %s1, (, %s2) +; CHECK-NEXT: or %s11, 0, %s9 + %1 = load fp128, fp128* @vf128, align 16 + ret fp128 %1 +} ; Function Attrs: norecurse nounwind readonly define double @loadf64com() { @@ -33,6 +49,20 @@ ret float %1 } +; Function Attrs: norecurse nounwind readonly +define i128 @loadi128com() { +; CHECK-LABEL: loadi128com: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, vi128@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s1, vi128@hi(, %s0) +; CHECK-NEXT: ld %s0, (, %s1) +; CHECK-NEXT: ld %s1, 8(, %s1) +; CHECK-NEXT: or %s11, 0, %s9 + %1 = load i128, i128* @vi128, align 16 + ret i128 %1 +} + ; Function Attrs: norecurse nounwind readonly define i64 @loadi64com() { ; CHECK-LABEL: loadi64com: diff --git a/llvm/test/CodeGen/VE/load_off.ll b/llvm/test/CodeGen/VE/load_off.ll --- a/llvm/test/CodeGen/VE/load_off.ll +++ b/llvm/test/CodeGen/VE/load_off.ll @@ -13,10 +13,10 @@ define signext i8 @loadi8s() { ; CHECK-LABEL: loadi8s: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, bufi8+2@lo +; CHECK-NEXT: lea %s0, bufi8@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, bufi8+2@hi(, %s0) -; CHECK-NEXT: ld1b.sx %s0, (, %s0) +; CHECK-NEXT: lea.sl %s0, bufi8@hi(, %s0) +; CHECK-NEXT: ld1b.sx %s0, 2(, %s0) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load i8, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bufi8, i64 0, i64 2), align 1 @@ -27,10 +27,10 @@ define signext i16 @loadi16s() { ; CHECK-LABEL: loadi16s: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, bufi16+4@lo +; CHECK-NEXT: lea %s0, bufi16@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, bufi16+4@hi(, %s0) -; CHECK-NEXT: ld2b.sx %s0, (, %s0) +; CHECK-NEXT: lea.sl %s0, bufi16@hi(, %s0) +; CHECK-NEXT: ld2b.sx %s0, 4(, %s0) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load i16, i16* getelementptr inbounds ([3 x i16], [3 x i16]* @bufi16, i64 0, i64 2), align 2 @@ -41,10 +41,10 @@ define signext i32 @loadi32s() { ; CHECK-LABEL: loadi32s: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, bufi32+8@lo +; CHECK-NEXT: lea %s0, 
bufi32@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, bufi32+8@hi(, %s0) -; CHECK-NEXT: ldl.sx %s0, (, %s0) +; CHECK-NEXT: lea.sl %s0, bufi32@hi(, %s0) +; CHECK-NEXT: ldl.sx %s0, 8(, %s0) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bufi32, i64 0, i64 2), align 4 @@ -55,10 +55,10 @@ define i64 @loadi64s() { ; CHECK-LABEL: loadi64s: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, bufi64+16@lo +; CHECK-NEXT: lea %s0, bufi64@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, bufi64+16@hi(, %s0) -; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: lea.sl %s0, bufi64@hi(, %s0) +; CHECK-NEXT: ld %s0, 16(, %s0) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load i64, i64* getelementptr inbounds ([3 x i64], [3 x i64]* @bufi64, i64 0, i64 2), align 8 @@ -69,14 +69,11 @@ define i128 @loadi128s() { ; CHECK-LABEL: loadi128s: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, bufi128+32@lo +; CHECK-NEXT: lea %s0, bufi128@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, bufi128+32@hi(, %s0) -; CHECK-NEXT: ld %s0, (, %s0) -; CHECK-NEXT: lea %s1, bufi128+40@lo -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: lea.sl %s1, bufi128+40@hi(, %s1) -; CHECK-NEXT: ld %s1, (, %s1) +; CHECK-NEXT: lea.sl %s1, bufi128@hi(, %s0) +; CHECK-NEXT: ld %s0, 32(, %s1) +; CHECK-NEXT: ld %s1, 40(, %s1) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load i128, i128* getelementptr inbounds ([3 x i128], [3 x i128]* @bufi128, i64 0, i64 2), align 16 @@ -87,10 +84,10 @@ define zeroext i8 @loadi8z() { ; CHECK-LABEL: loadi8z: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, bufi8+2@lo +; CHECK-NEXT: lea %s0, bufi8@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, bufi8+2@hi(, %s0) -; CHECK-NEXT: ld1b.zx %s0, (, %s0) +; CHECK-NEXT: lea.sl %s0, bufi8@hi(, %s0) +; CHECK-NEXT: ld1b.zx %s0, 2(, %s0) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load i8, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bufi8, i64 0, i64 2), align 1 @@ -101,10 +98,10 @@ define zeroext i16 @loadi16z() { ; CHECK-LABEL: loadi16z: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, bufi16+4@lo +; CHECK-NEXT: lea %s0, bufi16@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, bufi16+4@hi(, %s0) -; CHECK-NEXT: ld2b.zx %s0, (, %s0) +; CHECK-NEXT: lea.sl %s0, bufi16@hi(, %s0) +; CHECK-NEXT: ld2b.zx %s0, 4(, %s0) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load i16, i16* getelementptr inbounds ([3 x i16], [3 x i16]* @bufi16, i64 0, i64 2), align 2 @@ -115,10 +112,10 @@ define zeroext i32 @loadi32z() { ; CHECK-LABEL: loadi32z: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, bufi32+8@lo +; CHECK-NEXT: lea %s0, bufi32@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, bufi32+8@hi(, %s0) -; CHECK-NEXT: ldl.zx %s0, (, %s0) +; CHECK-NEXT: lea.sl %s0, bufi32@hi(, %s0) +; CHECK-NEXT: ldl.zx %s0, 8(, %s0) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bufi32, i64 0, i64 2), align 4 @@ -129,10 +126,10 @@ define i64 @loadi64z() { ; CHECK-LABEL: loadi64z: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, bufi64+16@lo +; CHECK-NEXT: lea %s0, bufi64@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, bufi64+16@hi(, %s0) -; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: lea.sl %s0, bufi64@hi(, %s0) +; CHECK-NEXT: ld %s0, 16(, %s0) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load i64, i64* getelementptr inbounds ([3 x i64], [3 x i64]* @bufi64, i64 0, i64 2), align 8 @@ -143,14 +140,11 @@ define 
i128 @loadi128z() { ; CHECK-LABEL: loadi128z: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, bufi128+32@lo +; CHECK-NEXT: lea %s0, bufi128@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, bufi128+32@hi(, %s0) -; CHECK-NEXT: ld %s0, (, %s0) -; CHECK-NEXT: lea %s1, bufi128+40@lo -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: lea.sl %s1, bufi128+40@hi(, %s1) -; CHECK-NEXT: ld %s1, (, %s1) +; CHECK-NEXT: lea.sl %s1, bufi128@hi(, %s0) +; CHECK-NEXT: ld %s0, 32(, %s1) +; CHECK-NEXT: ld %s1, 40(, %s1) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load i128, i128* getelementptr inbounds ([3 x i128], [3 x i128]* @bufi128, i64 0, i64 2), align 16 @@ -161,10 +155,10 @@ define float @loadf32() { ; CHECK-LABEL: loadf32: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, buff32+8@lo +; CHECK-NEXT: lea %s0, buff32@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, buff32+8@hi(, %s0) -; CHECK-NEXT: ldu %s0, (, %s0) +; CHECK-NEXT: lea.sl %s0, buff32@hi(, %s0) +; CHECK-NEXT: ldu %s0, 8(, %s0) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load float, float* getelementptr inbounds ([3 x float], [3 x float]* @buff32, i64 0, i64 2), align 4 @@ -175,10 +169,10 @@ define double @loadf64() { ; CHECK-LABEL: loadf64: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, buff64+16@lo +; CHECK-NEXT: lea %s0, buff64@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, buff64+16@hi(, %s0) -; CHECK-NEXT: ld %s0, (, %s0) +; CHECK-NEXT: lea.sl %s0, buff64@hi(, %s0) +; CHECK-NEXT: ld %s0, 16(, %s0) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load double, double* getelementptr inbounds ([3 x double], [3 x double]* @buff64, i64 0, i64 2), align 8 @@ -189,14 +183,11 @@ define fp128 @loadf128() { ; CHECK-LABEL: loadf128: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: lea %s0, buff128+32@lo +; CHECK-NEXT: lea %s0, buff128@lo ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s0, buff128+32@hi(, %s0) -; CHECK-NEXT: ld %s0, (, %s0) -; CHECK-NEXT: lea %s1, buff128+40@lo -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: lea.sl %s1, buff128+40@hi(, %s1) -; CHECK-NEXT: ld %s1, (, %s1) +; CHECK-NEXT: lea.sl %s2, buff128@hi(, %s0) +; CHECK-NEXT: ld %s0, 40(, %s2) +; CHECK-NEXT: ld %s1, 32(, %s2) ; CHECK-NEXT: or %s11, 0, %s9 entry: %0 = load fp128, fp128* getelementptr inbounds ([3 x fp128], [3 x fp128]* @buff128, i64 0, i64 2), align 16 diff --git a/llvm/test/CodeGen/VE/store.ll b/llvm/test/CodeGen/VE/store.ll --- a/llvm/test/CodeGen/VE/store.ll +++ b/llvm/test/CodeGen/VE/store.ll @@ -1,5 +1,16 @@ ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s +; Function Attrs: norecurse nounwind readonly +define void @storef128(fp128* nocapture %0, fp128 %1) { +; CHECK-LABEL: storef128: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: st %s2, 8(, %s0) +; CHECK-NEXT: st %s3, (, %s0) +; CHECK-NEXT: or %s11, 0, %s9 + store fp128 %1, fp128* %0, align 16 + ret void +} + ; Function Attrs: norecurse nounwind readonly define void @storef64(double* nocapture %0, double %1) { ; CHECK-LABEL: storef64: @@ -20,6 +31,17 @@ ret void } +; Function Attrs: norecurse nounwind readonly +define void @storei128(i128* nocapture %0, i128 %1) { +; CHECK-LABEL: storei128: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: st %s2, 8(, %s0) +; CHECK-NEXT: st %s1, (, %s0) +; CHECK-NEXT: or %s11, 0, %s9 + store i128 %1, i128* %0, align 16 + ret void +} + ; Function Attrs: norecurse nounwind readonly define void @storei64(i64* nocapture %0, i64 %1) { ; CHECK-LABEL: storei64: @@ -93,6 +115,18 @@ ret void } +; Function Attrs: norecurse nounwind 
readonly +define void @storef128stk(fp128 %0) { +; CHECK-LABEL: storef128stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: st %s1, 176(, %s11) +; CHECK-NEXT: st %s0, 184(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 + %addr = alloca fp128, align 16 + store fp128 %0, fp128* %addr, align 16 + ret void +} + ; Function Attrs: norecurse nounwind readonly define void @storef64stk(double %0) { ; CHECK-LABEL: storef64stk: @@ -115,6 +149,18 @@ ret void } +; Function Attrs: norecurse nounwind readonly +define void @storei128stk(i128 %0) { +; CHECK-LABEL: storei128stk: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: st %s1, 184(, %s11) +; CHECK-NEXT: st %s0, 176(, %s11) +; CHECK-NEXT: or %s11, 0, %s9 + %addr = alloca i128, align 16 + store i128 %0, i128* %addr, align 16 + ret void +} + ; Function Attrs: norecurse nounwind readonly define void @storei64stk(i64 %0) { ; CHECK-LABEL: storei64stk: diff --git a/llvm/test/CodeGen/VE/store_gv.ll b/llvm/test/CodeGen/VE/store_gv.ll --- a/llvm/test/CodeGen/VE/store_gv.ll +++ b/llvm/test/CodeGen/VE/store_gv.ll @@ -4,8 +4,24 @@ @vi16 = common dso_local local_unnamed_addr global i16 0, align 2 @vi32 = common dso_local local_unnamed_addr global i32 0, align 4 @vi64 = common dso_local local_unnamed_addr global i64 0, align 8 +@vi128 = common dso_local local_unnamed_addr global i128 0, align 16 @vf32 = common dso_local local_unnamed_addr global float 0.000000e+00, align 4 @vf64 = common dso_local local_unnamed_addr global double 0.000000e+00, align 8 +@vf128 = common dso_local local_unnamed_addr global fp128 0xL00000000000000000000000000000000, align 16 + +; Function Attrs: norecurse nounwind readonly +define void @storef128com(fp128 %0) { +; CHECK-LABEL: storef128com: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, vf128@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, vf128@hi(, %s2) +; CHECK-NEXT: st %s0, 8(, %s2) +; CHECK-NEXT: st %s1, (, %s2) +; CHECK-NEXT: or %s11, 0, %s9 + store fp128 %0, fp128* @vf128, align 16 + ret void +} ; Function Attrs: norecurse nounwind readonly define void @storef64com(double %0) { @@ -33,6 +49,20 @@ ret void } +; Function Attrs: norecurse nounwind readonly +define void @storei128com(i128 %0) { +; CHECK-LABEL: storei128com: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s2, vi128@lo +; CHECK-NEXT: and %s2, %s2, (32)0 +; CHECK-NEXT: lea.sl %s2, vi128@hi(, %s2) +; CHECK-NEXT: st %s1, 8(, %s2) +; CHECK-NEXT: st %s0, (, %s2) +; CHECK-NEXT: or %s11, 0, %s9 + store i128 %0, i128* @vi128, align 16 + ret void +} + ; Function Attrs: norecurse nounwind readonly define void @storei64com(i64 %0) { ; CHECK-LABEL: storei64com: @@ -84,4 +114,3 @@ store i8 %0, i8* @vi8, align 1 ret void } - diff --git a/llvm/test/CodeGen/VE/va_arg.ll b/llvm/test/CodeGen/VE/va_arg.ll --- a/llvm/test/CodeGen/VE/va_arg.ll +++ b/llvm/test/CodeGen/VE/va_arg.ll @@ -10,19 +10,26 @@ @.str.7 = private unnamed_addr constant [6 x i8] c"h=%p\0A\00", align 1 @.str.8 = private unnamed_addr constant [7 x i8] c"i=%ld\0A\00", align 1 @.str.9 = private unnamed_addr constant [7 x i8] c"j=%lf\0A\00", align 1 +@.str.10 = private unnamed_addr constant [7 x i8] c"j=%Lf\0A\00", align 1 define i32 @func_vainout(i32, ...) 
{ ; CHECK-LABEL: func_vainout: ; CHECK: ldl.sx %s1, 184(, %s9) -; CHECK: ld2b.sx %s18, 192(, %s9) -; CHECK: ld1b.sx %s19, 200(, %s9) -; CHECK: ldl.sx %s20, 208(, %s9) -; CHECK: ld2b.zx %s21, 216(, %s9) -; CHECK: ld1b.zx %s22, 224(, %s9) -; CHECK: ldu %s23, 236(, %s9) -; CHECK: ld %s24, 240(, %s9) -; CHECK: ld %s25, 248(, %s9) -; CHECK: ld %s26, 256(, %s9) +; CHECK: ld2b.sx %s19, 192(, %s9) +; CHECK: ld1b.sx %s22, 200(, %s9) +; CHECK: ldl.sx %s23, 208(, %s9) +; CHECK: ld2b.zx %s24, 216(, %s9) +; CHECK: ld1b.zx %s25, 224(, %s9) +; CHECK: ldu %s26, 236(, %s9) +; CHECK: ld %s27, 240(, %s9) +; CHECK: ld %s28, 248(, %s9) +; CHECK: ld %s29, 256(, %s9) +; CHECK: lea %s0, 279(, %s9) +; CHECK: and %s0, -16, %s0 +; CHECK: lea %s2, 16(, %s0) +; CHECK: ld %s20, 8(, %s0) +; CHECK: ld %s21, (, %s0) +; CHECK: ld %s18, 16(, %s0) %a = alloca i8*, align 8 %a8 = bitcast i8** %a to i8* @@ -38,6 +45,8 @@ %p7 = va_arg i8** %a, i8* %p8 = va_arg i8** %a, i64 %p9 = va_arg i8** %a, double + %p10 = va_arg i8** %a, fp128 + %p11 = va_arg i8** %a, double call void @llvm.va_end(i8* nonnull %a8) call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %a8) %pf0 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %p0) @@ -54,6 +63,8 @@ %pf7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.7, i64 0, i64 0), i8* %p7) %pf8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.8, i64 0, i64 0), i64 %p8) %pf9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.9, i64 0, i64 0), double %p9) + %pf10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.10, i64 0, i64 0), fp128 %p10) + %pf11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.9, i64 0, i64 0), double %p11) ret i32 0 } declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) diff --git a/llvm/test/CodeGen/VE/va_caller.ll b/llvm/test/CodeGen/VE/va_caller.ll --- a/llvm/test/CodeGen/VE/va_caller.ll +++ b/llvm/test/CodeGen/VE/va_caller.ll @@ -7,9 +7,6 @@ ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill ; CHECK-NEXT: or %s18, 0, (0)1 -; CHECK-NEXT: st %s18, 280(, %s11) -; CHECK-NEXT: or %s0, 11, (0)1 -; CHECK-NEXT: st %s0, 272(, %s11) ; CHECK-NEXT: st %s18, 264(, %s11) ; CHECK-NEXT: or %s0, 10, (0)1 ; CHECK-NEXT: st %s0, 256(, %s11) @@ -28,18 +25,25 @@ ; CHECK-NEXT: st %s2, 192(, %s11) ; CHECK-NEXT: or %s1, 1, (0)1 ; CHECK-NEXT: st %s1, 184(, %s11) +; CHECK-NEXT: lea %s0, .LCPI{{[0-9]+}}_0@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, .LCPI{{[0-9]+}}_0@hi(, %s0) +; CHECK-NEXT: ld %s34, 8(, %s0) +; CHECK-NEXT: ld %s35, (, %s0) ; CHECK-NEXT: st %s18, 176(, %s11) ; CHECK-NEXT: lea.sl %s6, 1086324736 +; CHECK-NEXT: st %s6, 224(, %s11) +; CHECK-NEXT: st %s34, 280(, %s11) ; CHECK-NEXT: lea %s0, func@lo ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: lea.sl %s12, func@hi(, %s0) -; CHECK-NEXT: st %s6, 224(, %s11) +; CHECK-NEXT: st %s35, 272(, %s11) ; CHECK-NEXT: or %s0, 0, %s18 ; CHECK-NEXT: or %s7, 0, %s18 ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: or %s0, 0, %s18 ; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload ; CHECK-NEXT: or %s11, 0, %s9 - call i32 (i32, ...) @func(i32 0, i16 1, i8 2, i32 3, i16 4, i8 5, float 6.0, i8* null, i64 8, double 9.0, i128 10, i128 11) + call i32 (i32, ...) 
@func(i32 0, i16 1, i8 2, i32 3, i16 4, i8 5, float 6.0, i8* null, i64 8, double 9.0, i128 10, fp128 0xLA000000000000000) ret i32 0 }
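For reference, the following standalone LLVM IR sketch (not part of the patch; the function name and constant are hypothetical) exercises the new fp128 paths in one place: the f128 register-pair argument and return convention, the custom f128 load/store lowering, and constant-pool materialization of f128 immediates. The exact VE assembly for each individual piece is what the updated tests above check; this is only an illustrative input for llc -mtriple=ve-unknown-unknown.

; Hypothetical example, not part of the test suite.
define fp128 @f128_example(fp128 %a, fp128* %p) {
  %v = load fp128, fp128* %p, align 16                      ; custom-lowered into two 64-bit loads
  %w = fadd fp128 %v, 0xL00000000000000004000000000000000   ; f128 constant (2.0) comes from the constant pool
  store fp128 %w, fp128* %p, align 16                       ; custom-lowered into two 64-bit stores
  %r = fmul fp128 %w, %a                                    ; %a arrives in a register pair per the new calling convention
  ret fp128 %r
}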