Index: llvm/trunk/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ llvm/trunk/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -140,6 +140,12 @@ SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27, SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31}; + static unsigned IntPairRegs[] = { + Sparc::G0_G1, Sparc::G2_G3, Sparc::G4_G5, Sparc::G6_G7, + Sparc::O0_O1, Sparc::O2_O3, Sparc::O4_O5, Sparc::O6_O7, + Sparc::L0_L1, Sparc::L2_L3, Sparc::L4_L5, Sparc::L6_L7, + Sparc::I0_I1, Sparc::I2_I3, Sparc::I4_I5, Sparc::I6_I7}; + /// SparcOperand - Instances of this class represent a parsed Sparc machine /// instruction. class SparcOperand : public MCParsedAsmOperand { @@ -147,6 +153,7 @@ enum RegisterKind { rk_None, rk_IntReg, + rk_IntPairReg, rk_FloatReg, rk_DoubleReg, rk_QuadReg, @@ -200,6 +207,10 @@ bool isMEMrr() const { return Kind == k_MemoryReg; } bool isMEMri() const { return Kind == k_MemoryImm; } + bool isIntReg() const { + return (Kind == k_Register && Reg.Kind == rk_IntReg); + } + bool isFloatReg() const { return (Kind == k_Register && Reg.Kind == rk_FloatReg); } @@ -330,6 +341,25 @@ return Op; } + static bool MorphToIntPairReg(SparcOperand &Op) { + unsigned Reg = Op.getReg(); + assert(Op.Reg.Kind == rk_IntReg); + unsigned regIdx = 32; + if (Reg >= Sparc::G0 && Reg <= Sparc::G7) + regIdx = Reg - Sparc::G0; + else if (Reg >= Sparc::O0 && Reg <= Sparc::O7) + regIdx = Reg - Sparc::O0 + 8; + else if (Reg >= Sparc::L0 && Reg <= Sparc::L7) + regIdx = Reg - Sparc::L0 + 16; + else if (Reg >= Sparc::I0 && Reg <= Sparc::I7) + regIdx = Reg - Sparc::I0 + 24; + if (regIdx % 2 || regIdx > 31) + return false; + Op.Reg.RegNum = IntPairRegs[regIdx / 2]; + Op.Reg.Kind = rk_IntPairReg; + return true; + } + static bool MorphToDoubleReg(SparcOperand &Op) { unsigned Reg = Op.getReg(); assert(Op.Reg.Kind == rk_FloatReg); @@ -1051,5 +1081,9 @@ break; } } + if (Op.isIntReg() && 
Kind == MCK_IntPair) { + if (SparcOperand::MorphToIntPairReg(Op)) + return MCTargetAsmParser::Match_Success; + } return Match_InvalidOperand; } Index: llvm/trunk/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp =================================================================== --- llvm/trunk/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ llvm/trunk/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -117,6 +117,13 @@ SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27, SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31}; +static const uint16_t IntPairDecoderTable[] = { + SP::G0_G1, SP::G2_G3, SP::G4_G5, SP::G6_G7, + SP::O0_O1, SP::O2_O3, SP::O4_O5, SP::O6_O7, + SP::L0_L1, SP::L2_L3, SP::L4_L5, SP::L6_L7, + SP::I0_I1, SP::I2_I3, SP::I4_I5, SP::I6_I7, +}; + static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -196,9 +203,25 @@ return MCDisassembler::Success; } +static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (RegNo > 31) + return MCDisassembler::Fail; + + if ((RegNo & 1)) + S = MCDisassembler::SoftFail; + + unsigned RegisterPair = IntPairDecoderTable[RegNo/2]; + Inst.addOperand(MCOperand::createReg(RegisterPair)); + return S; +} static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address, + const void *Decoder); static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeLoadDFP(MCInst &Inst, unsigned insn, uint64_t Address, @@ -207,6 +230,8 @@ const void *Decoder); static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn, + uint64_t Address, const void *Decoder); static DecodeStatus 
DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeStoreDFP(MCInst &Inst, unsigned insn, @@ -326,6 +351,12 @@ DecodeIntRegsRegisterClass); } +static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address, + const void *Decoder) { + return DecodeMem(Inst, insn, Address, Decoder, true, + DecodeIntPairRegisterClass); +} + static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, @@ -350,6 +381,12 @@ DecodeIntRegsRegisterClass); } +static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn, + uint64_t Address, const void *Decoder) { + return DecodeMem(Inst, insn, Address, Decoder, false, + DecodeIntPairRegisterClass); +} + static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, Index: llvm/trunk/lib/Target/Sparc/SparcCallingConv.td =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcCallingConv.td +++ llvm/trunk/lib/Target/Sparc/SparcCallingConv.td @@ -21,7 +21,11 @@ // i32 f32 arguments get passed in integer registers if there is space. CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, // f64 arguments are split and passed through registers or through stack. - CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>, + CCIfType<[f64], CCCustom<"CC_Sparc_Assign_Split_64">>, + // As are v2i32 arguments (this would be the default behavior for + // v2i32 if it wasn't allocated to the IntPair register-class) + CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Split_64">>, + // Alternatively, they are assigned to the stack in 4-byte aligned units. 
CCAssignToStack<4, 4> @@ -30,7 +34,8 @@ def RetCC_Sparc32 : CallingConv<[ CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1]>> + CCIfType<[f64], CCAssignToReg<[D0, D1]>>, + CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Ret_Split_64">> ]>; Index: llvm/trunk/lib/Target/Sparc/SparcFrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcFrameLowering.cpp +++ llvm/trunk/lib/Target/Sparc/SparcFrameLowering.cpp @@ -213,23 +213,35 @@ } void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); - // Remap %i[0-7] to %o[0-7]. for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { if (MRI.reg_nodbg_empty(reg)) continue; - unsigned mapped_reg = (reg - SP::I0 + SP::O0); + + unsigned mapped_reg = reg - SP::I0 + SP::O0; assert(MRI.reg_nodbg_empty(mapped_reg)); // Replace I register with O register. MRI.replaceRegWith(reg, mapped_reg); + + // Also replace register pair super-registers. + if ((reg - SP::I0) % 2 == 0) { + unsigned preg = (reg - SP::I0) / 2 + SP::I0_I1; + unsigned mapped_preg = preg - SP::I0_I1 + SP::O0_O1; + MRI.replaceRegWith(preg, mapped_preg); + } } // Rewrite MBB's Live-ins. 
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { + for (unsigned reg = SP::I0_I1; reg <= SP::I6_I7; ++reg) { + if (!MBB->isLiveIn(reg)) + continue; + MBB->removeLiveIn(reg); + MBB->addLiveIn(reg - SP::I0_I1 + SP::O0_O1); + } for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { if (!MBB->isLiveIn(reg)) continue; Index: llvm/trunk/lib/Target/Sparc/SparcISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SparcTargetMachine.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Compiler.h" @@ -62,6 +63,7 @@ private: SDNode* getGlobalBaseReg(); + SDNode *SelectInlineAsm(SDNode *N); }; } // end anonymous namespace @@ -141,6 +143,181 @@ return true; } + +// Re-assemble i64 arguments split up in SelectionDAGBuilder's +// visitInlineAsm / GetRegistersForValue functions. +// +// Note: This function was copied from, and is essentially identical +// to ARMISelDAGToDAG::SelectInlineAsm. It is very unfortunate that +// such hacking-up is necessary; a rethink of how inline asm operands +// are handled may be in order to make doing this more sane. +// +// TODO: fix inline asm support so I can simply tell it that 'i64' +// inputs to asm need to be allocated to the IntPair register type, +// and have that work. Then, delete this function. +SDNode *SparcDAGToDAGISel::SelectInlineAsm(SDNode *N){ + std::vector<SDValue> AsmNodeOperands; + unsigned Flag, Kind; + bool Changed = false; + unsigned NumOps = N->getNumOperands(); + + // Normally, i64 data is bounded to two arbitrary GPRs for "%r" + // constraint. However, some instructions (e.g. ldd/std) require + // (even/even+1) GPRs. 
+ + // So, here, we check for this case, and mutate the inlineasm to use + // a single IntPair register instead, which guarantees such even/odd + // placement. + + SDLoc dl(N); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) + : SDValue(nullptr,0); + + SmallVector<bool, 8> OpChanged; + // Glue node will be appended late. + for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { + SDValue op = N->getOperand(i); + AsmNodeOperands.push_back(op); + + if (i < InlineAsm::Op_FirstOperand) + continue; + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { + Flag = C->getZExtValue(); + Kind = InlineAsm::getKind(Flag); + } + else + continue; + + // Immediate operands to inline asm in the SelectionDAG are modeled with + // two operands. The first is a constant of value InlineAsm::Kind_Imm, and + // the second is a constant with the value of the immediate. If we get here + // and we have a Kind_Imm, skip the next operand, and continue. + if (Kind == InlineAsm::Kind_Imm) { + SDValue op = N->getOperand(++i); + AsmNodeOperands.push_back(op); + continue; + } + + unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); + if (NumRegs) + OpChanged.push_back(false); + + unsigned DefIdx = 0; + bool IsTiedToChangedOp = false; + // If it's a use that is tied with a previous def, it has no + // reg class constraint. 
+ if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) + IsTiedToChangedOp = OpChanged[DefIdx]; + + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef + && Kind != InlineAsm::Kind_RegDefEarlyClobber) + continue; + + unsigned RC; + bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); + if ((!IsTiedToChangedOp && (!HasRC || RC != SP::IntRegsRegClassID)) + || NumRegs != 2) + continue; + + assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); + SDValue V0 = N->getOperand(i+1); + SDValue V1 = N->getOperand(i+2); + unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); + unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); + SDValue PairedReg; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + if (Kind == InlineAsm::Kind_RegDef || + Kind == InlineAsm::Kind_RegDefEarlyClobber) { + // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to + // the original GPRs. + + unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); + SDValue Chain = SDValue(N,0); + + SDNode *GU = N->getGluedUser(); + SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::v2i32, + Chain.getValue(1)); + + // Extract values from a GPRPair reg and copy to the original GPR reg. + SDValue Sub0 = CurDAG->getTargetExtractSubreg(SP::sub_even, dl, MVT::i32, + RegCopy); + SDValue Sub1 = CurDAG->getTargetExtractSubreg(SP::sub_odd, dl, MVT::i32, + RegCopy); + SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, + RegCopy.getValue(1)); + SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); + + // Update the original glue user. + std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); + Ops.push_back(T1.getValue(1)); + CurDAG->UpdateNodeOperands(GU, Ops); + } + else { + // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a + // GPRPair and then pass the GPRPair to the inline asm. 
+ SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; + + // As REG_SEQ doesn't take RegisterSDNode, we copy them first. + SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, + Chain.getValue(1)); + SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, + T0.getValue(1)); + SDValue Pair = SDValue( + CurDAG->getMachineNode( + TargetOpcode::REG_SEQUENCE, dl, MVT::v2i32, + { + CurDAG->getTargetConstant(SP::IntPairRegClassID, dl, + MVT::i32), + T0, + CurDAG->getTargetConstant(SP::sub_even, dl, MVT::i32), + T1, + CurDAG->getTargetConstant(SP::sub_odd, dl, MVT::i32), + }), + 0); + + // Copy REG_SEQ into a GPRPair-typed VR and replace the original two + // i32 VRs of inline asm with it. + unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); + Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); + + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + Glue = Chain.getValue(1); + } + + Changed = true; + + if(PairedReg.getNode()) { + OpChanged[OpChanged.size() -1 ] = true; + Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); + if (IsTiedToChangedOp) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); + else + Flag = InlineAsm::getFlagWordForRegClass(Flag, SP::IntPairRegClassID); + // Replace the current flag. + AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( + Flag, dl, MVT::i32); + // Add the new register node and skip the original two GPRs. + AsmNodeOperands.push_back(PairedReg); + // Skip the next two GPRs. 
+ i += 2; + } + } + + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); + if (!Changed) + return nullptr; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), + CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); + New->setNodeId(-1); + return New.getNode(); +} + SDNode *SparcDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { @@ -150,6 +327,12 @@ switch (N->getOpcode()) { default: break; + case ISD::INLINEASM: { + SDNode *ResNode = SelectInlineAsm(N); + if (ResNode) + return ResNode; + break; + } case SPISD::GLOBAL_BASE_REG: return getGlobalBaseReg(); Index: llvm/trunk/lib/Target/Sparc/SparcISelLowering.h =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcISelLowering.h +++ llvm/trunk/lib/Target/Sparc/SparcISelLowering.h @@ -167,8 +167,8 @@ } void ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue>& Results, - SelectionDAG &DAG) const override; + SmallVectorImpl<SDValue>& Results, + SelectionDAG &DAG) const override; MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB, unsigned BROpcode) const; Index: llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp +++ llvm/trunk/lib/Target/Sparc/SparcISelLowering.cpp @@ -49,9 +49,9 @@ return true; } -static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, CCState &State) +static bool CC_Sparc_Assign_Split_64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg RegList[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 @@ -77,6 +77,29 @@ return true; } +static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) +{ + static const 
MCPhysReg RegList[] = { + SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 + }; + + // Try to get first reg. + if (unsigned Reg = State.AllocateReg(RegList)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else + return false; + + // Try to get second reg. + if (unsigned Reg = State.AllocateReg(RegList)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else + return false; + + return true; +} + // Allocate a full-sized argument for the 64-bit ABI. static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, @@ -202,12 +225,34 @@ RetOps.push_back(SDValue()); // Copy the result values into the output registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { + for (unsigned i = 0, realRVLocIdx = 0; + i != RVLocs.size(); + ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), - OutVals[i], Flag); + SDValue Arg = OutVals[realRVLocIdx]; + + if (VA.needsCustom()) { + assert(VA.getLocVT() == MVT::v2i32); + // Legalize ret v2i32 -> ret 2 x i32 (Basically: do what would + // happen by default if this wasn't a legal type) + + SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, + Arg, + DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout()))); + SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, + Arg, + DAG.getConstant(1, DL, getVectorIdxTy(DAG.getDataLayout()))); + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Flag); + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part1, + Flag); + } else + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); // Guarantee that all emitted copies are stuck together with flags. 
Flag = Chain.getValue(1); @@ -375,7 +420,8 @@ if (VA.isRegLoc()) { if (VA.needsCustom()) { - assert(VA.getLocVT() == MVT::f64); + assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32); + unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi); SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); @@ -398,7 +444,7 @@ } SDValue WholeValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal); - WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue); + WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), WholeValue); InVals.push_back(WholeValue); continue; } @@ -422,7 +468,7 @@ auto PtrVT = getPointerTy(DAG.getDataLayout()); if (VA.needsCustom()) { - assert(VA.getValVT() == MVT::f64); + assert(VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::v2i32); // If it is double-word aligned, just load. if (Offset % 8 == 0) { int FI = MF.getFrameInfo()->CreateFixedObject(8, @@ -454,7 +500,7 @@ SDValue WholeValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal); - WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue); + WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), WholeValue); InVals.push_back(WholeValue); continue; } @@ -788,7 +834,7 @@ } if (VA.needsCustom()) { - assert(VA.getLocVT() == MVT::f64); + assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32); if (VA.isMemLoc()) { unsigned Offset = VA.getLocMemOffset() + StackOffset; @@ -804,49 +850,54 @@ } } - SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Arg, StackPtr, MachinePointerInfo(), - false, false, 0); - // Sparc is big-endian, so the high part comes first. - SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, - MachinePointerInfo(), false, false, false, 0); - // Increment the pointer to the other half. 
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(4, dl)); - // Load the low part. - SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, - MachinePointerInfo(), false, false, false, 0); + if (VA.getLocVT() == MVT::f64) { + // Move from the float value from float registers into the + // integer registers. + + // TODO: this conversion is done in two steps, because + // f64->i64 conversion is done efficiently, and i64->v2i32 is + // basically a no-op. But f64->v2i32 is NOT done efficiently + // for some reason. + Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); + Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg); + } + + SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + Arg, + DAG.getConstant(0, dl, getVectorIdxTy(DAG.getDataLayout()))); + SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + Arg, + DAG.getConstant(1, dl, getVectorIdxTy(DAG.getDataLayout()))); if (VA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi)); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Part0)); assert(i+1 != e); CCValAssign &NextVA = ArgLocs[++i]; if (NextVA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo)); + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Part1)); } else { - // Store the low part in stack. + // Store the second part in stack. unsigned Offset = NextVA.getLocMemOffset() + StackOffset; SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff, + MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff, MachinePointerInfo(), false, false, 0)); } } else { unsigned Offset = VA.getLocMemOffset() + StackOffset; - // Store the high part. + // Store the first part. 
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff, + MemOpChains.push_back(DAG.getStore(Chain, dl, Part0, PtrOff, MachinePointerInfo(), false, false, 0)); - // Store the low part. + // Store the second part. PtrOff = DAG.getIntPtrConstant(Offset + 4, dl); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff, + MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff, MachinePointerInfo(), false, false, 0)); } @@ -1377,8 +1428,45 @@ addRegisterClass(MVT::f32, &SP::FPRegsRegClass); addRegisterClass(MVT::f64, &SP::DFPRegsRegClass); addRegisterClass(MVT::f128, &SP::QFPRegsRegClass); - if (Subtarget->is64Bit()) + if (Subtarget->is64Bit()) { addRegisterClass(MVT::i64, &SP::I64RegsRegClass); + } else { + // On 32bit sparc, we define a double-register 32bit register + // class, as well. This is modeled in LLVM as a 2-vector of i32. + addRegisterClass(MVT::v2i32, &SP::IntPairRegClass); + + // ...but almost all operations must be expanded, so set that as + // the default. + for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { + setOperationAction(Op, MVT::v2i32, Expand); + } + // Truncating/extending stores/loads are also not supported. + for (MVT VT : MVT::integer_vector_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, VT, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, VT, Expand); + + setTruncStoreAction(VT, MVT::v2i32, Expand); + setTruncStoreAction(MVT::v2i32, VT, Expand); + } + // However, load and store *are* legal. 
+ setOperationAction(ISD::LOAD, MVT::v2i32, Legal); + setOperationAction(ISD::STORE, MVT::v2i32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Legal); + + // And we need to promote i64 loads/stores into vector load/store + setOperationAction(ISD::LOAD, MVT::i64, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); + + // Sadly, this doesn't work: + // AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); + // AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); + } // Turn FP extload into load/fextend for (MVT VT : MVT::fp_valuetypes()) { @@ -2604,6 +2692,17 @@ return DAG.getMergeValues(Ops, dl); } +static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) +{ + LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode()); + + EVT MemVT = LdNode->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerF128Load(Op, DAG); + + return Op; +} + // Lower a f128 store into two f64 stores. static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); @@ -2648,6 +2747,29 @@ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } +static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) +{ + SDLoc dl(Op); + StoreSDNode *St = cast<StoreSDNode>(Op.getNode()); + + EVT MemVT = St->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerF128Store(Op, DAG); + + if (MemVT == MVT::i64) { + // Custom handling for i64 stores: turn it into a bitcast and a + // v2i32 store. 
+ SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue()); + SDValue Chain = DAG.getStore( + St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(), + St->isVolatile(), St->isNonTemporal(), St->getAlignment(), + St->getAAInfo()); + return Chain; + } + + return SDValue(); +} + static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) { assert((Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS) && "invalid opcode"); @@ -2786,7 +2908,6 @@ return SDValue(); } - SDValue SparcTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -2821,8 +2942,8 @@ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); - case ISD::LOAD: return LowerF128Load(Op, DAG); - case ISD::STORE: return LowerF128Store(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::FADD: return LowerF128Op(Op, DAG, getLibcallName(RTLIB::ADD_F128), 2); case ISD::FSUB: return LowerF128Op(Op, DAG, @@ -3152,9 +3273,12 @@ if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': - return std::make_pair(0U, &SP::IntRegsRegClass); + if (VT == MVT::v2i32) + return std::make_pair(0U, &SP::IntPairRegClass); + else + return std::make_pair(0U, &SP::IntRegsRegClass); } - } else if (!Constraint.empty() && Constraint.size() <= 5 + } else if (!Constraint.empty() && Constraint.size() <= 5 && Constraint[0] == '{' && *(Constraint.end()-1) == '}') { // constraint = '{r}' // Remove the braces from around the name. @@ -3230,5 +3354,24 @@ getLibcallName(libCall), 1)); return; + case ISD::LOAD: { + LoadSDNode *Ld = cast<LoadSDNode>(N); + // Custom handling only for i64: turn i64 load into a v2i32 load, + // and a bitcast. 
+ if (Ld->getValueType(0) != MVT::i64 || Ld->getMemoryVT() != MVT::i64) + return; + + SDLoc dl(N); + SDValue LoadRes = DAG.getExtLoad( + Ld->getExtensionType(), dl, MVT::v2i32, + Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), + MVT::v2i32, Ld->isVolatile(), Ld->isNonTemporal(), + Ld->isInvariant(), Ld->getAlignment(), Ld->getAAInfo()); + + SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes); + Results.push_back(Res); + Results.push_back(LoadRes.getValue(1)); + return; + } } } Index: llvm/trunk/lib/Target/Sparc/SparcInstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcInstrInfo.cpp +++ llvm/trunk/lib/Target/Sparc/SparcInstrInfo.cpp @@ -284,7 +284,9 @@ unsigned numSubRegs = 0; unsigned movOpc = 0; const unsigned *subRegIdx = nullptr; + bool ExtraG0 = false; + const unsigned DW_SubRegsIdx[] = { SP::sub_even, SP::sub_odd }; const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd }; const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 }; const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd, @@ -294,7 +296,12 @@ if (SP::IntRegsRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0) .addReg(SrcReg, getKillRegState(KillSrc)); - else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) + else if (SP::IntPairRegClass.contains(DestReg, SrcReg)) { + subRegIdx = DW_SubRegsIdx; + numSubRegs = 2; + movOpc = SP::ORrr; + ExtraG0 = true; + } else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) { @@ -347,7 +354,11 @@ unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]); assert(Dst && Src && "Bad sub-register"); - MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst); + if (ExtraG0) + MIB.addReg(SP::G0); + 
MIB.addReg(Src); + MovMI = MIB.getInstr(); } // Add implicit super-register defs and kills to the last MovMI. MovMI->addRegisterDefined(DestReg, TRI); @@ -372,12 +383,15 @@ MFI.getObjectAlignment(FI)); // On the order of operands here: think "[FrameIdx + 0] = SrcReg". - if (RC == &SP::I64RegsRegClass) + if (RC == &SP::I64RegsRegClass) BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + else if (RC == &SP::IntPairRegClass) + BuildMI(MBB, I, DL, get(SP::STDri)).addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); @@ -415,6 +429,9 @@ else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0) .addMemOperand(MMO); + else if (RC == &SP::IntPairRegClass) + BuildMI(MBB, I, DL, get(SP::LDDri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0) .addMemOperand(MMO); Index: llvm/trunk/lib/Target/Sparc/SparcInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcInstrInfo.td +++ llvm/trunk/lib/Target/Sparc/SparcInstrInfo.td @@ -408,6 +408,9 @@ defm LD : LoadA<"ld", 0b000000, 0b010000, load, IntRegs, i32>; } +let DecoderMethod = "DecodeLoadIntPair" in + defm LDD : LoadA<"ldd", 0b000011, 0b010011, load, IntPair, v2i32>; + // Section B.2 - Load Floating-point Instructions, p. 
92 let DecoderMethod = "DecodeLoadFP" in defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>; @@ -424,6 +427,9 @@ defm ST : StoreA<"st", 0b000100, 0b010100, store, IntRegs, i32>; } +let DecoderMethod = "DecodeStoreIntPair" in + defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>; + // Section B.5 - Store Floating-point Instructions, p. 97 let DecoderMethod = "DecodeStoreFP" in defm STF : Store<"st", 0b100100, store, FPRegs, f32>; @@ -1327,6 +1333,18 @@ def : Pat<(atomic_store ADDRrr:$dst, i32:$val), (STrr ADDRrr:$dst, $val)>; def : Pat<(atomic_store ADDRri:$dst, i32:$val), (STri ADDRri:$dst, $val)>; +// extract_vector +def : Pat<(vector_extract (v2i32 IntPair:$Rn), 0), + (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_even))>; +def : Pat<(vector_extract (v2i32 IntPair:$Rn), 1), + (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_odd))>; + +// build_vector +def : Pat<(build_vector (i32 IntRegs:$a1), (i32 IntRegs:$a2)), + (INSERT_SUBREG + (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 IntRegs:$a1), sub_even), + (i32 IntRegs:$a2), sub_odd)>; + include "SparcInstr64Bit.td" include "SparcInstrVIS.td" Index: llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp +++ llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -75,6 +75,18 @@ Reserved.set(SP::G6); Reserved.set(SP::G7); + // Also reserve the register pair aliases covering the above + // registers, with the same conditions. + Reserved.set(SP::G0_G1); + if (ReserveAppRegisters) + Reserved.set(SP::G2_G3); + if (ReserveAppRegisters || !Subtarget.is64Bit()) + Reserved.set(SP::G4_G5); + + Reserved.set(SP::O6_O7); + Reserved.set(SP::I6_I7); + Reserved.set(SP::G6_G7); + // Unaliased double registers are not available in non-V9 targets. 
if (!Subtarget.isV9()) { for (unsigned n = 0; n != 16; ++n) { @@ -210,4 +222,3 @@ unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } - Index: llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.td =================================================================== --- llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.td +++ llvm/trunk/lib/Target/Sparc/SparcRegisterInfo.td @@ -32,6 +32,12 @@ // Ri - 32-bit integer registers class Ri<bits<16> Enc, string n> : SparcReg<Enc, n>; +// Rdi - pairs of 32-bit integer registers +class Rdi<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> { + let SubRegs = subregs; + let SubRegIndices = [sub_even, sub_odd]; + let CoveredBySubRegs = 1; +} // Rf - 32-bit floating-point registers class Rf<bits<16> Enc, string n> : SparcReg<Enc, n>; @@ -217,6 +223,24 @@ def Q14 : Rq<25, "F56", [D28, D29]>; def Q15 : Rq<29, "F60", [D30, D31]>; +// Aliases of the integer registers used for LDD/STD double-word operations +def G0_G1 : Rdi<0, "G0", [G0, G1]>; +def G2_G3 : Rdi<2, "G2", [G2, G3]>; +def G4_G5 : Rdi<4, "G4", [G4, G5]>; +def G6_G7 : Rdi<6, "G6", [G6, G7]>; +def O0_O1 : Rdi<8, "O0", [O0, O1]>; +def O2_O3 : Rdi<10, "O2", [O2, O3]>; +def O4_O5 : Rdi<12, "O4", [O4, O5]>; +def O6_O7 : Rdi<14, "O6", [O6, O7]>; +def L0_L1 : Rdi<16, "L0", [L0, L1]>; +def L2_L3 : Rdi<18, "L2", [L2, L3]>; +def L4_L5 : Rdi<20, "L4", [L4, L5]>; +def L6_L7 : Rdi<22, "L6", [L6, L7]>; +def I0_I1 : Rdi<24, "I0", [I0, I1]>; +def I2_I3 : Rdi<26, "I2", [I2, I3]>; +def I4_I5 : Rdi<28, "I4", [I4, I5]>; +def I6_I7 : Rdi<30, "I6", [I6, I7]>; + // Register classes. // // FIXME: the register order should be defined in terms of the preferred @@ -231,6 +255,13 @@ (sequence "L%u", 0, 7), (sequence "O%u", 0, 7))>; +// Should be in the same order as IntRegs.
+def IntPair : RegisterClass<"SP", [v2i32], 64, + (add I0_I1, I2_I3, I4_I5, I6_I7, + G0_G1, G2_G3, G4_G5, G6_G7, + L0_L1, L2_L3, L4_L5, L6_L7, + O0_O1, O2_O3, O4_O5, O6_O7)>; + // Register class for 64-bit mode, with a 64-bit spill slot size. // These are the same as the 32-bit registers, so TableGen will consider this // to be a sub-class of IntRegs. That works out because requiring a 64-bit Index: llvm/trunk/llvm.spec.in =================================================================== --- llvm/trunk/llvm.spec.in +++ llvm/trunk/llvm.spec.in @@ -65,3 +65,4 @@ * Mon Feb 09 2003 Brian R. Gaeke - Initial working version of RPM spec file. + Index: llvm/trunk/test/CodeGen/SPARC/basictest.ll =================================================================== --- llvm/trunk/test/CodeGen/SPARC/basictest.ll +++ llvm/trunk/test/CodeGen/SPARC/basictest.ll @@ -84,3 +84,16 @@ ret i64 %r } +; CHECK-LABEL: load_store_64bit: +; CHECK: ldd [%o0], %o2 +; CHECK: addcc %o3, 3, %o5 +; CHECK: addxcc %o2, 0, %o4 +; CHECK: retl +; CHECK: std %o4, [%o1] +define void @load_store_64bit(i64* %x, i64* %y) { +entry: + %0 = load i64, i64* %x + %add = add nsw i64 %0, 3 + store i64 %add, i64* %y + ret void +} Index: llvm/trunk/test/CodeGen/SPARC/inlineasm.ll =================================================================== --- llvm/trunk/test/CodeGen/SPARC/inlineasm.ll +++ llvm/trunk/test/CodeGen/SPARC/inlineasm.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s +; RUN: llc -march=sparc <%s | FileCheck %s ; CHECK-LABEL: test_constraint_r ; CHECK: add %o1, %o0, %o0 @@ -8,7 +8,7 @@ ret i32 %0 } -; CHECK-LABEL: test_constraint_I +; CHECK-LABEL: test_constraint_I: ; CHECK: add %o0, 1023, %o0 define i32 @test_constraint_I(i32 %a) { entry: @@ -16,7 +16,7 @@ ret i32 %0 } -; CHECK-LABEL: test_constraint_I_neg +; CHECK-LABEL: test_constraint_I_neg: ; CHECK: add %o0, -4096, %o0 define i32 @test_constraint_I_neg(i32 %a) { entry: @@ -24,7 +24,7 @@ ret i32 %0 } -; 
CHECK-LABEL: test_constraint_I_largeimm +; CHECK-LABEL: test_constraint_I_largeimm: ; CHECK: sethi 9, [[R0:%[gilo][0-7]]] ; CHECK: or [[R0]], 784, [[R1:%[gilo][0-7]]] ; CHECK: add %o0, [[R1]], %o0 @@ -34,12 +34,51 @@ ret i32 %0 } -; CHECK-LABEL: test_constraint_reg +; CHECK-LABEL: test_constraint_reg: ; CHECK: ldda [%o1] 43, %g2 -; CHECK: ldda [%o1] 43, %g3 +; CHECK: ldda [%o1] 43, %g4 define void @test_constraint_reg(i32 %s, i32* %ptr) { entry: %0 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={r2},r,n"(i32* %ptr, i32 43) - %1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g3},r,n"(i32* %ptr, i32 43) + %1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g4},r,n"(i32* %ptr, i32 43) ret void } + +;; Ensure that i64 args to asm are allocated to the IntPair register class. +;; Also checks that register renaming for leaf proc works. +; CHECK-LABEL: test_constraint_r_i64: +; CHECK: mov %o0, %o5 +; CHECK: sra %o5, 31, %o4 +; CHECK: std %o4, [%o1] +define i32 @test_constraint_r_i64(i32 %foo, i64* %out, i32 %o) { +entry: + %conv = sext i32 %foo to i64 + tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out) + ret i32 %o +} + +;; Same test without leaf-proc opt +; CHECK-LABEL: test_constraint_r_i64_noleaf: +; CHECK: mov %i0, %i5 +; CHECK: sra %i5, 31, %i4 +; CHECK: std %i4, [%i1] +define i32 @test_constraint_r_i64_noleaf(i32 %foo, i64* %out, i32 %o) #0 { +entry: + %conv = sext i32 %foo to i64 + tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out) + ret i32 %o +} +attributes #0 = { "no-frame-pointer-elim"="true" } + +;; Ensures that tied in and out gets allocated properly. 
+; CHECK-LABEL: test_i64_inout: +; CHECK: sethi 0, %o2 +; CHECK: mov 5, %o3 +; CHECK: xor %o2, %g0, %o2 +; CHECK: mov %o2, %o0 +; CHECK: ret +define i64 @test_i64_inout() { +entry: + %0 = call i64 asm sideeffect "xor $1, %g0, $0", "=r,0,~{i1}"(i64 5); + ret i64 %0 +} Index: llvm/trunk/test/CodeGen/SPARC/reserved-regs.ll =================================================================== --- llvm/trunk/test/CodeGen/SPARC/reserved-regs.ll +++ llvm/trunk/test/CodeGen/SPARC/reserved-regs.ll @@ -0,0 +1,135 @@ +; RUN: llc -march=sparc < %s | FileCheck %s + +@g = common global [32 x i32] zeroinitializer, align 16 +@h = common global [16 x i64] zeroinitializer, align 16 + +;; Ensures that we don't use registers which are supposed to be reserved. + +; CHECK-LABEL: use_all_i32_regs: +; CHECK-NOT: %g0 +; CHECK-NOT: %g1 +; CHECK-NOT: %g5 +; CHECK-NOT: %g6 +; CHECK-NOT: %g7 +; CHECK-NOT: %o6 +; CHECK-NOT: %i6 +; CHECK-NOT: %i7 +; CHECK: ret +define void @use_all_i32_regs() { +entry: + %0 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16 + %1 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4 + %2 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8 + %3 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4 + %4 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16 + %5 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4 + %6 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8 + %7 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4 + %8 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16 + %9 = load volatile i32, i32* getelementptr inbounds 
([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4 + %10 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8 + %11 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4 + %12 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16 + %13 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4 + %14 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 14), align 8 + %15 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4 + %16 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16 + %17 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4 + %18 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8 + %19 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4 + %20 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16 + %21 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4 + %22 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8 + %23 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4 + %24 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16 + %25 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4 + %26 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8 + %27 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4 
+ %28 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16 + %29 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4 + %30 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8 + %31 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4 + store volatile i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16 + store volatile i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4 + store volatile i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8 + store volatile i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4 + store volatile i32 %5, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16 + store volatile i32 %6, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4 + store volatile i32 %7, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8 + store volatile i32 %8, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4 + store volatile i32 %9, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16 + store volatile i32 %10, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4 + store volatile i32 %11, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8 + store volatile i32 %12, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4 + store volatile i32 %13, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16 + store volatile i32 %14, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4 + store volatile i32 %15, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, 
i64 0, i64 14), align 8 + store volatile i32 %16, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4 + store volatile i32 %17, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16 + store volatile i32 %18, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4 + store volatile i32 %19, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8 + store volatile i32 %20, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4 + store volatile i32 %21, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16 + store volatile i32 %22, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4 + store volatile i32 %23, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8 + store volatile i32 %24, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4 + store volatile i32 %25, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16 + store volatile i32 %26, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4 + store volatile i32 %27, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8 + store volatile i32 %28, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4 + store volatile i32 %29, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16 + store volatile i32 %30, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4 + store volatile i32 %31, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8 + store volatile i32 %0, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4 + ret void +} + + +; CHECK-LABEL: use_all_i64_regs: +; CHECK-NOT: %g0 +; CHECK-NOT: %g1 +; CHECK-NOT: %g4 +; CHECK-NOT: %g5 +; CHECK-NOT: %g6 +; 
CHECK-NOT: %g7 +; CHECK-NOT: %o6 +; CHECK-NOT: %o7 +; CHECK-NOT: %i6 +; CHECK-NOT: %i7 +; CHECK: ret +define void @use_all_i64_regs() { +entry: + %0 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16 + %1 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4 + %2 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8 + %3 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4 + %4 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16 + %5 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4 + %6 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8 + %7 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4 + %8 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16 + %9 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4 + %10 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8 + %11 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4 + %12 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16 + %13 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4 + %14 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8 + %15 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4 + store volatile i64 %1, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16 + store volatile i64 %2, i64* 
getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4 + store volatile i64 %3, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8 + store volatile i64 %4, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4 + store volatile i64 %5, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16 + store volatile i64 %6, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4 + store volatile i64 %7, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8 + store volatile i64 %8, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4 + store volatile i64 %9, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16 + store volatile i64 %10, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4 + store volatile i64 %11, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8 + store volatile i64 %12, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4 + store volatile i64 %13, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16 + store volatile i64 %14, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4 + store volatile i64 %15, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8 + store volatile i64 %0, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4 + ret void +} Index: llvm/trunk/test/CodeGen/SPARC/spill.ll =================================================================== --- llvm/trunk/test/CodeGen/SPARC/spill.ll +++ llvm/trunk/test/CodeGen/SPARC/spill.ll @@ -0,0 +1,64 @@ +; RUN: llc -march=sparc < %s | FileCheck %s + +;; Ensure that spills and reloads work for various types on +;; sparcv8. 
+ +;; For i32/i64 tests, use an asm statement which clobbers most +;; registers to ensure the spill will happen. + +; CHECK-LABEL: test_i32_spill: +; CHECK: and %i0, %i1, %o0 +; CHECK: st %o0, [%fp+{{.+}}] +; CHECK: add %o0, %o0, %g0 +; CHECK: ld [%fp+{{.+}}, %i0 +define i32 @test_i32_spill(i32 %a, i32 %b) { +entry: + %r0 = and i32 %a, %b + ; The clobber list has all registers except g0/o0. (Only o0 is usable.) + %0 = call i32 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6},~{o7}"(i32 %r0) + ret i32 %r0 +} + +; CHECK-LABEL: test_i64_spill: +; CHECK: and %i0, %i2, %o0 +; CHECK: and %i1, %i3, %o1 +; CHECK: std %o0, [%fp+{{.+}}] +; CHECK: add %o0, %o0, %g0 +; CHECK: ldd [%fp+{{.+}}, %i0 +define i64 @test_i64_spill(i64 %a, i64 %b) { +entry: + %r0 = and i64 %a, %b + ; The clobber list has all registers except g0,g1,o0,o1. (Only o0/o1 are a usable pair) + ; So, o0/o1 must be used. + %0 = call i64 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o2},~{o3},~{o4},~{o5},~{o7}"(i64 %r0) + ret i64 %r0 +} + +;; For float/double tests, a call is a suitable clobber as *all* FPU +;; registers are caller-save on sparcv8. 
+ +; CHECK-LABEL: test_float_spill: +; CHECK: fadds %f1, %f0, [[R:%[f][0-31]]] +; CHECK: st [[R]], [%fp+{{.+}}] +; CHECK: call +; CHECK: ld [%fp+{{.+}}, %f0 +declare float @foo_float(float) +define float @test_float_spill(float %a, float %b) { +entry: + %r0 = fadd float %a, %b + %0 = call float @foo_float(float %r0) + ret float %r0 +} + +; CHECK-LABEL: test_double_spill: +; CHECK: faddd %f2, %f0, [[R:%[f][0-31]]] +; CHECK: std [[R]], [%fp+{{.+}}] +; CHECK: call +; CHECK: ldd [%fp+{{.+}}, %f0 +declare double @foo_double(double) +define double @test_double_spill(double %a, double %b) { +entry: + %r0 = fadd double %a, %b + %0 = call double @foo_double(double %r0) + ret double %r0 +} Index: llvm/trunk/test/MC/Disassembler/Sparc/sparc-mem.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/Sparc/sparc-mem.txt +++ llvm/trunk/test/MC/Disassembler/Sparc/sparc-mem.txt @@ -221,3 +221,27 @@ # CHECK: swapa [%g1] 131, %o2 0xd4 0xf8 0x50 0x60 + +# CHECK: ldd [%i0+%l6], %o2 +0xd4 0x1e 0x00 0x16 + +# CHECK: ldd [%i0+32], %o2 +0xd4 0x1e 0x20 0x20 + +# CHECK: ldd [%g1], %o2 +0xd4 0x18 0x60 0x00 + +# CHECK: ldd [%g1], %o2 +0xd4 0x18 0x40 0x00 + +# CHECK: std %o2, [%i0+%l6] +0xd4 0x3e 0x00 0x16 + +# CHECK: std %o2, [%i0+32] +0xd4 0x3e 0x20 0x20 + +# CHECK: std %o2, [%g1] +0xd4 0x38 0x60 0x00 + +# CHECK: std %o2, [%g1] +0xd4 0x38 0x40 0x00 Index: llvm/trunk/test/MC/Sparc/sparc-mem-instructions.s =================================================================== --- llvm/trunk/test/MC/Sparc/sparc-mem-instructions.s +++ llvm/trunk/test/MC/Sparc/sparc-mem-instructions.s @@ -46,6 +46,15 @@ ! CHECK: lda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x86,0x10,0x76] lda [%i0 + %l6] 131, %o2 + ! CHECK: ldd [%i0+%l6], %o2 ! encoding: [0xd4,0x1e,0x00,0x16] + ldd [%i0 + %l6], %o2 + ! CHECK: ldd [%i0+32], %o2 ! encoding: [0xd4,0x1e,0x20,0x20] + ldd [%i0 + 32], %o2 + ! CHECK: ldd [%g1], %o2 ! encoding: [0xd4,0x18,0x40,0x00] + ldd [%g1], %o2 + ! 
CHECK: ldda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x9e,0x10,0x76] + ldda [%i0 + %l6] 131, %o2 + ! CHECK: stb %o2, [%i0+%l6] ! encoding: [0xd4,0x2e,0x00,0x16] stb %o2, [%i0 + %l6] ! CHECK: stb %o2, [%i0+32] ! encoding: [0xd4,0x2e,0x20,0x20] @@ -73,6 +82,15 @@ ! CHECK: sta %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xa6,0x10,0x76] sta %o2, [%i0 + %l6] 131 + ! CHECK: std %o2, [%i0+%l6] ! encoding: [0xd4,0x3e,0x00,0x16] + std %o2, [%i0 + %l6] + ! CHECK: std %o2, [%i0+32] ! encoding: [0xd4,0x3e,0x20,0x20] + std %o2, [%i0 + 32] + ! CHECK: std %o2, [%g1] ! encoding: [0xd4,0x38,0x40,0x00] + std %o2, [%g1] + ! CHECK: stda %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xbe,0x10,0x76] + stda %o2, [%i0 + %l6] 131 + ! CHECK: flush %g1+%g2 ! encoding: [0x81,0xd8,0x40,0x02] flush %g1 + %g2 ! CHECK: flush %g1+8 ! encoding: [0x81,0xd8,0x60,0x08]