Index: lib/Target/Sparc/AsmParser/SparcAsmParser.cpp =================================================================== --- lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -138,6 +138,12 @@ SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27, SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31}; + static unsigned IntPairRegs[] = { + Sparc::G0_G1, Sparc::G2_G3, Sparc::G4_G5, Sparc::G6_G7, + Sparc::O0_O1, Sparc::O2_O3, Sparc::O4_O5, Sparc::O6_O7, + Sparc::L0_L1, Sparc::L2_L3, Sparc::L4_L5, Sparc::L6_L7, + Sparc::I0_I1, Sparc::I2_I3, Sparc::I4_I5, Sparc::I6_I7}; + /// SparcOperand - Instances of this class represent a parsed Sparc machine /// instruction. class SparcOperand : public MCParsedAsmOperand { @@ -145,6 +151,7 @@ enum RegisterKind { rk_None, rk_IntReg, + rk_IntPairReg, rk_FloatReg, rk_DoubleReg, rk_QuadReg, @@ -198,6 +205,10 @@ bool isMEMrr() const { return Kind == k_MemoryReg; } bool isMEMri() const { return Kind == k_MemoryImm; } + bool isIntReg() const { + return (Kind == k_Register && Reg.Kind == rk_IntReg); + } + bool isFloatReg() const { return (Kind == k_Register && Reg.Kind == rk_FloatReg); } @@ -328,6 +339,25 @@ return Op; } + static bool MorphToIntPairReg(SparcOperand &Op) { + unsigned Reg = Op.getReg(); + assert(Op.Reg.Kind == rk_IntReg); + unsigned regIdx = 32; + if (Reg >= Sparc::G0 && Reg <= Sparc::G7) + regIdx = Reg - Sparc::G0; + else if (Reg >= Sparc::O0 && Reg <= Sparc::O7) + regIdx = Reg - Sparc::O0 + 8; + else if (Reg >= Sparc::L0 && Reg <= Sparc::L7) + regIdx = Reg - Sparc::L0 + 16; + else if (Reg >= Sparc::I0 && Reg <= Sparc::I7) + regIdx = Reg - Sparc::I0 + 24; + if (regIdx % 2 || regIdx > 31) + return false; + Op.Reg.RegNum = IntPairRegs[regIdx / 2]; + Op.Reg.Kind = rk_IntPairReg; + return true; + } + static bool MorphToDoubleReg(SparcOperand &Op) { unsigned Reg = Op.getReg(); assert(Op.Reg.Kind == rk_FloatReg); @@ -1040,5 +1070,9 @@ break; } } + if (Op.isIntReg() && Kind == MCK_IntPair) { + if (SparcOperand::MorphToIntPairReg(Op)) + return MCTargetAsmParser::Match_Success; + } return Match_InvalidOperand; } Index: lib/Target/Sparc/Disassembler/SparcDisassembler.cpp =================================================================== --- lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -117,6 +117,13 @@ SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27, SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31}; +static const uint16_t IntPairDecoderTable[] = { + SP::G0_G1, SP::G2_G3, SP::G4_G5, SP::G6_G7, + SP::O0_O1, SP::O2_O3, SP::O4_O5, SP::O6_O7, + SP::L0_L1, SP::L2_L3, SP::L4_L5, SP::L6_L7, + SP::I0_I1, SP::I2_I3, SP::I4_I5, SP::I6_I7, +}; + static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -196,9 +203,25 @@ return MCDisassembler::Success; } +static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + if (RegNo > 31) + return MCDisassembler::Fail; + + if ((RegNo & 1)) + S = MCDisassembler::SoftFail; + + unsigned RegisterPair = IntPairDecoderTable[RegNo/2]; + Inst.addOperand(MCOperand::createReg(RegisterPair)); + return S; +} static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address, + const void *Decoder); static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeLoadDFP(MCInst &Inst, unsigned insn, uint64_t Address, @@ -207,6 +230,8 @@ const void *Decoder); static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeStoreDFP(MCInst &Inst, unsigned insn, @@ -326,6 +351,12 @@ DecodeIntRegsRegisterClass); } +static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address, + const void *Decoder) { + return DecodeMem(Inst, insn, Address, Decoder, true, + DecodeIntPairRegisterClass); +} + static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, true, @@ -350,6 +381,12 @@ DecodeIntRegsRegisterClass); } +static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn, + uint64_t Address, const void *Decoder) { + return DecodeMem(Inst, insn, Address, Decoder, false, + DecodeIntPairRegisterClass); +} + static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address, const void *Decoder) { return DecodeMem(Inst, insn, Address, Decoder, false, Index: lib/Target/Sparc/SparcFrameLowering.cpp =================================================================== --- lib/Target/Sparc/SparcFrameLowering.cpp +++ lib/Target/Sparc/SparcFrameLowering.cpp @@ -213,19 +213,24 @@ } void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); - // Remap %i[0-7] to %o[0-7]. for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { if (!MRI.isPhysRegUsed(reg)) continue; - unsigned mapped_reg = (reg - SP::I0 + SP::O0); + unsigned mapped_reg = reg - SP::I0 + SP::O0; assert(!MRI.isPhysRegUsed(mapped_reg)); // Replace I register with O register. MRI.replaceRegWith(reg, mapped_reg); + // Also replace register pair super-registers. Note, these share + // "used" marks with their underlying actual-registers. + if ((reg - SP::I0) % 2 == 0) { + unsigned preg = (reg - SP::I0) / 2 + SP::I0_I1; + unsigned mapped_preg = preg - SP::I0_I1 + SP::O0_O1; + MRI.replaceRegWith(preg, mapped_preg); + } // Mark the reg unused. MRI.setPhysRegUnused(reg); } @@ -233,6 +238,12 @@ // Rewrite MBB's Live-ins. for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { + for (unsigned reg = SP::I0_I1; reg <= SP::I6_I7; ++reg) { + if (!MBB->isLiveIn(reg)) + continue; + MBB->removeLiveIn(reg); + MBB->addLiveIn(reg - SP::I0_I1 + SP::O0_O1); + } for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { if (!MBB->isLiveIn(reg)) continue; Index: lib/Target/Sparc/SparcISelDAGToDAG.cpp =================================================================== --- lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SparcTargetMachine.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Compiler.h" @@ -62,6 +63,7 @@ private: SDNode* getGlobalBaseReg(); + SDNode *SelectInlineAsm(SDNode *N); }; } // end anonymous namespace @@ -138,6 +140,174 @@ return true; } + +// Note: This function was copied from, and is essentially identical +// to ARMISelDAGToDAG::SelectInlineAsm. It is very unfortunate that +// such hacking-up is necessary; a rethink of how inline asm operands +// are handled may be in order to make doing this more sane. +SDNode *SparcDAGToDAGISel::SelectInlineAsm(SDNode *N){ + std::vector AsmNodeOperands; + unsigned Flag, Kind; + bool Changed = false; + unsigned NumOps = N->getNumOperands(); + + // Normally, i64 data is bounded to two arbitrary GPRs for "%r" + // constraint. However, some instructions (e.g. ldd/std) require + // (even/even+1) GPRs. + + // So, here, we check for this case, and mutate the inlineasm to use + // a single IntPair register instead, which guarantees such even/odd + // placement. + + SDLoc dl(N); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) + : SDValue(nullptr,0); + + SmallVector OpChanged; + // Glue node will be appended late. + for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { + SDValue op = N->getOperand(i); + AsmNodeOperands.push_back(op); + + if (i < InlineAsm::Op_FirstOperand) + continue; + + if (ConstantSDNode *C = dyn_cast(N->getOperand(i))) { + Flag = C->getZExtValue(); + Kind = InlineAsm::getKind(Flag); + } + else + continue; + + // Immediate operands to inline asm in the SelectionDAG are modeled with + // two operands. The first is a constant of value InlineAsm::Kind_Imm, and + // the second is a constant with the value of the immediate. If we get here + // and we have a Kind_Imm, skip the next operand, and continue. + if (Kind == InlineAsm::Kind_Imm) { + SDValue op = N->getOperand(++i); + AsmNodeOperands.push_back(op); + continue; + } + + unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); + if (NumRegs) + OpChanged.push_back(false); + + unsigned DefIdx = 0; + bool IsTiedToChangedOp = false; + // If it's a use that is tied with a previous def, it has no + // reg class constraint. + if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) + IsTiedToChangedOp = OpChanged[DefIdx]; + + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef + && Kind != InlineAsm::Kind_RegDefEarlyClobber) + continue; + + unsigned RC; + bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); + if ((!IsTiedToChangedOp && (!HasRC || RC != SP::IntRegsRegClassID)) + || NumRegs != 2) + continue; + + assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); + SDValue V0 = N->getOperand(i+1); + SDValue V1 = N->getOperand(i+2); + unsigned Reg0 = cast(V0)->getReg(); + unsigned Reg1 = cast(V1)->getReg(); + SDValue PairedReg; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + if (Kind == InlineAsm::Kind_RegDef || + Kind == InlineAsm::Kind_RegDefEarlyClobber) { + // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to + // the original GPRs. + + unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); + SDValue Chain = SDValue(N,0); + + SDNode *GU = N->getGluedUser(); + SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::v2i32, + Chain.getValue(1)); + + // Extract values from a GPRPair reg and copy to the original GPR reg. + SDValue Sub0 = CurDAG->getTargetExtractSubreg(SP::sub_even, dl, MVT::i32, + RegCopy); + SDValue Sub1 = CurDAG->getTargetExtractSubreg(SP::sub_odd, dl, MVT::i32, + RegCopy); + SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, + RegCopy.getValue(1)); + SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); + + // Update the original glue user. + std::vector Ops(GU->op_begin(), GU->op_end()-1); + Ops.push_back(T1.getValue(1)); + CurDAG->UpdateNodeOperands(GU, Ops); + } + else { + // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a + // GPRPair and then pass the GPRPair to the inline asm. + SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; + + // As REG_SEQ doesn't take RegisterSDNode, we copy them first. + SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, + Chain.getValue(1)); + SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, + T0.getValue(1)); + SDValue Pair = SDValue( + CurDAG->getMachineNode( + TargetOpcode::REG_SEQUENCE, dl, MVT::v2i32, + { + CurDAG->getTargetConstant(SP::IntPairRegClassID, dl, + MVT::i32), + T0, + CurDAG->getTargetConstant(SP::sub_even, dl, MVT::i32), + T1, + CurDAG->getTargetConstant(SP::sub_odd, dl, MVT::i32), + }), + 0); + + // Copy REG_SEQ into a GPRPair-typed VR and replace the original two + // i32 VRs of inline asm with it. + unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); + Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); + + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + Glue = Chain.getValue(1); + } + + Changed = true; + + if(PairedReg.getNode()) { + OpChanged[OpChanged.size() -1 ] = true; + Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); + if (IsTiedToChangedOp) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); + else + Flag = InlineAsm::getFlagWordForRegClass(Flag, SP::IntPairRegClassID); + // Replace the current flag. + AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( + Flag, dl, MVT::i32); + // Add the new register node and skip the original two GPRs. + AsmNodeOperands.push_back(PairedReg); + // Skip the next two GPRs. + i += 2; + } + } + + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); + if (!Changed) + return nullptr; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), + CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); + New->setNodeId(-1); + return New.getNode(); +} + SDNode *SparcDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { @@ -147,6 +317,12 @@ switch (N->getOpcode()) { default: break; + case ISD::INLINEASM: { + SDNode *ResNode = SelectInlineAsm(N); + if (ResNode) + return ResNode; + break; + } case SPISD::GLOBAL_BASE_REG: return getGlobalBaseReg(); Index: lib/Target/Sparc/SparcISelLowering.h =================================================================== --- lib/Target/Sparc/SparcISelLowering.h +++ lib/Target/Sparc/SparcISelLowering.h @@ -165,8 +165,8 @@ } void ReplaceNodeResults(SDNode *N, - SmallVectorImpl& Results, - SelectionDAG &DAG) const override; + SmallVectorImpl& Results, + SelectionDAG &DAG) const override; MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB, unsigned BROpcode) const; Index: lib/Target/Sparc/SparcISelLowering.cpp =================================================================== --- lib/Target/Sparc/SparcISelLowering.cpp +++ lib/Target/Sparc/SparcISelLowering.cpp @@ -1375,8 +1375,45 @@ addRegisterClass(MVT::f32, &SP::FPRegsRegClass); addRegisterClass(MVT::f64, &SP::DFPRegsRegClass); addRegisterClass(MVT::f128, &SP::QFPRegsRegClass); - if (Subtarget->is64Bit()) + if (Subtarget->is64Bit()) { addRegisterClass(MVT::i64, &SP::I64RegsRegClass); + } else { + // On 32bit sparc, we define a double-register 32bit register + // class, as well. This is modeled in LLVM as a 2-vector of i32. + addRegisterClass(MVT::v2i32, &SP::IntPairRegClass); + + // ...but almost all operations must be expanded, so set that as + // the default. + for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { + setOperationAction(Op, MVT::v2i32, Expand); + } + // FIXME: I'm not 100% sure if these are needed + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, VT, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, VT, Expand); + + setTruncStoreAction(VT, MVT::v2i32, Expand); + setTruncStoreAction(MVT::v2i32, VT, Expand); + } + // However, load and store *are* legal. + setOperationAction(ISD::LOAD, MVT::v2i32, Legal); + setOperationAction(ISD::STORE, MVT::v2i32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Legal); + + // And we need to promote i64 loads/stores into vector load/store + setOperationAction(ISD::LOAD, MVT::i64, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); + + // Sadly, this doesn't work: + // AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); + // AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); + } // Turn FP extload into load/fextend for (MVT VT : MVT::fp_valuetypes()) { @@ -2596,6 +2633,17 @@ return DAG.getMergeValues(Ops, dl); } +static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) +{ + LoadSDNode *LdNode = cast(Op.getNode()); + + EVT MemVT = LdNode->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerF128Load(Op, DAG); + + return Op; +} + // Lower a f128 store into two f64 stores. static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); @@ -2640,6 +2688,29 @@ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } +static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) +{ + SDLoc dl(Op); + StoreSDNode *St = cast(Op.getNode()); + + EVT MemVT = St->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerF128Store(Op, DAG); + + if (MemVT == MVT::i64) { + // Custom handling for i64 stores: turn it into a bitcast and a + // v2i32 store. + SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue()); + SDValue Chain = DAG.getStore( + St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(), + St->isVolatile(), St->isNonTemporal(), St->getAlignment(), + St->getAAInfo()); + return Chain; + } + + return Op; +} + static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) { assert((Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS) && "invalid opcode"); @@ -2778,7 +2849,6 @@ return SDValue(); } - SDValue SparcTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -2813,8 +2883,8 @@ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); - case ISD::LOAD: return LowerF128Load(Op, DAG); - case ISD::STORE: return LowerF128Store(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::FADD: return LowerF128Op(Op, DAG, getLibcallName(RTLIB::ADD_F128), 2); case ISD::FSUB: return LowerF128Op(Op, DAG, @@ -3144,9 +3214,12 @@ if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': - return std::make_pair(0U, &SP::IntRegsRegClass); + if (VT == MVT::v2i32) + return std::make_pair(0U, &SP::IntPairRegClass); + else + return std::make_pair(0U, &SP::IntRegsRegClass); } - } else if (!Constraint.empty() && Constraint.size() <= 5 + } else if (!Constraint.empty() && Constraint.size() <= 5 && Constraint[0] == '{' && *(Constraint.end()-1) == '}') { // constraint = '{r}' // Remove the braces from around the name. @@ -3222,5 +3295,24 @@ getLibcallName(libCall), 1)); return; + case ISD::LOAD: { + LoadSDNode *Ld = cast(N); + // Custom handling only for i64: turn i64 load into a v2i32 load, + // and a bitcast. + if (Ld->getValueType(0) != MVT::i64 || Ld->getMemoryVT() != MVT::i64) + return; + + SDLoc dl(N); + SDValue LoadRes = DAG.getExtLoad( + Ld->getExtensionType(), dl, MVT::v2i32, + Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), + MVT::v2i32, Ld->isVolatile(), Ld->isNonTemporal(), + Ld->isInvariant(), Ld->getAlignment(), Ld->getAAInfo()); + + SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes); + Results.push_back(Res); + Results.push_back(LoadRes.getValue(1)); + return; + } } } Index: lib/Target/Sparc/SparcInstrInfo.cpp =================================================================== --- lib/Target/Sparc/SparcInstrInfo.cpp +++ lib/Target/Sparc/SparcInstrInfo.cpp @@ -284,7 +284,9 @@ unsigned numSubRegs = 0; unsigned movOpc = 0; const unsigned *subRegIdx = nullptr; + bool ExtraG0 = false; + const unsigned DW_SubRegsIdx[] = { SP::sub_even, SP::sub_odd }; const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd }; const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 }; const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd, @@ -294,7 +296,12 @@ if (SP::IntRegsRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0) .addReg(SrcReg, getKillRegState(KillSrc)); - else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) + else if (SP::IntPairRegClass.contains(DestReg, SrcReg)) { + subRegIdx = DW_SubRegsIdx; + numSubRegs = 2; + movOpc = SP::ORrr; + ExtraG0 = true; + } else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) { @@ -338,7 +345,11 @@ unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]); assert(Dst && Src && "Bad sub-register"); - MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst); + if (ExtraG0) + MIB.addReg(SP::G0); + MIB.addReg(Src); + MovMI = MIB.getInstr(); } // Add implicit super-register defs and kills to the last MovMI. MovMI->addRegisterDefined(DestReg, TRI); @@ -363,12 +374,15 @@ MFI.getObjectAlignment(FI)); // On the order of operands here: think "[FrameIdx + 0] = SrcReg". - if (RC == &SP::I64RegsRegClass) + if (RC == &SP::I64RegsRegClass) BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + else if (RC == &SP::IntPairRegClass) + BuildMI(MBB, I, DL, get(SP::STDri)).addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); @@ -406,6 +420,9 @@ else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0) .addMemOperand(MMO); + else if (RC == &SP::IntPairRegClass) + BuildMI(MBB, I, DL, get(SP::LDDri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0) .addMemOperand(MMO); Index: lib/Target/Sparc/SparcInstrInfo.td =================================================================== --- lib/Target/Sparc/SparcInstrInfo.td +++ lib/Target/Sparc/SparcInstrInfo.td @@ -445,6 +445,9 @@ defm LD : LoadA<"ld", 0b000000, 0b010000, load, IntRegs, i32>; } +let DecoderMethod = "DecodeLoadIntPair" in + defm LDD : LoadA<"ldd", 0b000011, 0b010011, load, IntPair, v2i32>; + // Section B.2 - Load Floating-point Instructions, p. 92 let DecoderMethod = "DecodeLoadFP" in defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>; @@ -461,6 +464,9 @@ defm ST : StoreA<"st", 0b000100, 0b010100, store, IntRegs, i32>; } +let DecoderMethod = "DecodeStoreIntPair" in + defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>; + // Section B.5 - Store Floating-point Instructions, p. 97 let DecoderMethod = "DecodeStoreFP" in defm STF : Store<"st", 0b100100, store, FPRegs, f32>; @@ -1300,6 +1306,18 @@ def : Pat<(atomic_store ADDRrr:$dst, i32:$val), (STrr ADDRrr:$dst, $val)>; def : Pat<(atomic_store ADDRri:$dst, i32:$val), (STri ADDRri:$dst, $val)>; +// extract_vector +def : Pat<(vector_extract (v2i32 IntPair:$Rn), 0), + (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_even))>; +def : Pat<(vector_extract (v2i32 IntPair:$Rn), 1), + (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_odd))>; + +// build_vector +def : Pat<(build_vector (i32 IntRegs:$a1), (i32 IntRegs:$a2)), + (INSERT_SUBREG + (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 IntRegs:$a1), sub_even), + (i32 IntRegs:$a2), sub_odd)>; + include "SparcInstr64Bit.td" include "SparcInstrVIS.td" Index: lib/Target/Sparc/SparcRegisterInfo.cpp =================================================================== --- lib/Target/Sparc/SparcRegisterInfo.cpp +++ lib/Target/Sparc/SparcRegisterInfo.cpp @@ -75,6 +75,18 @@ Reserved.set(SP::G6); Reserved.set(SP::G7); + // Also reserve the register pair aliases covering the above + // registers, with the same conditions. + Reserved.set(SP::G0_G1); + if (ReserveAppRegisters) + Reserved.set(SP::G2_G3); + if (ReserveAppRegisters || !Subtarget.is64Bit()) + Reserved.set(SP::G4_G5); + + Reserved.set(SP::O6_O7); + Reserved.set(SP::I6_I7); + Reserved.set(SP::G6_G7); + // Unaliased double registers are not available in non-V9 targets. if (!Subtarget.isV9()) { for (unsigned n = 0; n != 16; ++n) { @@ -210,4 +222,3 @@ unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } - Index: lib/Target/Sparc/SparcRegisterInfo.td =================================================================== --- lib/Target/Sparc/SparcRegisterInfo.td +++ lib/Target/Sparc/SparcRegisterInfo.td @@ -32,6 +32,12 @@ // Ri - 32-bit integer registers class Ri Enc, string n> : SparcReg; +// Rdi - pairs of 32-bit integer registers +class Rdi Enc, string n, list subregs> : SparcReg { + let SubRegs = subregs; + let SubRegIndices = [sub_even, sub_odd]; + let CoveredBySubRegs = 1; +} // Rf - 32-bit floating-point registers class Rf Enc, string n> : SparcReg; @@ -217,6 +223,24 @@ def Q14 : Rq<25, "F56", [D28, D29]>; def Q15 : Rq<29, "F60", [D30, D31]>; +// Aliases of the integer registers used for LDD/STD double-word operations +def G0_G1 : Rdi<0, "G0", [G0, G1]>; +def G2_G3 : Rdi<2, "G2", [G2, G3]>; +def G4_G5 : Rdi<4, "G4", [G4, G5]>; +def G6_G7 : Rdi<6, "G6", [G6, G7]>; +def O0_O1 : Rdi<8, "O0", [O0, O1]>; +def O2_O3 : Rdi<10, "O2", [O2, O3]>; +def O4_O5 : Rdi<12, "O4", [O4, O5]>; +def O6_O7 : Rdi<14, "O6", [O6, O7]>; +def L0_L1 : Rdi<16, "L0", [L0, L1]>; +def L2_L3 : Rdi<18, "L2", [L2, L3]>; +def L4_L5 : Rdi<20, "L4", [L4, L5]>; +def L6_L7 : Rdi<22, "L6", [L6, L7]>; +def I0_I1 : Rdi<24, "I0", [I0, I1]>; +def I2_I3 : Rdi<26, "I2", [I2, I3]>; +def I4_I5 : Rdi<28, "I4", [I4, I5]>; +def I6_I7 : Rdi<30, "I6", [I6, I7]>; + // Register classes. // // FIXME: the register order should be defined in terms of the preferred @@ -231,6 +255,13 @@ (sequence "L%u", 0, 7), (sequence "O%u", 0, 7))>; +// Should be in the same order as IntRegs. +def IntPair : RegisterClass<"SP", [v2i32], 64, + (add I0_I1, I2_I3, I4_I5, I6_I7, + G0_G1, G2_G3, G4_G5, G6_G7, + L0_L1, L2_L3, L4_L5, L6_L7, + O0_O1, O2_O3, O4_O5, O6_O7)>; + // Register class for 64-bit mode, with a 64-bit spill slot size. // These are the same as the 32-bit registers, so TableGen will consider this // to be a sub-class of IntRegs. That works out because requiring a 64-bit Index: llvm.spec.in =================================================================== --- llvm.spec.in +++ llvm.spec.in @@ -65,3 +65,4 @@ * Mon Feb 09 2003 Brian R. Gaeke - Initial working version of RPM spec file. + Index: test/CodeGen/SPARC/basictest.ll =================================================================== --- test/CodeGen/SPARC/basictest.ll +++ test/CodeGen/SPARC/basictest.ll @@ -84,3 +84,16 @@ ret i64 %r } +; CHECK-LABEL: load_store_64bit: +; CHECK: ldd [%o0], %o2 +; CHECK: addcc %o3, 3, %o5 +; CHECK: addxcc %o2, 0, %o4 +; CHECK: retl +; CHECK: std %o4, [%o1] +define void @load_store_64bit(i64* %x, i64* %y) { +entry: + %0 = load i64, i64* %x + %add = add nsw i64 %0, 3 + store i64 %add, i64* %y + ret void +} Index: test/CodeGen/SPARC/inlineasm.ll =================================================================== --- test/CodeGen/SPARC/inlineasm.ll +++ test/CodeGen/SPARC/inlineasm.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s +; RUN: llc -march=sparc <%s | FileCheck %s ; CHECK-LABEL: test_constraint_r ; CHECK: add %o1, %o0, %o0 @@ -8,7 +8,7 @@ ret i32 %0 } -; CHECK-LABEL: test_constraint_I +; CHECK-LABEL: test_constraint_I: ; CHECK: add %o0, 1023, %o0 define i32 @test_constraint_I(i32 %a) { entry: @@ -16,7 +16,7 @@ ret i32 %0 } -; CHECK-LABEL: test_constraint_I_neg +; CHECK-LABEL: test_constraint_I_neg: ; CHECK: add %o0, -4096, %o0 define i32 @test_constraint_I_neg(i32 %a) { entry: @@ -24,7 +24,7 @@ ret i32 %0 } -; CHECK-LABEL: test_constraint_I_largeimm +; CHECK-LABEL: test_constraint_I_largeimm: ; CHECK: sethi 9, [[R0:%[gilo][0-7]]] ; CHECK: or [[R0]], 784, [[R1:%[gilo][0-7]]] ; CHECK: add %o0, [[R1]], %o0 @@ -34,12 +34,51 @@ ret i32 %0 } -; CHECK-LABEL: test_constraint_reg +; CHECK-LABEL: test_constraint_reg: ; CHECK: ldda [%o1] 43, %g2 -; CHECK: ldda [%o1] 43, %g3 +; CHECK: ldda [%o1] 43, %g4 define void @test_constraint_reg(i32 %s, i32* %ptr) { entry: %0 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={r2},r,n"(i32* %ptr, i32 43) - %1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g3},r,n"(i32* %ptr, i32 43) + %1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g4},r,n"(i32* %ptr, i32 43) ret void } + +;; Ensure that i64 args to asm are allocated to the IntPair register class. +;; Also checks that register renaming for leaf proc works. +; CHECK-LABEL: test_constraint_r_i64: +; CHECK: mov %o0, %o5 +; CHECK: sra %o5, 31, %o4 +; CHECK: std %o4, [%o1] +define i32 @test_constraint_r_i64(i32 %foo, i64* %out, i32 %o) { +entry: + %conv = sext i32 %foo to i64 + tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out) + ret i32 %o +} + +;; Same test without leaf-proc opt +; CHECK-LABEL: test_constraint_r_i64_noleaf: +; CHECK: mov %i0, %i5 +; CHECK: sra %i5, 31, %i4 +; CHECK: std %i4, [%i1] +define i32 @test_constraint_r_i64_noleaf(i32 %foo, i64* %out, i32 %o) #0 { +entry: + %conv = sext i32 %foo to i64 + tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out) + ret i32 %o +} +attributes #0 = { "no-frame-pointer-elim"="true" } + +;; Ensures that tied in and out gets allocated properly. +; CHECK-LABEL: test_i64_inout: +; CHECK: sethi 0, %i2 +; CHECK: mov 5, %i3 +; CHECK: xor %i2, %g0, %i2 +; CHECK: mov %i2, %i0 +; CHECK: ret +define i64 @test_i64_inout() { +entry: + %0 = call i64 asm sideeffect "xor $1, %g0, $0", "=r,0,~{i1}"(i64 5); + ret i64 %0 +} Index: test/CodeGen/SPARC/reserved-regs.ll =================================================================== --- /dev/null +++ test/CodeGen/SPARC/reserved-regs.ll @@ -0,0 +1,135 @@ +; RUN: llc -march=sparc < %s | FileCheck %s + +@g = common global [32 x i32] zeroinitializer, align 16 +@h = common global [16 x i64] zeroinitializer, align 16 + +;; Ensures that we don't use registers which are supposed to be reserved. + +; CHECK-LABEL: use_all_i32_regs: +; CHECK-NOT: %g0 +; CHECK-NOT: %g1 +; CHECK-NOT: %g5 +; CHECK-NOT: %g6 +; CHECK-NOT: %g7 +; CHECK-NOT: %o6 +; CHECK-NOT: %i6 +; CHECK-NOT: %i7 +; CHECK: ret +define void @use_all_i32_regs() { +entry: + %0 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16 + %1 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4 + %2 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8 + %3 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4 + %4 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16 + %5 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4 + %6 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8 + %7 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4 + %8 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16 + %9 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4 + %10 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8 + %11 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4 + %12 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16 + %13 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4 + %14 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 14), align 8 + %15 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4 + %16 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16 + %17 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4 + %18 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8 + %19 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4 + %20 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16 + %21 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4 + %22 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8 + %23 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4 + %24 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16 + %25 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4 + %26 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8 + %27 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4 + %28 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16 + %29 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4 + %30 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8 + %31 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4 + store volatile i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16 + store volatile i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4 + store volatile i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8 + store volatile i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4 + store volatile i32 %5, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16 + store volatile i32 %6, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4 + store volatile i32 %7, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8 + store volatile i32 %8, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4 + store volatile i32 %9, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16 + store volatile i32 %10, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4 + store volatile i32 %11, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8 + store volatile i32 %12, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4 + store volatile i32 %13, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16 + store volatile i32 %14, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4 + store volatile i32 %15, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 14), align 8 + store volatile i32 %16, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4 + store volatile i32 %17, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16 + store volatile i32 %18, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4 + store volatile i32 %19, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8 + store volatile i32 %20, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4 + store volatile i32 %21, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16 + store volatile i32 %22, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4 + store volatile i32 %23, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8 + store volatile i32 %24, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4 + store volatile i32 %25, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16 + store volatile i32 %26, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4 + store volatile i32 %27, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8 + store volatile i32 %28, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4 + store volatile i32 %29, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16 + store volatile i32 %30, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4 + store volatile i32 %31, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8 + store volatile i32 %0, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4 + ret void +} + + +; CHECK-LABEL: use_all_i64_regs: +; CHECK-NOT: %g0 +; CHECK-NOT: %g1 +; CHECK-NOT: %g4 +; CHECK-NOT: %g5 +; CHECK-NOT: %g6 +; CHECK-NOT: %g7 +; CHECK-NOT: %o6 +; CHECK-NOT: %o7 +; CHECK-NOT: %i6 +; CHECK-NOT: %i7 +; CHECK: ret +define void @use_all_i64_regs() { +entry: + %0 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16 + %1 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4 + %2 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8 + %3 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4 + %4 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16 + %5 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4 + %6 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8 + %7 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4 + %8 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16 + %9 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4 + %10 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8 + %11 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4 + %12 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16 + %13 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4 + %14 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8 + %15 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4 + store volatile i64 %1, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16 + store volatile i64 %2, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4 + store volatile i64 %3, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8 + store volatile i64 %4, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4 + store volatile i64 %5, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16 + store volatile i64 %6, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4 + store volatile i64 %7, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8 + store volatile i64 %8, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4 + store volatile i64 %9, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16 + store volatile i64 %10, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4 + store volatile i64 %11, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8 + store volatile i64 %12, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4 + store volatile i64 %13, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16 + store volatile i64 %14, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4 + store volatile i64 %15, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8 + store volatile i64 %0, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4 + ret void +} Index: test/CodeGen/SPARC/spill.ll =================================================================== --- /dev/null +++ test/CodeGen/SPARC/spill.ll @@ -0,0 +1,64 @@ +; RUN: llc -march=sparc < %s | FileCheck %s + +;; Ensure that spills and reloads work for various types on +;; sparcv8. + +;; For i32/i64 tests, use an asm statement which clobbers most +;; registers to ensure the spill will happen. + +; CHECK-LABEL: test_i32_spill: +; CHECK: and %i0, %i1, %o0 +; CHECK: st %o0, [%fp+{{.+}}] +; CHECK: add %o0, %o0, %g0 +; CHECK: ld [%fp+{{.+}}, %i0 +define i32 @test_i32_spill(i32 %a, i32 %b) { +entry: + %r0 = and i32 %a, %b + ; The clobber list has all registers except g0/o0. (Only o0 is usable.) + %0 = call i32 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6},~{o7}"(i32 %r0) + ret i32 %r0 +} + +; CHECK-LABEL: test_i64_spill: +; CHECK: and %i0, %i2, %o0 +; CHECK: and %i1, %i3, %o1 +; CHECK: std %o0, [%fp+{{.+}}] +; CHECK: add %o0, %o0, %g0 +; CHECK: ldd [%fp+{{.+}}, %i0 +define i64 @test_i64_spill(i64 %a, i64 %b) { +entry: + %r0 = and i64 %a, %b + ; The clobber list has all registers except g0,g1,o0,o1. (Only o0/o1 are a usable pair) + ; So, o0/o1 must be used. + %0 = call i64 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o2},~{o3},~{o4},~{o5},~{o7}"(i64 %r0) + ret i64 %r0 +} + +;; For float/double tests, a call is a suitable clobber as *all* FPU +;; registers are caller-save on sparcv8. + +; CHECK-LABEL: test_float_spill: +; CHECK: fadds %f1, %f0, [[R:%[f][0-31]]] +; CHECK: st [[R]], [%fp+{{.+}}] +; CHECK: call +; CHECK: ld [%fp+{{.+}}, %f0 +declare float @foo_float(float) +define float @test_float_spill(float %a, float %b) { +entry: + %r0 = fadd float %a, %b + %0 = call float @foo_float(float %r0) + ret float %r0 +} + +; CHECK-LABEL: test_double_spill: +; CHECK: faddd %f2, %f0, [[R:%[f][0-31]]] +; CHECK: std [[R]], [%fp+{{.+}}] +; CHECK: call +; CHECK: ldd [%fp+{{.+}}, %f0 +declare double @foo_double(double) +define double @test_double_spill(double %a, double %b) { +entry: + %r0 = fadd double %a, %b + %0 = call double @foo_double(double %r0) + ret double %r0 +} Index: test/MC/Disassembler/Sparc/sparc-mem.txt =================================================================== --- test/MC/Disassembler/Sparc/sparc-mem.txt +++ test/MC/Disassembler/Sparc/sparc-mem.txt @@ -221,3 +221,27 @@ # CHECK: swapa [%g1] 131, %o2 0xd4 0xf8 0x50 0x60 + +# CHECK: ldd [%i0+%l6], %o2 +0xd4 0x1e 0x00 0x16 + +# CHECK: ldd [%i0+32], %o2 +0xd4 0x1e 0x20 0x20 + +# CHECK: ldd [%g1], %o2 +0xd4 0x18 0x60 0x00 + +# CHECK: ldd [%g1], %o2 +0xd4 0x18 0x40 0x00 + +# CHECK: std %o2, [%i0+%l6] +0xd4 0x3e 0x00 0x16 + +# CHECK: std %o2, [%i0+32] +0xd4 0x3e 0x20 0x20 + +# CHECK: std %o2, [%g1] +0xd4 0x38 0x60 0x00 + +# CHECK: std %o2, [%g1] +0xd4 0x38 0x40 0x00 Index: test/MC/Sparc/sparc-mem-instructions.s =================================================================== --- test/MC/Sparc/sparc-mem-instructions.s +++ test/MC/Sparc/sparc-mem-instructions.s @@ -46,6 +46,15 @@ ! CHECK: lda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x86,0x10,0x76] lda [%i0 + %l6] 131, %o2 + ! CHECK: ldd [%i0+%l6], %o2 ! encoding: [0xd4,0x1e,0x00,0x16] + ldd [%i0 + %l6], %o2 + ! CHECK: ldd [%i0+32], %o2 ! encoding: [0xd4,0x1e,0x20,0x20] + ldd [%i0 + 32], %o2 + ! CHECK: ldd [%g1], %o2 ! encoding: [0xd4,0x18,0x40,0x00] + ldd [%g1], %o2 + ! CHECK: ldda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x9e,0x10,0x76] + ldda [%i0 + %l6] 131, %o2 + ! CHECK: stb %o2, [%i0+%l6] ! encoding: [0xd4,0x2e,0x00,0x16] stb %o2, [%i0 + %l6] ! CHECK: stb %o2, [%i0+32] ! encoding: [0xd4,0x2e,0x20,0x20] @@ -72,3 +81,12 @@ st %o2, [%g1] ! CHECK: sta %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xa6,0x10,0x76] sta %o2, [%i0 + %l6] 131 + + ! CHECK: std %o2, [%i0+%l6] ! encoding: [0xd4,0x3e,0x00,0x16] + std %o2, [%i0 + %l6] + ! CHECK: std %o2, [%i0+32] ! encoding: [0xd4,0x3e,0x20,0x20] + std %o2, [%i0 + 32] + ! CHECK: std %o2, [%g1] ! encoding: [0xd4,0x38,0x40,0x00] + std %o2, [%g1] + ! CHECK: stda %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xbe,0x10,0x76] + stda %o2, [%i0 + %l6] 131