[mips] Add MSA builtins and intrinsics for 4-byte and 8-byte loads and stores
from addresses that need not be naturally aligned (__builtin_msa_ldr_d,
__builtin_msa_ldrq_w, __builtin_msa_str_d, __builtin_msa_strq_w). They are
selected to pseudo instructions that a custom inserter expands.

Index: clang/include/clang/Basic/BuiltinsMips.def
===================================================================
--- clang/include/clang/Basic/BuiltinsMips.def
+++ clang/include/clang/Basic/BuiltinsMips.def
@@ -635,6 +635,9 @@
 BUILTIN(__builtin_msa_ld_w, "V4Siv*Ii", "nc")
 BUILTIN(__builtin_msa_ld_d, "V2SLLiv*Ii", "nc")
 
+BUILTIN(__builtin_msa_ldr_d, "V2SLLiv*Ii", "nc")
+BUILTIN(__builtin_msa_ldrq_w, "V4Siv*Ii", "nc")
+
 BUILTIN(__builtin_msa_ldi_b, "V16cIi", "nc")
 BUILTIN(__builtin_msa_ldi_h, "V8sIi", "nc")
 BUILTIN(__builtin_msa_ldi_w, "V4iIi", "nc")
@@ -857,6 +860,9 @@
 BUILTIN(__builtin_msa_st_w, "vV4Siv*Ii", "nc")
 BUILTIN(__builtin_msa_st_d, "vV2SLLiv*Ii", "nc")
 
+BUILTIN(__builtin_msa_str_d, "vV2SLLiv*Ii", "nc")
+BUILTIN(__builtin_msa_strq_w, "vV4Siv*Ii", "nc")
+
 BUILTIN(__builtin_msa_subs_s_b, "V16ScV16ScV16Sc", "nc")
 BUILTIN(__builtin_msa_subs_s_h, "V8SsV8SsV8Ss", "nc")
 BUILTIN(__builtin_msa_subs_s_w, "V4SiV4SiV4Si", "nc")
Index: clang/lib/Headers/msa.h
===================================================================
--- clang/lib/Headers/msa.h
+++ clang/lib/Headers/msa.h
@@ -212,10 +212,14 @@
 #define __msa_ld_h __builtin_msa_ld_h
 #define __msa_ld_w __builtin_msa_ld_w
 #define __msa_ld_d __builtin_msa_ld_d
+#define __msa_ldr_d __builtin_msa_ldr_d
+#define __msa_ldrq_w __builtin_msa_ldrq_w
 #define __msa_st_b __builtin_msa_st_b
 #define __msa_st_h __builtin_msa_st_h
 #define __msa_st_w __builtin_msa_st_w
 #define __msa_st_d __builtin_msa_st_d
+#define __msa_str_d __builtin_msa_str_d
+#define __msa_strq_w __builtin_msa_strq_w
 #define __msa_sat_s_b __builtin_msa_sat_s_b
 #define __msa_sat_s_h __builtin_msa_sat_s_h
 #define __msa_sat_s_w __builtin_msa_sat_s_w
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -2744,10 +2744,14 @@
   case Mips::BI__builtin_msa_ld_h: i = 1; l = -1024; u = 1022; m = 2; break;
   case Mips::BI__builtin_msa_ld_w: i = 1; l = -2048; u = 2044; m = 4; break;
   case Mips::BI__builtin_msa_ld_d: i = 1; l = -4096; u = 4088; m = 8; break;
+  case Mips::BI__builtin_msa_ldr_d: i = 1; l = -4096; u = 4088; m = 8; break;
+  case Mips::BI__builtin_msa_ldrq_w: i = 1; l = -2048; u = 2044; m = 4; break;
   case Mips::BI__builtin_msa_st_b: i = 2; l = -512; u = 511; m = 1; break;
   case Mips::BI__builtin_msa_st_h: i = 2; l = -1024; u = 1022; m = 2; break;
   case Mips::BI__builtin_msa_st_w: i = 2; l = -2048; u = 2044; m = 4; break;
   case Mips::BI__builtin_msa_st_d: i = 2; l = -4096; u = 4088; m = 8; break;
+  case Mips::BI__builtin_msa_str_d: i = 2; l = -4096; u = 4088; m = 8; break;
+  case Mips::BI__builtin_msa_strq_w: i = 2; l = -2048; u = 2044; m = 4; break;
   }
 
   if (!m)
Index: llvm/include/llvm/IR/IntrinsicsMips.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsMips.td
+++ llvm/include/llvm/IR/IntrinsicsMips.td
@@ -1271,6 +1271,13 @@
   Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
   [IntrReadMem, IntrArgMemOnly]>;
 
+def int_mips_ldr_d : GCCBuiltin<"__builtin_msa_ldr_d">,
+  Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
+def int_mips_ldrq_w : GCCBuiltin<"__builtin_msa_ldrq_w">,
+  Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+  [IntrReadMem, IntrArgMemOnly]>;
+
 def int_mips_ldi_b : GCCBuiltin<"__builtin_msa_ldi_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
 def int_mips_ldi_h : GCCBuiltin<"__builtin_msa_ldi_h">,
@@ -1695,6 +1702,13 @@
   Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty],
   [IntrArgMemOnly]>;
 
+def int_mips_str_d : GCCBuiltin<"__builtin_msa_str_d">,
+  Intrinsic<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty],
+  [IntrArgMemOnly]>;
+def int_mips_strq_w : GCCBuiltin<"__builtin_msa_strq_w">,
+  Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty],
+  [IntrArgMemOnly]>;
+
 def int_mips_subs_s_b : GCCBuiltin<"__builtin_msa_subs_s_b">,
   Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
 def int_mips_subs_s_h : GCCBuiltin<"__builtin_msa_subs_s_h">,
Index: llvm/lib/Target/Mips/MipsISelLowering.h
===================================================================
--- llvm/lib/Target/Mips/MipsISelLowering.h
+++ llvm/lib/Target/Mips/MipsISelLowering.h
@@ -706,6 +706,12 @@
                                         bool isFPCmp, unsigned Opc) const;
     MachineBasicBlock *emitPseudoD_SELECT(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;
+    MachineBasicBlock *emitLDRQ_W(MachineInstr &MI,
+                                  MachineBasicBlock *BB) const;
+    MachineBasicBlock *emitLDR_D(MachineInstr &MI, MachineBasicBlock *BB) const;
+    MachineBasicBlock *emitSTRQ_W(MachineInstr &MI,
+                                  MachineBasicBlock *BB) const;
+    MachineBasicBlock *emitSTR_D(MachineInstr &MI, MachineBasicBlock *BB) const;
   };
 
   /// Create MipsTargetLowering objects.
Index: llvm/lib/Target/Mips/MipsISelLowering.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -1451,6 +1451,14 @@
   case Mips::PseudoD_SELECT_I:
   case Mips::PseudoD_SELECT_I64:
     return emitPseudoD_SELECT(MI, BB);
+  case Mips::LDRQ_W:
+    return emitLDRQ_W(MI, BB);
+  case Mips::LDR_D:
+    return emitLDR_D(MI, BB);
+  case Mips::STRQ_W:
+    return emitSTRQ_W(MI, BB);
+  case Mips::STR_D:
+    return emitSTR_D(MI, BB);
   }
 }
 
@@ -4715,3 +4723,289 @@
   }
   report_fatal_error("Invalid register name global variable");
 }
+
+// Expand the LDRQ_W pseudo: load one 32-bit word from Address + Imm, which
+// need not be naturally aligned, into a GPR and move it into Dest with
+// FILL_W. MIPS32r6/MIPS64r6 use a plain LW; earlier revisions use an LWR/LWL
+// pair whose offsets depend on endianness.
+MachineBasicBlock *MipsTargetLowering::emitLDRQ_W(MachineInstr &MI,
+                                                  MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const bool IsLittle = Subtarget.isLittle();
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register Dest = MI.getOperand(0).getReg();
+  Register Address = MI.getOperand(1).getReg();
+  int64_t Imm = MI.getOperand(2).getImm(); // Signed: offset may be negative.
+
+  MachineBasicBlock::iterator I(MI);
+
+  if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can load from address that is not naturally-aligned.
+    Register Temp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::LW))
+        .addDef(Temp)
+        .addUse(Address)
+        .addImm(Imm);
+    BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Dest).addUse(Temp);
+  } else {
+    // Mips release 5 needs to use instructions that can load from an unaligned
+    // memory address.
+    Register LoadHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register LoadFull = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register Undef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(Undef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWR))
+        .addDef(LoadHalf)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 0 : 3))
+        .addUse(Undef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWL))
+        .addDef(LoadFull)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 3 : 0))
+        .addUse(LoadHalf);
+    BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Dest).addUse(LoadFull);
+  }
+
+  MI.eraseFromParent();
+  return BB;
+}
+
+// Expand the LDR_D pseudo: load 64 bits from Address + Imm, which need not be
+// naturally aligned, into Dest. 64-bit R6 uses LD + FILL_D; 32-bit R6 uses
+// two LW; pre-R6 uses two LWR/LWL pairs. The 32-bit paths combine the two
+// words with FILL_W + INSERT_W, picking offsets according to endianness.
+MachineBasicBlock *MipsTargetLowering::emitLDR_D(MachineInstr &MI,
+                                                 MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const bool IsLittle = Subtarget.isLittle();
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register Dest = MI.getOperand(0).getReg();
+  Register Address = MI.getOperand(1).getReg();
+  int64_t Imm = MI.getOperand(2).getImm(); // Signed: offset may be negative.
+
+  MachineBasicBlock::iterator I(MI);
+
+  if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can load from address that is not naturally-aligned.
+    if (Subtarget.isGP64bit()) {
+      Register Temp = MRI.createVirtualRegister(&Mips::GPR64RegClass);
+      BuildMI(*BB, I, DL, TII->get(Mips::LD))
+          .addDef(Temp)
+          .addUse(Address)
+          .addImm(Imm);
+      BuildMI(*BB, I, DL, TII->get(Mips::FILL_D)).addDef(Dest).addUse(Temp);
+    } else {
+      Register Wtemp = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+      Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+      Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+      BuildMI(*BB, I, DL, TII->get(Mips::LW))
+          .addDef(Lo)
+          .addUse(Address)
+          .addImm(Imm + (IsLittle ? 0 : 4));
+      BuildMI(*BB, I, DL, TII->get(Mips::LW))
+          .addDef(Hi)
+          .addUse(Address)
+          .addImm(Imm + (IsLittle ? 4 : 0));
+      BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Wtemp).addUse(Lo);
+      BuildMI(*BB, I, DL, TII->get(Mips::INSERT_W), Dest)
+          .addUse(Wtemp)
+          .addUse(Hi)
+          .addImm(1);
+    }
+  } else {
+    // Mips release 5 needs to use instructions that can load from an unaligned
+    // memory address.
+    Register LoHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register LoFull = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register LoUndef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register HiHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register HiFull = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register HiUndef = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register Wtemp = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(LoUndef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWR))
+        .addDef(LoHalf)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 0 : 7))
+        .addUse(LoUndef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWL))
+        .addDef(LoFull)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 3 : 4))
+        .addUse(LoHalf);
+    BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(HiUndef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWR))
+        .addDef(HiHalf)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 4 : 3))
+        .addUse(HiUndef);
+    BuildMI(*BB, I, DL, TII->get(Mips::LWL))
+        .addDef(HiFull)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 7 : 0))
+        .addUse(HiHalf);
+    BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Wtemp).addUse(LoFull);
+    BuildMI(*BB, I, DL, TII->get(Mips::INSERT_W), Dest)
+        .addUse(Wtemp)
+        .addUse(HiFull)
+        .addImm(1);
+  }
+
+  MI.eraseFromParent();
+  return BB;
+}
+
+// Expand the STRQ_W pseudo: copy word 0 of StoreVal to a GPR with COPY_S_W
+// and store it to Address + Imm, which need not be naturally aligned.
+// MIPS32r6/MIPS64r6 use a plain SW; earlier revisions use an SWR/SWL pair.
+MachineBasicBlock *MipsTargetLowering::emitSTRQ_W(MachineInstr &MI,
+                                                  MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const bool IsLittle = Subtarget.isLittle();
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register StoreVal = MI.getOperand(0).getReg();
+  Register Address = MI.getOperand(1).getReg();
+  int64_t Imm = MI.getOperand(2).getImm(); // Signed: offset may be negative.
+
+  MachineBasicBlock::iterator I(MI);
+
+  if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can store to address that is not naturally-aligned.
+    Register BitcastW = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+    Register Tmp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY)).addDef(BitcastW).addUse(StoreVal);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+        .addDef(Tmp)
+        .addUse(BitcastW)
+        .addImm(0);
+    BuildMI(*BB, I, DL, TII->get(Mips::SW))
+        .addUse(Tmp)
+        .addUse(Address)
+        .addImm(Imm);
+  } else {
+    // Mips release 5 needs to use instructions that can store to an unaligned
+    // memory address.
+    Register Tmp = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+        .addDef(Tmp)
+        .addUse(StoreVal)
+        .addImm(0);
+    BuildMI(*BB, I, DL, TII->get(Mips::SWR))
+        .addUse(Tmp)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 0 : 3));
+    BuildMI(*BB, I, DL, TII->get(Mips::SWL))
+        .addUse(Tmp)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 3 : 0));
+  }
+
+  MI.eraseFromParent();
+
+  return BB;
+}
+
+// Expand the STR_D pseudo: store the low 64 bits of StoreVal to Address + Imm,
+// which need not be naturally aligned. 64-bit R6 uses COPY_S_D + SD; 32-bit
+// R6 uses two SW; pre-R6 uses two SWR/SWL pairs. Every 32-bit path places
+// word 0 at the same offset for a given endianness, mirroring emitLDR_D.
+MachineBasicBlock *MipsTargetLowering::emitSTR_D(MachineInstr &MI,
+                                                 MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const bool IsLittle = Subtarget.isLittle();
+  DebugLoc DL = MI.getDebugLoc();
+
+  Register StoreVal = MI.getOperand(0).getReg();
+  Register Address = MI.getOperand(1).getReg();
+  int64_t Imm = MI.getOperand(2).getImm(); // Signed: offset may be negative.
+
+  MachineBasicBlock::iterator I(MI);
+
+  if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) {
+    // Mips release 6 can store to address that is not naturally-aligned.
+    if (Subtarget.isGP64bit()) {
+      Register BitcastD = MRI.createVirtualRegister(&Mips::MSA128DRegClass);
+      Register Lo = MRI.createVirtualRegister(&Mips::GPR64RegClass);
+      BuildMI(*BB, I, DL, TII->get(Mips::COPY))
+          .addDef(BitcastD)
+          .addUse(StoreVal);
+      BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_D))
+          .addDef(Lo)
+          .addUse(BitcastD)
+          .addImm(0);
+      BuildMI(*BB, I, DL, TII->get(Mips::SD))
+          .addUse(Lo)
+          .addUse(Address)
+          .addImm(Imm);
+    } else {
+      Register BitcastW = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+      Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+      Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+      BuildMI(*BB, I, DL, TII->get(Mips::COPY))
+          .addDef(BitcastW)
+          .addUse(StoreVal);
+      BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+          .addDef(Lo)
+          .addUse(BitcastW)
+          .addImm(0);
+      BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+          .addDef(Hi)
+          .addUse(BitcastW)
+          .addImm(1);
+      BuildMI(*BB, I, DL, TII->get(Mips::SW))
+          .addUse(Lo)
+          .addUse(Address)
+          .addImm(Imm + (IsLittle ? 0 : 4));
+      BuildMI(*BB, I, DL, TII->get(Mips::SW))
+          .addUse(Hi)
+          .addUse(Address)
+          .addImm(Imm + (IsLittle ? 4 : 0));
+    }
+  } else {
+    // Mips release 5 needs to use instructions that can store to an unaligned
+    // memory address.
+    Register Bitcast = MRI.createVirtualRegister(&Mips::MSA128WRegClass);
+    Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY)).addDef(Bitcast).addUse(StoreVal);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+        .addDef(Lo)
+        .addUse(Bitcast)
+        .addImm(0);
+    BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W))
+        .addDef(Hi)
+        .addUse(Bitcast)
+        .addImm(1);
+    BuildMI(*BB, I, DL, TII->get(Mips::SWR))
+        .addUse(Lo)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 0 : 7));
+    BuildMI(*BB, I, DL, TII->get(Mips::SWL))
+        .addUse(Lo)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 3 : 4));
+    BuildMI(*BB, I, DL, TII->get(Mips::SWR))
+        .addUse(Hi)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 4 : 3));
+    BuildMI(*BB, I, DL, TII->get(Mips::SWL))
+        .addUse(Hi)
+        .addUse(Address)
+        .addImm(Imm + (IsLittle ? 7 : 0));
+  }
+
+  MI.eraseFromParent();
+  return BB;
+}
Index: llvm/lib/Target/Mips/MipsMSAInstrInfo.td
===================================================================
--- llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -2339,6 +2339,16 @@
 class LDI_W_DESC : MSA_I10_LDI_DESC_BASE<"ldi.w", MSA128WOpnd>;
 class LDI_D_DESC : MSA_I10_LDI_DESC_BASE<"ldi.d", MSA128DOpnd>;
 
+class MSA_LOAD_PSEUDO_BASE<SDPatternOperator intrinsic, RegisterOperand RO> :
+  PseudoSE<(outs RO:$dst), (ins PtrRC:$ptr, GPR32:$imm),
+           [(set RO:$dst, (intrinsic iPTR:$ptr, GPR32:$imm))]> {
+  let hasNoSchedulingInfo = 1;
+  let usesCustomInserter = 1;
+}
+
+def LDR_D : MSA_LOAD_PSEUDO_BASE<int_mips_ldr_d, MSA128DOpnd>;
+def LDRQ_W : MSA_LOAD_PSEUDO_BASE<int_mips_ldrq_w, MSA128WOpnd>;
+
 class LSA_DESC_BASE<string instr_asm, RegisterOperand RORD,
                     InstrItinClass itin = NoItinerary> {
   dag OutOperandList = (outs RORD:$rd);
@@ -2671,6 +2681,16 @@
 class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, MSA128DOpnd,
                                mem_simm10_lsl3, addrimm10lsl3>;
 
+class MSA_STORE_PSEUDO_BASE<SDPatternOperator intrinsic, RegisterOperand RO> :
+  PseudoSE<(outs), (ins RO:$dst, PtrRC:$ptr, GPR32:$imm),
+           [(intrinsic RO:$dst, iPTR:$ptr, GPR32:$imm)]> {
+  let hasNoSchedulingInfo = 1;
+  let usesCustomInserter = 1;
+}
+
+def STR_D : MSA_STORE_PSEUDO_BASE<int_mips_str_d, MSA128DOpnd>;
+def STRQ_W : MSA_STORE_PSEUDO_BASE<int_mips_strq_w, MSA128WOpnd>;
+
 class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b,
                                        MSA128BOpnd>;
 class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h,
Index: llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -833,7 +833,9 @@
   }
 
   case ISD::INTRINSIC_W_CHAIN: {
-    switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+    const unsigned IntrinsicOpcode =
+        cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+    switch (IntrinsicOpcode) {
     default:
       break;
 
@@ -845,6 +847,41 @@
       ReplaceNode(Node, Reg.getNode());
       return true;
     }
+    case Intrinsic::mips_ldr_d:
+    case Intrinsic::mips_ldrq_w: {
+      unsigned Op = (IntrinsicOpcode == Intrinsic::mips_ldr_d) ? Mips::LDR_D
+                                                               : Mips::LDRQ_W;
+
+      SDLoc DL(Node);
+      assert(Node->getNumOperands() == 4 && "Unexpected number of operands.");
+      const SDValue &Chain = Node->getOperand(0);
+      const SDValue &Intrinsic = Node->getOperand(1);
+      const SDValue &Pointer = Node->getOperand(2);
+      const SDValue &Constant = Node->getOperand(3);
+
+      assert(Chain.getValueType() == MVT::Other);
+      (void)Intrinsic; // Only used by the assert below.
+      assert(Intrinsic.getOpcode() == ISD::TargetConstant &&
+             Constant.getOpcode() == ISD::Constant &&
+             "Invalid instruction operand.");
+
+      // Convert Constant to TargetConstant.
+      const ConstantInt *Val =
+          cast<ConstantSDNode>(Constant)->getConstantIntValue();
+      SDValue Imm =
+          CurDAG->getTargetConstant(*Val, DL, Constant.getValueType());
+
+      SmallVector<SDValue, 3> Ops{Pointer, Imm, Chain};
+
+      assert(Node->getNumValues() == 2);
+      assert(Node->getValueType(0).is128BitVector());
+      assert(Node->getValueType(1) == MVT::Other);
+      SmallVector<EVT, 2> ResTys{Node->getValueType(0), Node->getValueType(1)};
+
+      ReplaceNode(Node, CurDAG->getMachineNode(Op, DL, ResTys, Ops));
+
+      return true;
+    }
     }
     break;
   }
@@ -866,7 +903,9 @@
   }
 
   case ISD::INTRINSIC_VOID: {
-    switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+    const unsigned IntrinsicOpcode =
+        cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+    switch (IntrinsicOpcode) {
     default:
       break;
 
@@ -879,6 +918,40 @@
       ReplaceNode(Node, ChainOut.getNode());
       return true;
     }
+    case Intrinsic::mips_str_d:
+    case Intrinsic::mips_strq_w: {
+      unsigned Op = (IntrinsicOpcode == Intrinsic::mips_str_d) ? Mips::STR_D
+                                                               : Mips::STRQ_W;
+
+      SDLoc DL(Node);
+      assert(Node->getNumOperands() == 5 && "Unexpected number of operands.");
+      const SDValue &Chain = Node->getOperand(0);
+      const SDValue &Intrinsic = Node->getOperand(1);
+      const SDValue &Vec = Node->getOperand(2);
+      const SDValue &Pointer = Node->getOperand(3);
+      const SDValue &Constant = Node->getOperand(4);
+
+      assert(Chain.getValueType() == MVT::Other);
+      (void)Intrinsic; // Only used by the assert below.
+      assert(Intrinsic.getOpcode() == ISD::TargetConstant &&
+             Constant.getOpcode() == ISD::Constant &&
+             "Invalid instruction operand.");
+
+      // Convert Constant to TargetConstant.
+      const ConstantInt *Val =
+          cast<ConstantSDNode>(Constant)->getConstantIntValue();
+      SDValue Imm =
+          CurDAG->getTargetConstant(*Val, DL, Constant.getValueType());
+
+      SmallVector<SDValue, 4> Ops{Vec, Pointer, Imm, Chain};
+
+      assert(Node->getNumValues() == 1);
+      assert(Node->getValueType(0) == MVT::Other);
+      SmallVector<EVT, 1> ResTys{Node->getValueType(0)};
+
+      ReplaceNode(Node, CurDAG->getMachineNode(Op, DL, ResTys, Ops));
+      return true;
+    }
     }
     break;
   }
Index: llvm/test/CodeGen/Mips/msa/ldr_str.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Mips/msa/ldr_str.ll
@@ -0,0 +1,224 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=mips -mcpu=mips32r5 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS32R5-EB
+; RUN: llc -march=mipsel -mcpu=mips32r5 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS32R5-EL
+; RUN: llc -march=mips -mcpu=mips32r6 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS32R6-EB
+; RUN: llc -march=mipsel -mcpu=mips32r6 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS32R6-EL
+; RUN: llc -march=mips64 -mcpu=mips64r6 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS64R6
+; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=+msa,+fp64 -O0 < %s | FileCheck %s --check-prefix=MIPS64R6
+
+; Test intrinsics for 4-byte and 8-byte MSA load and stores.
+
+define void @llvm_mips_ldr_d_test(<2 x i64>* %val, i8* %ptr) nounwind {
+; MIPS32R5-EB-LABEL: llvm_mips_ldr_d_test:
+; MIPS32R5-EB:       # %bb.0: # %entry
+; MIPS32R5-EB-NEXT:    # implicit-def: $at
+; MIPS32R5-EB-NEXT:    lwr $1, 23($5)
+; MIPS32R5-EB-NEXT:    lwl $1, 20($5)
+; MIPS32R5-EB-NEXT:    # implicit-def: $v0
+; MIPS32R5-EB-NEXT:    lwr $2, 19($5)
+; MIPS32R5-EB-NEXT:    lwl $2, 16($5)
+; MIPS32R5-EB-NEXT:    fill.w $w0, $1
+; MIPS32R5-EB-NEXT:    insert.w $w0[1], $2
+; MIPS32R5-EB-NEXT:    st.d $w0, 0($4)
+; MIPS32R5-EB-NEXT:    jr $ra
+; MIPS32R5-EB-NEXT:    nop
+;
+; MIPS32R5-EL-LABEL: llvm_mips_ldr_d_test:
+; MIPS32R5-EL:       # %bb.0: # %entry
+; MIPS32R5-EL-NEXT:    # implicit-def: $at
+; MIPS32R5-EL-NEXT:    lwr $1, 16($5)
+; MIPS32R5-EL-NEXT:    lwl $1, 19($5)
+; MIPS32R5-EL-NEXT:    # implicit-def: $v0
+; MIPS32R5-EL-NEXT:    lwr $2, 20($5)
+; MIPS32R5-EL-NEXT:    lwl $2, 23($5)
+; MIPS32R5-EL-NEXT:    fill.w $w0, $1
+; MIPS32R5-EL-NEXT:    insert.w $w0[1], $2
+; MIPS32R5-EL-NEXT:    st.d $w0, 0($4)
+; MIPS32R5-EL-NEXT:    jr $ra
+; MIPS32R5-EL-NEXT:    nop
+;
+; MIPS32R6-EB-LABEL: llvm_mips_ldr_d_test:
+; MIPS32R6-EB:       # %bb.0: # %entry
+; MIPS32R6-EB-NEXT:    lw $1, 20($5)
+; MIPS32R6-EB-NEXT:    lw $2, 16($5)
+; MIPS32R6-EB-NEXT:    fill.w $w0, $1
+; MIPS32R6-EB-NEXT:    insert.w $w0[1], $2
+; MIPS32R6-EB-NEXT:    st.d $w0, 0($4)
+; MIPS32R6-EB-NEXT:    jrc $ra
+;
+; MIPS32R6-EL-LABEL: llvm_mips_ldr_d_test:
+; MIPS32R6-EL:       # %bb.0: # %entry
+; MIPS32R6-EL-NEXT:    lw $1, 16($5)
+; MIPS32R6-EL-NEXT:    lw $2, 20($5)
+; MIPS32R6-EL-NEXT:    fill.w $w0, $1
+; MIPS32R6-EL-NEXT:    insert.w $w0[1], $2
+; MIPS32R6-EL-NEXT:    st.d $w0, 0($4)
+; MIPS32R6-EL-NEXT:    jrc $ra
+;
+; MIPS64R6-LABEL: llvm_mips_ldr_d_test:
+; MIPS64R6:       # %bb.0: # %entry
+; MIPS64R6-NEXT:    ld $1, 16($5)
+; MIPS64R6-NEXT:    fill.d $w0, $1
+; MIPS64R6-NEXT:    st.d $w0, 0($4)
+; MIPS64R6-NEXT:    jrc $ra
+entry:
+  %0 = tail call <2 x i64> @llvm.mips.ldr.d(i8* %ptr, i32 16)
+  store <2 x i64> %0, <2 x i64>* %val
+  ret void
+}
+
+declare <2 x i64> @llvm.mips.ldr.d(i8*, i32) nounwind
+
+define void @llvm_mips_ldrq_w_test(<4 x i32>* %val, i8* %ptr) nounwind {
+; MIPS32R5-EB-LABEL: llvm_mips_ldrq_w_test:
+; MIPS32R5-EB:       # %bb.0: # %entry
+; MIPS32R5-EB-NEXT:    # implicit-def: $at
+; MIPS32R5-EB-NEXT:    lwr $1, 19($5)
+; MIPS32R5-EB-NEXT:    lwl $1, 16($5)
+; MIPS32R5-EB-NEXT:    fill.w $w0, $1
+; MIPS32R5-EB-NEXT:    st.w $w0, 0($4)
+; MIPS32R5-EB-NEXT:    jr $ra
+; MIPS32R5-EB-NEXT:    nop
+;
+; MIPS32R5-EL-LABEL: llvm_mips_ldrq_w_test:
+; MIPS32R5-EL:       # %bb.0: # %entry
+; MIPS32R5-EL-NEXT:    # implicit-def: $at
+; MIPS32R5-EL-NEXT:    lwr $1, 16($5)
+; MIPS32R5-EL-NEXT:    lwl $1, 19($5)
+; MIPS32R5-EL-NEXT:    fill.w $w0, $1
+; MIPS32R5-EL-NEXT:    st.w $w0, 0($4)
+; MIPS32R5-EL-NEXT:    jr $ra
+; MIPS32R5-EL-NEXT:    nop
+;
+; MIPS32R6-EB-LABEL: llvm_mips_ldrq_w_test:
+; MIPS32R6-EB:       # %bb.0: # %entry
+; MIPS32R6-EB-NEXT:    lw $1, 16($5)
+; MIPS32R6-EB-NEXT:    fill.w $w0, $1
+; MIPS32R6-EB-NEXT:    st.w $w0, 0($4)
+; MIPS32R6-EB-NEXT:    jrc $ra
+;
+; MIPS32R6-EL-LABEL: llvm_mips_ldrq_w_test:
+; MIPS32R6-EL:       # %bb.0: # %entry
+; MIPS32R6-EL-NEXT:    lw $1, 16($5)
+; MIPS32R6-EL-NEXT:    fill.w $w0, $1
+; MIPS32R6-EL-NEXT:    st.w $w0, 0($4)
+; MIPS32R6-EL-NEXT:    jrc $ra
+;
+; MIPS64R6-LABEL: llvm_mips_ldrq_w_test:
+; MIPS64R6:       # %bb.0: # %entry
+; MIPS64R6-NEXT:    lw $1, 16($5)
+; MIPS64R6-NEXT:    fill.w $w0, $1
+; MIPS64R6-NEXT:    st.w $w0, 0($4)
+; MIPS64R6-NEXT:    jrc $ra
+entry:
+  %0 = tail call <4 x i32> @llvm.mips.ldrq.w(i8* %ptr, i32 16)
+  store <4 x i32> %0, <4 x i32>* %val
+  ret void
+}
+
+declare <4 x i32> @llvm.mips.ldrq.w(i8*, i32) nounwind
+
+define void @llvm_mips_str_d_test(<2 x i64>* %val, i8* %ptr) nounwind {
+; MIPS32R5-EB-LABEL: llvm_mips_str_d_test:
+; MIPS32R5-EB:       # %bb.0: # %entry
+; MIPS32R5-EB-NEXT:    ld.d $w0, 0($4)
+; MIPS32R5-EB-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5-EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5-EB-NEXT:    swr $1, 23($5)
+; MIPS32R5-EB-NEXT:    swl $1, 20($5)
+; MIPS32R5-EB-NEXT:    swr $2, 19($5)
+; MIPS32R5-EB-NEXT:    swl $2, 16($5)
+; MIPS32R5-EB-NEXT:    jr $ra
+; MIPS32R5-EB-NEXT:    nop
+;
+; MIPS32R5-EL-LABEL: llvm_mips_str_d_test:
+; MIPS32R5-EL:       # %bb.0: # %entry
+; MIPS32R5-EL-NEXT:    ld.d $w0, 0($4)
+; MIPS32R5-EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5-EL-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R5-EL-NEXT:    swr $1, 16($5)
+; MIPS32R5-EL-NEXT:    swl $1, 19($5)
+; MIPS32R5-EL-NEXT:    swr $2, 20($5)
+; MIPS32R5-EL-NEXT:    swl $2, 23($5)
+; MIPS32R5-EL-NEXT:    jr $ra
+; MIPS32R5-EL-NEXT:    nop
+;
+; MIPS32R6-EB-LABEL: llvm_mips_str_d_test:
+; MIPS32R6-EB:       # %bb.0: # %entry
+; MIPS32R6-EB-NEXT:    ld.d $w0, 0($4)
+; MIPS32R6-EB-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R6-EB-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R6-EB-NEXT:    sw $1, 20($5)
+; MIPS32R6-EB-NEXT:    sw $2, 16($5)
+; MIPS32R6-EB-NEXT:    jrc $ra
+;
+; MIPS32R6-EL-LABEL: llvm_mips_str_d_test:
+; MIPS32R6-EL:       # %bb.0: # %entry
+; MIPS32R6-EL-NEXT:    ld.d $w0, 0($4)
+; MIPS32R6-EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R6-EL-NEXT:    copy_s.w $2, $w0[1]
+; MIPS32R6-EL-NEXT:    sw $1, 16($5)
+; MIPS32R6-EL-NEXT:    sw $2, 20($5)
+; MIPS32R6-EL-NEXT:    jrc $ra
+;
+; MIPS64R6-LABEL: llvm_mips_str_d_test:
+; MIPS64R6:       # %bb.0: # %entry
+; MIPS64R6-NEXT:    ld.d $w0, 0($4)
+; MIPS64R6-NEXT:    copy_s.d $1, $w0[0]
+; MIPS64R6-NEXT:    sd $1, 16($5)
+; MIPS64R6-NEXT:    jrc $ra
+entry:
+  %0 = load <2 x i64>, <2 x i64>* %val
+  tail call void @llvm.mips.str.d(<2 x i64> %0, i8* %ptr, i32 16)
+  ret void
+}
+
+declare void @llvm.mips.str.d(<2 x i64>, i8*, i32) nounwind
+
+define void @llvm_mips_strq_w_test(<4 x i32>* %val, i8* %ptr) nounwind {
+; MIPS32R5-EB-LABEL: llvm_mips_strq_w_test:
+; MIPS32R5-EB:       # %bb.0: # %entry
+; MIPS32R5-EB-NEXT:    ld.w $w0, 0($4)
+; MIPS32R5-EB-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5-EB-NEXT:    swr $1, 19($5)
+; MIPS32R5-EB-NEXT:    swl $1, 16($5)
+; MIPS32R5-EB-NEXT:    jr $ra
+; MIPS32R5-EB-NEXT:    nop
+;
+; MIPS32R5-EL-LABEL: llvm_mips_strq_w_test:
+; MIPS32R5-EL:       # %bb.0: # %entry
+; MIPS32R5-EL-NEXT:    ld.w $w0, 0($4)
+; MIPS32R5-EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R5-EL-NEXT:    swr $1, 16($5)
+; MIPS32R5-EL-NEXT:    swl $1, 19($5)
+; MIPS32R5-EL-NEXT:    jr $ra
+; MIPS32R5-EL-NEXT:    nop
+;
+; MIPS32R6-EB-LABEL: llvm_mips_strq_w_test:
+; MIPS32R6-EB:       # %bb.0: # %entry
+; MIPS32R6-EB-NEXT:    ld.w $w0, 0($4)
+; MIPS32R6-EB-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R6-EB-NEXT:    sw $1, 16($5)
+; MIPS32R6-EB-NEXT:    jrc $ra
+;
+; MIPS32R6-EL-LABEL: llvm_mips_strq_w_test:
+; MIPS32R6-EL:       # %bb.0: # %entry
+; MIPS32R6-EL-NEXT:    ld.w $w0, 0($4)
+; MIPS32R6-EL-NEXT:    copy_s.w $1, $w0[0]
+; MIPS32R6-EL-NEXT:    sw $1, 16($5)
+; MIPS32R6-EL-NEXT:    jrc $ra
+;
+; MIPS64R6-LABEL: llvm_mips_strq_w_test:
+; MIPS64R6:       # %bb.0: # %entry
+; MIPS64R6-NEXT:    ld.w $w0, 0($4)
+; MIPS64R6-NEXT:    copy_s.w $1, $w0[0]
+; MIPS64R6-NEXT:    sw $1, 16($5)
+; MIPS64R6-NEXT:    jrc $ra
+entry:
+  %0 = load <4 x i32>, <4 x i32>* %val
+  tail call void @llvm.mips.strq.w(<4 x i32> %0, i8* %ptr, i32 16)
+  ret void
+}
+
+declare void @llvm.mips.strq.w(<4 x i32>, i8*, i32) nounwind
+