Index: llvm/trunk/lib/Target/X86/X86CallLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86CallLowering.cpp +++ llvm/trunk/lib/Target/X86/X86CallLowering.cpp @@ -47,7 +47,9 @@ unsigned NumParts = TLI.getNumRegisters(Context, VT); if (NumParts == 1) { - SplitArgs.push_back(OrigArg); + // Replace the original type (pointer -> GPR). + SplitArgs.emplace_back(OrigArg.Reg, VT.getTypeForEVT(Context), + OrigArg.Flags, OrigArg.IsFixed); return; } Index: llvm/trunk/lib/Target/X86/X86GenRegisterBankInfo.def =================================================================== --- llvm/trunk/lib/Target/X86/X86GenRegisterBankInfo.def +++ llvm/trunk/lib/Target/X86/X86GenRegisterBankInfo.def @@ -15,7 +15,7 @@ #error "You shouldn't build this" #endif -namespace llvm { +#ifdef GET_TARGET_REGBANK_INFO_IMPL RegisterBankInfo::PartialMapping X86GenRegisterBankInfo::PartMappings[]{ /* StartIdx, Length, RegBank */ // GPR value @@ -31,7 +31,9 @@ {0, 256, X86::VECRRegBank}, // :7 {0, 512, X86::VECRRegBank}, // :8 }; +#endif // GET_TARGET_REGBANK_INFO_IMPL +#ifdef GET_TARGET_REGBANK_INFO_CLASS enum PartialMappingIdx { PMI_None = -1, PMI_GPR8, @@ -44,7 +46,9 @@ PMI_VEC256, PMI_VEC512 }; +#endif // GET_TARGET_REGBANK_INFO_CLASS +#ifdef GET_TARGET_REGBANK_INFO_IMPL #define INSTR_3OP(INFO) INFO, INFO, INFO, #define BREAKDOWN(INDEX, NUM) \ { &X86GenRegisterBankInfo::PartMappings[INDEX], NUM } @@ -65,7 +69,9 @@ }; #undef INSTR_3OP #undef BREAKDOWN +#endif // GET_TARGET_REGBANK_INFO_IMPL +#ifdef GET_TARGET_REGBANK_INFO_CLASS enum ValueMappingIdx { VMI_None = -1, VMI_3OpsGpr8Idx = PMI_GPR8 * 3, @@ -78,5 +84,21 @@ VMI_3OpsVec256Idx = PMI_VEC256 * 3, VMI_3OpsVec512Idx = PMI_VEC512 * 3, }; +#undef GET_TARGET_REGBANK_INFO_CLASS +#endif // GET_TARGET_REGBANK_INFO_CLASS + +#ifdef GET_TARGET_REGBANK_INFO_IMPL +#undef GET_TARGET_REGBANK_INFO_IMPL +const RegisterBankInfo::ValueMapping * +X86GenRegisterBankInfo::getValueMapping(PartialMappingIdx Idx, + unsigned NumOperands) { + + // We can use VMI_3Ops Mapping for all the cases. + if (NumOperands <= 3 && (Idx >= PMI_GPR8 && Idx <= PMI_VEC512)) + return &ValMappings[(unsigned)Idx * 3]; + + llvm_unreachable("Unsupported PartialMappingIdx."); +} + +#endif // GET_TARGET_REGBANK_INFO_IMPL -} // End llvm namespace.
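For context on the new preprocessor guards above: X86GenRegisterBankInfo.def is now meant to be included twice, once for the enum declarations and once for the tables and getValueMapping(), exactly as the X86RegisterBankInfo.h and X86RegisterBankInfo.cpp hunks later in this patch do. A minimal sketch of that include pattern (no new names introduced here; both macros are #undef'ed by the .def itself):

// In the class body (X86RegisterBankInfo.h): declarations only.
#define GET_TARGET_REGBANK_INFO_CLASS   // exposes PartialMappingIdx / ValueMappingIdx
#include "X86GenRegisterBankInfo.def"

// At file scope (X86RegisterBankInfo.cpp): tables and implementation.
#define GET_TARGET_REGBANK_INFO_IMPL    // exposes PartMappings, ValMappings, getValueMapping()
#include "X86GenRegisterBankInfo.def"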
Index: llvm/trunk/lib/Target/X86/X86InstructionSelector.h =================================================================== --- llvm/trunk/lib/Target/X86/X86InstructionSelector.h +++ llvm/trunk/lib/Target/X86/X86InstructionSelector.h @@ -27,6 +27,7 @@ class LLT; class RegisterBank; class MachineRegisterInfo; +class MachineFunction; class X86InstructionSelector : public InstructionSelector { public: @@ -45,7 +46,13 @@ unsigned getFSubOp(LLT &Ty, const RegisterBank &RB) const; unsigned getAddOp(LLT &Ty, const RegisterBank &RB) const; unsigned getSubOp(LLT &Ty, const RegisterBank &RB) const; - bool selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI) const; + unsigned getLoadStoreOp(LLT &Ty, const RegisterBank &RB, unsigned Opc, + uint64_t Alignment) const; + + bool selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; + bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; const X86Subtarget &STI; const X86InstrInfo &TII; Index: llvm/trunk/lib/Target/X86/X86InstructionSelector.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86InstructionSelector.cpp +++ llvm/trunk/lib/Target/X86/X86InstructionSelector.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86InstructionSelector.h" +#include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86RegisterBankInfo.h" #include "X86RegisterInfo.h" @@ -154,7 +155,9 @@ // TODO: This should be implemented by tblgen, pattern with predicate not // supported yet. - if (selectBinaryOp(I, MRI)) + if (selectBinaryOp(I, MRI, MF)) + return true; + if (selectLoadStoreOp(I, MRI, MF)) return true; return selectImpl(I); @@ -269,15 +272,16 @@ } bool X86InstructionSelector::selectBinaryOp(MachineInstr &I, - MachineRegisterInfo &MRI) const { + MachineRegisterInfo &MRI, + MachineFunction &MF) const { - LLT Ty = MRI.getType(I.getOperand(0).getReg()); const unsigned DefReg = I.getOperand(0).getReg(); + LLT Ty = MRI.getType(DefReg); const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); unsigned NewOpc = I.getOpcode(); - switch (I.getOpcode()) { + switch (NewOpc) { case TargetOpcode::G_FADD: NewOpc = getFAddOp(Ty, RB); break; @@ -301,3 +305,87 @@ return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } + +unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB, + unsigned Opc, + uint64_t Alignment) const { + bool Isload = (Opc == TargetOpcode::G_LOAD); + bool HasAVX = STI.hasAVX(); + bool HasAVX512 = STI.hasAVX512(); + bool HasVLX = STI.hasVLX(); + + if (Ty == LLT::scalar(8)) { + if (X86::GPRRegBankID == RB.getID()) + return Isload ? X86::MOV8rm : X86::MOV8mr; + } else if (Ty == LLT::scalar(16)) { + if (X86::GPRRegBankID == RB.getID()) + return Isload ? X86::MOV16rm : X86::MOV16mr; + } else if (Ty == LLT::scalar(32)) { + if (X86::GPRRegBankID == RB.getID()) + return Isload ? X86::MOV32rm : X86::MOV32mr; + if (X86::VECRRegBankID == RB.getID()) + return Isload ? (HasAVX512 ? X86::VMOVSSZrm + : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) + : (HasAVX512 ? X86::VMOVSSZmr + : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr); + } else if (Ty == LLT::scalar(64)) { + if (X86::GPRRegBankID == RB.getID()) + return Isload ? X86::MOV64rm : X86::MOV64mr; + if (X86::VECRRegBankID == RB.getID()) + return Isload ? (HasAVX512 ? X86::VMOVSDZrm + : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) + : (HasAVX512 ? X86::VMOVSDZmr + : HasAVX ? 
X86::VMOVSDmr : X86::MOVSDmr); + } else if (Ty.isVector() && Ty.getSizeInBits() == 128) { + if (Alignment >= 16) + return Isload ? (HasVLX ? X86::VMOVAPSZ128rm + : HasAVX512 + ? X86::VMOVAPSZ128rm_NOVLX + : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) + : (HasVLX ? X86::VMOVAPSZ128mr + : HasAVX512 + ? X86::VMOVAPSZ128mr_NOVLX + : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr); + else + return Isload ? (HasVLX ? X86::VMOVUPSZ128rm + : HasAVX512 + ? X86::VMOVUPSZ128rm_NOVLX + : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) + : (HasVLX ? X86::VMOVUPSZ128mr + : HasAVX512 + ? X86::VMOVUPSZ128mr_NOVLX + : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); + } + return Opc; +} + +bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + + unsigned Opc = I.getOpcode(); + + if (Opc != TargetOpcode::G_STORE && Opc != TargetOpcode::G_LOAD) + return false; + + const unsigned DefReg = I.getOperand(0).getReg(); + LLT Ty = MRI.getType(DefReg); + const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); + + auto &MemOp = **I.memoperands_begin(); + unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment()); + if (NewOpc == Opc) + return false; + + I.setDesc(TII.get(NewOpc)); + MachineInstrBuilder MIB(MF, I); + if (Opc == TargetOpcode::G_LOAD) + addOffset(MIB, 0); + else { + // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL) + I.RemoveOperand(0); + addOffset(MIB, 0).addUse(DefReg); + } + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} + Index: llvm/trunk/lib/Target/X86/X86LegalizerInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86LegalizerInfo.cpp +++ llvm/trunk/lib/Target/X86/X86LegalizerInfo.cpp @@ -37,13 +37,24 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { + if (Subtarget.is64Bit()) + return; + + const LLT p0 = LLT::pointer(0, 32); const LLT s8 = LLT::scalar(8); const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); - for (auto Ty : {s8, s16, s32}) { - setAction({G_ADD, Ty}, Legal); - setAction({G_SUB, Ty}, Legal); + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {s8, s16, s32}) + setAction({BinOp, Ty}, Legal); + + for (unsigned MemOp : {G_LOAD, G_STORE}) { + for (auto Ty : {s8, s16, s32, p0}) + setAction({MemOp, Ty}, Legal); + + // And everything's fine in addrspace 0. + setAction({MemOp, 1, p0}, Legal); } } @@ -52,10 +63,23 @@ if (!Subtarget.is64Bit()) return; + const LLT p0 = LLT::pointer(0, 64); + const LLT s8 = LLT::scalar(8); + const LLT s16 = LLT::scalar(16); + const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); - setAction({G_ADD, s64}, Legal); - setAction({G_SUB, s64}, Legal); + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {s8, s16, s32, s64}) + setAction({BinOp, Ty}, Legal); + + for (unsigned MemOp : {G_LOAD, G_STORE}) { + for (auto Ty : {s8, s16, s32, s64, p0}) + setAction({MemOp, Ty}, Legal); + + // And everything's fine in addrspace 0. 
+ setAction({MemOp, 1, p0}, Legal); + } } void X86LegalizerInfo::setLegalizerInfoSSE1() { @@ -64,10 +88,15 @@ const LLT s32 = LLT::scalar(32); const LLT v4s32 = LLT::vector(4, 32); + const LLT v2s64 = LLT::vector(2, 64); for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) for (auto Ty : {s32, v4s32}) setAction({BinOp, Ty}, Legal); + + for (unsigned MemOp : {G_LOAD, G_STORE}) + for (auto Ty : {v4s32, v2s64}) + setAction({MemOp, Ty}, Legal); } void X86LegalizerInfo::setLegalizerInfoSSE2() { Index: llvm/trunk/lib/Target/X86/X86RegisterBankInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterBankInfo.h +++ llvm/trunk/lib/Target/X86/X86RegisterBankInfo.h @@ -21,13 +21,21 @@ namespace llvm { +class LLT; + class X86GenRegisterBankInfo : public RegisterBankInfo { protected: +#define GET_TARGET_REGBANK_CLASS +#include "X86GenRegisterBank.inc" +#define GET_TARGET_REGBANK_INFO_CLASS +#include "X86GenRegisterBankInfo.def" + static RegisterBankInfo::PartialMapping PartMappings[]; static RegisterBankInfo::ValueMapping ValMappings[]; -#define GET_TARGET_REGBANK_CLASS -#include "X86GenRegisterBank.inc" + static PartialMappingIdx getPartialMappingIdx(const LLT &Ty, bool isFP); + static const RegisterBankInfo::ValueMapping * + getValueMapping(PartialMappingIdx Idx, unsigned NumOperands); }; class TargetRegisterInfo; @@ -38,8 +46,8 @@ /// Get an instruction mapping. /// \return An InstructionMappings with a statically allocated /// OperandsMapping. - static InstructionMapping getOperandsMapping(const MachineInstr &MI, - bool isFP); + static InstructionMapping getSameOperandsMapping(const MachineInstr &MI, + bool isFP); public: X86RegisterBankInfo(const TargetRegisterInfo &TRI); Index: llvm/trunk/lib/Target/X86/X86RegisterBankInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterBankInfo.cpp +++ llvm/trunk/lib/Target/X86/X86RegisterBankInfo.cpp @@ -21,11 +21,11 @@ #define GET_TARGET_REGBANK_IMPL #include "X86GenRegisterBank.inc" +using namespace llvm; // This file will be TableGen'ed at some point. 
+#define GET_TARGET_REGBANK_INFO_IMPL #include "X86GenRegisterBankInfo.def" -using namespace llvm; - #ifndef LLVM_BUILD_GLOBAL_ISEL #error "You shouldn't build this" #endif @@ -64,72 +64,67 @@ llvm_unreachable("Unsupported register kind yet."); } -RegisterBankInfo::InstructionMapping -X86RegisterBankInfo::getOperandsMapping(const MachineInstr &MI, bool isFP) { - const MachineFunction &MF = *MI.getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - unsigned NumOperands = MI.getNumOperands(); - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - - if (NumOperands != 3 || (Ty != MRI.getType(MI.getOperand(1).getReg())) || - (Ty != MRI.getType(MI.getOperand(2).getReg()))) - llvm_unreachable("Unsupported operand maping yet."); - - ValueMappingIdx ValMapIdx = VMI_None; - - if (Ty.isScalar()) { - if (!isFP) { - switch (Ty.getSizeInBits()) { - case 8: - ValMapIdx = VMI_3OpsGpr8Idx; - break; - case 16: - ValMapIdx = VMI_3OpsGpr16Idx; - break; - case 32: - ValMapIdx = VMI_3OpsGpr32Idx; - break; - case 64: - ValMapIdx = VMI_3OpsGpr64Idx; - break; - default: - llvm_unreachable("Unsupported register size."); - } - } else { - switch (Ty.getSizeInBits()) { - case 32: - ValMapIdx = VMI_3OpsFp32Idx; - break; - case 64: - ValMapIdx = VMI_3OpsFp64Idx; - break; - default: - llvm_unreachable("Unsupported register size."); - } +X86GenRegisterBankInfo::PartialMappingIdx +X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) { + if ((Ty.isScalar() && !isFP) || Ty.isPointer()) { + switch (Ty.getSizeInBits()) { + case 8: + return PMI_GPR8; + case 16: + return PMI_GPR16; + case 32: + return PMI_GPR32; + case 64: + return PMI_GPR64; + break; + default: + llvm_unreachable("Unsupported register size."); + } + } else if (Ty.isScalar()) { + switch (Ty.getSizeInBits()) { + case 32: + return PMI_FP32; + case 64: + return PMI_FP64; + default: + llvm_unreachable("Unsupported register size."); } } else { switch (Ty.getSizeInBits()) { case 128: - ValMapIdx = VMI_3OpsVec128Idx; - break; + return PMI_VEC128; case 256: - ValMapIdx = VMI_3OpsVec256Idx; - break; + return PMI_VEC256; case 512: - ValMapIdx = VMI_3OpsVec512Idx; - break; + return PMI_VEC512; default: llvm_unreachable("Unsupported register size."); } } - return InstructionMapping{DefaultMappingID, 1, &ValMappings[ValMapIdx], - NumOperands}; + return PMI_None; +} + +RegisterBankInfo::InstructionMapping +X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, bool isFP) { + const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + unsigned NumOperands = MI.getNumOperands(); + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + + if (NumOperands != 3 || (Ty != MRI.getType(MI.getOperand(1).getReg())) || + (Ty != MRI.getType(MI.getOperand(2).getReg()))) + llvm_unreachable("Unsupported operand mapping yet."); + + auto Mapping = getValueMapping(getPartialMappingIdx(Ty, isFP), 3); + return InstructionMapping{DefaultMappingID, 1, Mapping, NumOperands}; } RegisterBankInfo::InstructionMapping X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { + const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); auto Opc = MI.getOpcode(); // Try the default logic for non-generic instructions that are either copies @@ -143,17 +138,46 @@ switch (Opc) { case TargetOpcode::G_ADD: case TargetOpcode::G_SUB: - return getOperandsMapping(MI, false); + return getSameOperandsMapping(MI, false); break; case 
TargetOpcode::G_FADD: case TargetOpcode::G_FSUB: case TargetOpcode::G_FMUL: case TargetOpcode::G_FDIV: - return getOperandsMapping(MI, true); + return getSameOperandsMapping(MI, true); break; default: - return InstructionMapping{}; + break; + } + + unsigned NumOperands = MI.getNumOperands(); + unsigned Cost = 1; // set default cost + + // Track the bank of each register. + SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands); + for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { + auto &MO = MI.getOperand(Idx); + if (!MO.isReg()) + continue; + + // As a top-level guess, use NotFP mapping (all scalars in GPRs) + OpRegBankIdx[Idx] = getPartialMappingIdx(MRI.getType(MO.getReg()), false); + } + + // Finally construct the computed mapping. + RegisterBankInfo::InstructionMapping Mapping = + InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands}; + SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands); + for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { + if (MI.getOperand(Idx).isReg()) { + auto Mapping = getValueMapping(OpRegBankIdx[Idx], 1); + if (!Mapping->isValid()) + return InstructionMapping(); + + OpdsMapping[Idx] = Mapping; + } } - return InstructionMapping{}; + Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); + return Mapping; } Index: llvm/trunk/test/CodeGen/X86/GlobalISel/X86-regbankselect.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/X86-regbankselect.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/X86-regbankselect.mir @@ -46,6 +46,61 @@ ret <4 x float> %ret } + define i8 @test_load_i8(i8* %p1) { + %r = load i8, i8* %p1 + ret i8 %r + } + + define i16 @test_load_i16(i16* %p1) { + %r = load i16, i16* %p1 + ret i16 %r + } + + define i32 @test_load_i32(i32* %p1) { + %r = load i32, i32* %p1 + ret i32 %r + } + + define i64 @test_load_i64(i64* %p1) { + %r = load i64, i64* %p1 + ret i64 %r + } + + define float @test_load_float(float* %p1) { + %r = load float, float* %p1 + ret float %r + } + + define double @test_load_double(double* %p1) { + %r = load double, double* %p1 + ret double %r + } + + define <4 x i32> @test_load_v4i32(<4 x i32>* %p1) { + %r = load <4 x i32>, <4 x i32>* %p1, align 16 + ret <4 x i32> %r + } + + define i32* @test_store_i32(i32 %val, i32* %p1) { + store i32 %val, i32* %p1 + ret i32* %p1 + } + + define i64* @test_store_i64(i64 %val, i64* %p1) { + store i64 %val, i64* %p1 + ret i64* %p1 + } + + define float* @test_store_float(float %val, float* %p1) { + store float %val, float* %p1 + ret float* %p1 + } + + define double* @test_store_double(double %val, double* %p1) { + store double %val, double* %p1 + ret double* %p1 + } + ... --- name: test_add_i8 @@ -263,4 +318,270 @@ RET 0, implicit %xmm0 ... +--- +name: test_load_i8 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +# CHECK-LABEL: name: test_load_i8 +# CHECK: registers: +# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 1, class: gpr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s8) = G_LOAD %0(p0) :: (load 1 from %ir.p1) + %al = COPY %1(s8) + RET 0, implicit %al + +...
+--- +name: test_load_i16 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +# CHECK-LABEL: name: test_load_i16 +# CHECK: registers: +# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 1, class: gpr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s16) = G_LOAD %0(p0) :: (load 2 from %ir.p1) + %ax = COPY %1(s16) + RET 0, implicit %ax + +... +--- +name: test_load_i32 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +# CHECK-LABEL: name: test_load_i32 +# CHECK: registers: +# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 1, class: gpr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1) + %eax = COPY %1(s32) + RET 0, implicit %eax + +... +--- +name: test_load_i64 +alignment: 4 +exposesReturnsTwice: false +legalized: true +regBankSelected: false +selected: false +# CHECK-LABEL: name: test_load_i64 +# CHECK: registers: +# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 1, class: gpr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1) + %rax = COPY %1(s64) + RET 0, implicit %rax + +... +--- +name: test_load_float +alignment: 4 +legalized: true +regBankSelected: false +selected: false +# CHECK-LABEL: name: test_load_float +# CHECK: registers: +# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 1, class: gpr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1) + %xmm0 = COPY %1(s32) + RET 0, implicit %xmm0 + +... +--- +name: test_load_double +alignment: 4 +legalized: true +regBankSelected: false +selected: false +# CHECK-LABEL: name: test_load_double +# CHECK: registers: +# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 1, class: gpr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1) + %xmm0 = COPY %1(s64) + RET 0, implicit %xmm0 + +... +--- +name: test_load_v4i32 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +# CHECK-LABEL: name: test_load_v4i32 +# CHECK: registers: +# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 1, class: vecr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.p1, align 1) + %xmm0 = COPY %1(<4 x s32>) + RET 0, implicit %xmm0 + +... +--- +name: test_store_i32 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +# CHECK-LABEL: name: test_store_i32 +# CHECK: registers: +# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 1, class: gpr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %edi, %rsi + + %0(s32) = COPY %edi + %1(p0) = COPY %rsi + G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1) + %rax = COPY %1(p0) + RET 0, implicit %rax + +... 
+--- +name: test_store_i64 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +# CHECK-LABEL: name: test_store_i64 +# CHECK: registers: +# CHECK: - { id: 0, class: gpr } +# CHECK: - { id: 1, class: gpr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %rsi + + %0(s64) = COPY %rdi + %1(p0) = COPY %rsi + G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1) + %rax = COPY %1(p0) + RET 0, implicit %rax + +... +--- +name: test_store_float +alignment: 4 +legalized: true +regBankSelected: false +selected: false +# CHECK-LABEL: name: test_store_float +# CHECK: registers: +# CHECK: - { id: 0, class: vecr } +# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 2, class: gpr } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %xmm0 + + %0(s32) = COPY %xmm0 + %1(p0) = COPY %rdi + ; CHECK: %2(s32) = COPY %0(s32) + ; CHECK: G_STORE %2(s32), %1(p0) :: (store 4 into %ir.p1) + G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1) + %rax = COPY %1(p0) + RET 0, implicit %rax + +... +--- +name: test_store_double +alignment: 4 +legalized: true +regBankSelected: false +selected: false +# CHECK-LABEL: name: test_store_double +# CHECK: registers: +# CHECK: - { id: 0, class: vecr } +# CHECK: - { id: 1, class: gpr } +# CHECK: - { id: 2, class: gpr } + +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %xmm0 + + %0(s64) = COPY %xmm0 + %1(p0) = COPY %rdi + ; CHECK: %2(s64) = COPY %0(s64) + ; CHECK: G_STORE %2(s64), %1(p0) :: (store 8 into %ir.p1) + G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1) + %rax = COPY %1(p0) + RET 0, implicit %rax + +... Index: llvm/trunk/test/CodeGen/X86/GlobalISel/binop-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/binop-isel.ll +++ llvm/trunk/test/CodeGen/X86/GlobalISel/binop-isel.ll @@ -155,3 +155,32 @@ %ret = fsub <4 x float> %arg1, %arg2 ret <4 x float> %ret } + +define i32 @test_copy_float(float %val) { +; SSE-LABEL: test_copy_float: +; SSE: # BB#0: +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_copy_float: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: vmovd %xmm0, %eax +; ALL_AVX-NEXT: retq + %r = bitcast float %val to i32 + ret i32 %r +} + +define float @test_copy_i32(i32 %val) { +; SSE-LABEL: test_copy_i32: +; SSE: # BB#0: +; SSE-NEXT: movd %edi, %xmm0 +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_copy_i32: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: vmovd %edi, %xmm0 +; ALL_AVX-NEXT: retq + %r = bitcast i32 %val to float + ret float %r +} + Index: llvm/trunk/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll +++ llvm/trunk/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll @@ -291,3 +291,20 @@ ret double %arg2 } + +define i32 * @test_memop_i32(i32 * %p1) { +; ALL-LABEL:name: test_memop_i32 +;X64 liveins: %rdi +;X64: %0(p0) = COPY %rdi +;X64-NEXT: %rax = COPY %0(p0) +;X64-NEXT: RET 0, implicit %rax + +;X32: fixedStack: +;X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } +;X32: %1(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] +;X32-NEXT: %0(p0) = G_LOAD %1(p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) +;X32-NEXT: %eax = COPY %0(p0) +;X32-NEXT: RET 0, implicit %eax + + ret i32 * %p1; +} \ No newline at end of 
file Index: llvm/trunk/test/CodeGen/X86/GlobalISel/memop-isel.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/memop-isel.ll +++ llvm/trunk/test/CodeGen/X86/GlobalISel/memop-isel.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512F +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512VL + + +define i8 @test_load_i8(i8 * %p1) { +; ALL-LABEL: test_load_i8: +; ALL: # BB#0: +; ALL-NEXT: movb (%rdi), %al +; ALL-NEXT: retq + %r = load i8, i8* %p1 + ret i8 %r +} + +define i16 @test_load_i16(i16 * %p1) { +; ALL-LABEL: test_load_i16: +; ALL: # BB#0: +; ALL-NEXT: movzwl (%rdi), %eax +; ALL-NEXT: retq + %r = load i16, i16* %p1 + ret i16 %r +} + +define i32 @test_load_i32(i32 * %p1) { +; ALL-LABEL: test_load_i32: +; ALL: # BB#0: +; ALL-NEXT: movl (%rdi), %eax +; ALL-NEXT: retq + %r = load i32, i32* %p1 + ret i32 %r +} + +define i64 @test_load_i64(i64 * %p1) { +; ALL-LABEL: test_load_i64: +; ALL: # BB#0: +; ALL-NEXT: movq (%rdi), %rax +; ALL-NEXT: retq + %r = load i64, i64* %p1 + ret i64 %r +} + +define float @test_load_float(float * %p1) { +; SSE-LABEL: test_load_float: +; SSE: # BB#0: +; SSE-NEXT: movl (%rdi), %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_load_float: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: movl (%rdi), %eax +; ALL_AVX-NEXT: vmovd %eax, %xmm0 +; ALL_AVX-NEXT: retq + %r = load float, float* %p1 + ret float %r +} + +define double @test_load_double(double * %p1) { +; SSE-LABEL: test_load_double: +; SSE: # BB#0: +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movd %rax, %xmm0 +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_load_double: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: movq (%rdi), %rax +; ALL_AVX-NEXT: vmovq %rax, %xmm0 +; ALL_AVX-NEXT: retq + %r = load double, double* %p1 + ret double %r +} + +define <4 x i32> @test_load_v4i32_noalign(<4 x i32> * %p1) { +; SSE-LABEL: test_load_v4i32_noalign: +; SSE: # BB#0: +; SSE-NEXT: movups (%rdi), %xmm0 +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_load_v4i32_noalign: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: vmovups (%rdi), %xmm0 +; ALL_AVX-NEXT: retq + %r = load <4 x i32>, <4 x i32>* %p1, align 1 + ret <4 x i32> %r +} + +define <4 x i32> @test_load_v4i32_align(<4 x i32> * %p1) { +; SSE-LABEL: test_load_v4i32_align: +; SSE: # BB#0: +; SSE-NEXT: movaps (%rdi), %xmm0 +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_load_v4i32_align: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: vmovaps (%rdi), %xmm0 +; ALL_AVX-NEXT: retq + %r = load <4 x i32>, <4 x i32>* %p1, align 16 + ret <4 x i32> %r +} + +define i32 * @test_store_i32(i32 %val, i32 * %p1) { +; ALL-LABEL: test_store_i32: +; ALL: # BB#0: +; ALL-NEXT: movl %edi, (%rsi) +; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: retq + store i32 %val, i32* %p1 + ret i32 * %p1; +} + +define i64 * @test_store_i64(i64 %val, i64 * %p1) { +; ALL-LABEL: test_store_i64: +; ALL: # BB#0: +; ALL-NEXT: movq %rdi, (%rsi) +; ALL-NEXT: movq %rsi, %rax +; ALL-NEXT: retq + store i64 %val, i64* %p1 + ret i64 * 
%p1; +} + +define float * @test_store_float(float %val, float * %p1) { +; SSE-LABEL: test_store_float: +; SSE: # BB#0: +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: movl %eax, (%rdi) +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_store_float: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: vmovd %xmm0, %eax +; ALL_AVX-NEXT: movl %eax, (%rdi) +; ALL_AVX-NEXT: movq %rdi, %rax +; ALL_AVX-NEXT: retq + store float %val, float* %p1 + ret float * %p1; +} + +define double * @test_store_double(double %val, double * %p1) { +; SSE-LABEL: test_store_double: +; SSE: # BB#0: +; SSE-NEXT: movd %xmm0, %rax +; SSE-NEXT: movq %rax, (%rdi) +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_store_double: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: vmovq %xmm0, %rax +; ALL_AVX-NEXT: movq %rax, (%rdi) +; ALL_AVX-NEXT: movq %rdi, %rax +; ALL_AVX-NEXT: retq + store double %val, double* %p1 + ret double * %p1; +} + Index: llvm/trunk/test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir =================================================================== --- llvm/trunk/test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir +++ llvm/trunk/test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir @@ -63,6 +63,98 @@ %ret = fsub <4 x float> %arg1, %arg2 ret <4 x float> %ret } + + define i8 @test_load_i8(i8* %p1) { + %r = load i8, i8* %p1 + ret i8 %r + } + + define i16 @test_load_i16(i16* %p1) { + %r = load i16, i16* %p1 + ret i16 %r + } + + define i32 @test_load_i32(i32* %p1) { + %r = load i32, i32* %p1 + ret i32 %r + } + + define i64 @test_load_i64(i64* %p1) { + %r = load i64, i64* %p1 + ret i64 %r + } + + define float @test_load_float(float* %p1) { + %r = load float, float* %p1 + ret float %r + } + + define float @test_load_float_vecreg(float* %p1) { + %r = load float, float* %p1 + ret float %r + } + + + define double @test_load_double(double* %p1) { + %r = load double, double* %p1 + ret double %r + } + + define double @test_load_double_vecreg(double* %p1) { + %r = load double, double* %p1 + ret double %r + } + + define <4 x i32> @test_load_v4i32_noalign(<4 x i32>* %p1) { + %r = load <4 x i32>, <4 x i32>* %p1, align 1 + ret <4 x i32> %r + } + + define <4 x i32> @test_load_v4i32_align(<4 x i32>* %p1) { + %r = load <4 x i32>, <4 x i32>* %p1, align 16 + ret <4 x i32> %r + } + + define i32* @test_store_i32(i32 %val, i32* %p1) { + store i32 %val, i32* %p1 + ret i32* %p1 + } + + define i64* @test_store_i64(i64 %val, i64* %p1) { + store i64 %val, i64* %p1 + ret i64* %p1 + } + + define float* @test_store_float(float %val, float* %p1) { + store float %val, float* %p1 + ret float* %p1 + } + + define float* @test_store_float_vec(float %val, float* %p1) { + store float %val, float* %p1 + ret float* %p1 + } + + define double* @test_store_double(double %val, double* %p1) { + store double %val, double* %p1 + ret double* %p1 + } + + define double* @test_store_double_vec(double %val, double* %p1) { + store double %val, double* %p1 + ret double* %p1 + } + + define <4 x i32>* @test_store_v4i32_align(<4 x i32> %val, <4 x i32>* %p1) { + store <4 x i32> %val, <4 x i32>* %p1, align 16 + ret <4 x i32>* %p1 + } + + define <4 x i32>* @test_store_v4i32_noalign(<4 x i32> %val, <4 x i32>* %p1) { + store <4 x i32> %val, <4 x i32>* %p1, align 1 + ret <4 x i32>* %p1 + } + ... --- @@ -444,3 +536,487 @@ RET 0, implicit %xmm0 ... 
+--- +# ALL-LABEL: name: test_load_i8 +name: test_load_i8 +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr64 } +# ALL: - { id: 1, class: gr8 } + - { id: 0, class: gpr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %rdi +# ALL: %1 = MOV8rm %0, 1, _, 0, _ :: (load 1 from %ir.p1) +# ALL: %al = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s8) = G_LOAD %0(p0) :: (load 1 from %ir.p1) + %al = COPY %1(s8) + RET 0, implicit %al + +... +--- +# ALL-LABEL: name: test_load_i16 +name: test_load_i16 +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr64 } +# ALL: - { id: 1, class: gr16 } + - { id: 0, class: gpr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %rdi +# ALL: %1 = MOV16rm %0, 1, _, 0, _ :: (load 2 from %ir.p1) +# ALL: %ax = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s16) = G_LOAD %0(p0) :: (load 2 from %ir.p1) + %ax = COPY %1(s16) + RET 0, implicit %ax + +... +--- +# ALL-LABEL: name: test_load_i32 +name: test_load_i32 +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr64 } +# ALL: - { id: 1, class: gr32 } + - { id: 0, class: gpr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %rdi +# ALL: %1 = MOV32rm %0, 1, _, 0, _ :: (load 4 from %ir.p1) +# ALL: %eax = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1) + %eax = COPY %1(s32) + RET 0, implicit %eax + +... +--- +# ALL-LABEL: name: test_load_i64 +name: test_load_i64 +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr64 } +# ALL: - { id: 1, class: gr64 } + - { id: 0, class: gpr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %rdi +# ALL: %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.p1) +# ALL: %rax = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1) + %rax = COPY %1(s64) + RET 0, implicit %rax + +... +--- +# ALL-LABEL: name: test_load_float +name: test_load_float +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr64 } +# ALL: - { id: 1, class: gr32 } + - { id: 0, class: gpr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %rdi +# ALL: %1 = MOV32rm %0, 1, _, 0, _ :: (load 4 from %ir.p1) +# ALL: %xmm0 = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1) + %xmm0 = COPY %1(s32) + RET 0, implicit %xmm0 + +... +--- +# ALL-LABEL: name: test_load_float_vecreg +name: test_load_float_vecreg +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr64 } +# NO_AVX512F: - { id: 1, class: fr32 } +# AVX512ALL: - { id: 1, class: fr32x } + - { id: 0, class: gpr } + - { id: 1, class: vecr } +# ALL: %0 = COPY %rdi +# SSE: %1 = MOVSSrm %0, 1, _, 0, _ :: (load 4 from %ir.p1) +# AVX: %1 = VMOVSSrm %0, 1, _, 0, _ :: (load 4 from %ir.p1) +# AVX512ALL: %1 = VMOVSSZrm %0, 1, _, 0, _ :: (load 4 from %ir.p1) +# ALL: %xmm0 = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s32) = G_LOAD %0(p0) :: (load 4 from %ir.p1) + %xmm0 = COPY %1(s32) + RET 0, implicit %xmm0 + +... 
+--- +# ALL-LABEL: name: test_load_double +name: test_load_double +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr64 } +# ALL: - { id: 1, class: gr64 } + - { id: 0, class: gpr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %rdi +# ALL: %1 = MOV64rm %0, 1, _, 0, _ :: (load 8 from %ir.p1) +# ALL: %xmm0 = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1) + %xmm0 = COPY %1(s64) + RET 0, implicit %xmm0 + +... +--- +# ALL-LABEL: name: test_load_double_vecreg +name: test_load_double_vecreg +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr64 } +# NO_AVX512F: - { id: 1, class: fr64 } +# AVX512ALL: - { id: 1, class: fr64x } + - { id: 0, class: gpr } + - { id: 1, class: vecr } +# ALL: %0 = COPY %rdi +# SSE: %1 = MOVSDrm %0, 1, _, 0, _ :: (load 8 from %ir.p1) +# AVX: %1 = VMOVSDrm %0, 1, _, 0, _ :: (load 8 from %ir.p1) +# AVX512ALL: %1 = VMOVSDZrm %0, 1, _, 0, _ :: (load 8 from %ir.p1) +# ALL: %xmm0 = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(s64) = G_LOAD %0(p0) :: (load 8 from %ir.p1) + %xmm0 = COPY %1(s64) + RET 0, implicit %xmm0 + +... +--- +# ALL-LABEL: name: test_load_v4i32_noalign +name: test_load_v4i32_noalign +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr64 } +# NO_AVX512F: - { id: 1, class: vr128 } +# AVX512ALL: - { id: 1, class: vr128x } + - { id: 0, class: gpr } + - { id: 1, class: vecr } +# ALL: %0 = COPY %rdi +# SSE: %1 = MOVUPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1) +# AVX: %1 = VMOVUPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1) +# AVX512F: %1 = VMOVUPSZ128rm_NOVLX %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1) +# AVX512VL: %1 = VMOVUPSZ128rm %0, 1, _, 0, _ :: (load 16 from %ir.p1, align 1) +# ALL: %xmm0 = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.p1, align 1) + %xmm0 = COPY %1(<4 x s32>) + RET 0, implicit %xmm0 + +... +--- +# ALL-LABEL: name: test_load_v4i32_align +name: test_load_v4i32_align +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr64 } +# NO_AVX512F: - { id: 1, class: vr128 } +# AVX512ALL: - { id: 1, class: vr128x } + - { id: 0, class: gpr } + - { id: 1, class: vecr } +# ALL: %0 = COPY %rdi +# SSE: %1 = MOVAPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1) +# AVX: %1 = VMOVAPSrm %0, 1, _, 0, _ :: (load 16 from %ir.p1) +# AVX512F: %1 = VMOVAPSZ128rm_NOVLX %0, 1, _, 0, _ :: (load 16 from %ir.p1) +# AVX512VL: %1 = VMOVAPSZ128rm %0, 1, _, 0, _ :: (load 16 from %ir.p1) +# ALL: %xmm0 = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi + + %0(p0) = COPY %rdi + %1(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.p1) + %xmm0 = COPY %1(<4 x s32>) + RET 0, implicit %xmm0 + +... +--- +# ALL-LABEL: name: test_store_i32 +name: test_store_i32 +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr32 } +# ALL: - { id: 1, class: gr64 } + - { id: 0, class: gpr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %edi +# ALL: %1 = COPY %rsi +# ALL: MOV32mr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1) +# ALL: %rax = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %edi, %rsi + + %0(s32) = COPY %edi + %1(p0) = COPY %rsi + G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1) + %rax = COPY %1(p0) + RET 0, implicit %rax + +... 
+--- +# ALL-LABEL: name: test_store_i64 +name: test_store_i64 +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: gr64 } +# ALL: - { id: 1, class: gr64 } + - { id: 0, class: gpr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %rdi +# ALL: %1 = COPY %rsi +# ALL: MOV64mr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1) +# ALL: %rax = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %rsi + + %0(s64) = COPY %rdi + %1(p0) = COPY %rsi + G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1) + %rax = COPY %1(p0) + RET 0, implicit %rax + +... +--- +# ALL-LABEL: name: test_store_float +name: test_store_float +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: fr32x } +# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 2, class: gr32 } + - { id: 0, class: vecr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } +# ALL: %0 = COPY %xmm0 +# ALL: %1 = COPY %rdi +# ALL: %2 = COPY %0 +# ALL: MOV32mr %1, 1, _, 0, _, %2 :: (store 4 into %ir.p1) +# ALL: %rax = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %xmm0 + + %0(s32) = COPY %xmm0 + %1(p0) = COPY %rdi + %2(s32) = COPY %0(s32) + G_STORE %2(s32), %1(p0) :: (store 4 into %ir.p1) + %rax = COPY %1(p0) + RET 0, implicit %rax + +... +--- +# ALL-LABEL: name: test_store_float_vec +name: test_store_float_vec +alignment: 4 +legalized: true +regBankSelected: true +registers: +# NO_AVX512F: - { id: 0, class: fr32 } +# AVX512ALL: - { id: 0, class: fr32x } +# ALL: - { id: 1, class: gr64 } + - { id: 0, class: vecr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %xmm0 +# ALL: %1 = COPY %rdi +# SSE: MOVSSmr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1) +# AVX: VMOVSSmr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1) +# AVX512ALL: VMOVSSZmr %1, 1, _, 0, _, %0 :: (store 4 into %ir.p1) +# ALL: %rax = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %xmm0 + + %0(s32) = COPY %xmm0 + %1(p0) = COPY %rdi + G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1) + %rax = COPY %1(p0) + RET 0, implicit %rax + +... +--- +# ALL-LABEL: name: test_store_double +name: test_store_double +alignment: 4 +legalized: true +regBankSelected: true +registers: +# ALL: - { id: 0, class: fr64x } +# ALL: - { id: 1, class: gr64 } +# ALL: - { id: 2, class: gr64 } + - { id: 0, class: vecr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } +# ALL: %0 = COPY %xmm0 +# ALL: %1 = COPY %rdi +# ALL: %2 = COPY %0 +# ALL: MOV64mr %1, 1, _, 0, _, %2 :: (store 8 into %ir.p1) +# ALL: %rax = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %xmm0 + + %0(s64) = COPY %xmm0 + %1(p0) = COPY %rdi + %2(s64) = COPY %0(s64) + G_STORE %2(s64), %1(p0) :: (store 8 into %ir.p1) + %rax = COPY %1(p0) + RET 0, implicit %rax + +... +--- +# ALL-LABEL: name: test_store_double_vec +name: test_store_double_vec +alignment: 4 +legalized: true +regBankSelected: true +registers: +# NO_AVX512F: - { id: 0, class: fr64 } +# AVX512ALL: - { id: 0, class: fr64x } +# ALL: - { id: 1, class: gr64 } + - { id: 0, class: vecr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %xmm0 +# ALL: %1 = COPY %rdi +# SSE: MOVSDmr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1) +# AVX: VMOVSDmr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1) +# AVX512ALL: VMOVSDZmr %1, 1, _, 0, _, %0 :: (store 8 into %ir.p1) +# ALL: %rax = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %xmm0 + + %0(s64) = COPY %xmm0 + %1(p0) = COPY %rdi + G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1) + %rax = COPY %1(p0) + RET 0, implicit %rax + +... 
+--- +# ALL-LABEL: name: test_store_v4i32_align +name: test_store_v4i32_align +alignment: 4 +legalized: true +regBankSelected: true +registers: +# NO_AVX512F: - { id: 0, class: vr128 } +# AVX512ALL: - { id: 0, class: vr128x } +# ALL: - { id: 1, class: gr64 } + - { id: 0, class: vecr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %xmm0 +# ALL: %1 = COPY %rdi +# SSE: MOVAPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1) +# AVX: VMOVAPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1) +# AVX512F: VMOVAPSZ128mr_NOVLX %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1) +# AVX512VL: VMOVAPSZ128mr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1) +# ALL: %rax = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %xmm0 + + %0(<4 x s32>) = COPY %xmm0 + %1(p0) = COPY %rdi + G_STORE %0(<4 x s32>), %1(p0) :: (store 16 into %ir.p1, align 16) + %rax = COPY %1(p0) + RET 0, implicit %rax + +... +--- +# ALL-LABEL: name: test_store_v4i32_noalign +name: test_store_v4i32_noalign +alignment: 4 +legalized: true +regBankSelected: true +registers: +# NO_AVX512F: - { id: 0, class: vr128 } +# AVX512ALL: - { id: 0, class: vr128x } +# ALL: - { id: 1, class: gr64 } + - { id: 0, class: vecr } + - { id: 1, class: gpr } +# ALL: %0 = COPY %xmm0 +# ALL: %1 = COPY %rdi +# SSE: MOVUPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1) +# AVX: VMOVUPSmr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1) +# AVX512F: VMOVUPSZ128mr_NOVLX %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1) +# AVX512VL: VMOVUPSZ128mr %1, 1, _, 0, _, %0 :: (store 16 into %ir.p1, align 1) +# ALL: %rax = COPY %1 +body: | + bb.1 (%ir-block.0): + liveins: %rdi, %xmm0 + + %0(<4 x s32>) = COPY %xmm0 + %1(p0) = COPY %rdi + G_STORE %0(<4 x s32>), %1(p0) :: (store 16 into %ir.p1, align 1) + %rax = COPY %1(p0) + RET 0, implicit %rax + +...
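A note on the extra copies checked in the float/double store tests above: the fallback mapping added to X86RegisterBankInfo::getInstrMapping() guesses GPR for every scalar operand, so a value that currently lives on the VECR bank is repaired with a cross-bank copy before the G_STORE is selected into MOV32mr/MOV64mr. The sketch below only approximates what the generic RegBankSelect pass does for such a repair; the helper name and surrounding plumbing are illustrative, not code from this patch:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Return a register on MappedBank carrying the value of Reg, inserting a
// cross-bank COPY when the current bank disagrees -- this is the
// "%2(s32) = COPY %0(s32)" that the test_store_float checks expect before G_STORE.
static unsigned repairOperandBank(MachineIRBuilder &MIRBuilder,
                                  MachineRegisterInfo &MRI, unsigned Reg,
                                  const RegisterBank &MappedBank) {
  if (MRI.getRegBankOrNull(Reg) == &MappedBank)
    return Reg; // already on the requested bank, nothing to repair
  unsigned Repaired = MRI.createGenericVirtualRegister(MRI.getType(Reg));
  MRI.setRegBank(Repaired, MappedBank);
  MIRBuilder.buildCopy(Repaired, Reg); // cross-bank repair copy
  return Repaired; // the store's value operand is rewritten to use this vreg
}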