Index: lib/Target/X86/X86GenRegisterBankInfo.def
===================================================================
--- lib/Target/X86/X86GenRegisterBankInfo.def
+++ lib/Target/X86/X86GenRegisterBankInfo.def
@@ -19,10 +19,17 @@
 RegisterBankInfo::PartialMapping X86GenRegisterBankInfo::PartMappings[]{
     /* StartIdx, Length, RegBank */
     // GPR value
-    {0, 8, X86::GPRRegBank},  // :0
-    {0, 16, X86::GPRRegBank}, // :1
-    {0, 32, X86::GPRRegBank}, // :2
-    {0, 64, X86::GPRRegBank}, // :3
+    {0, 8, X86::GPRRegBank},    // :0
+    {0, 16, X86::GPRRegBank},   // :1
+    {0, 32, X86::GPRRegBank},   // :2
+    {0, 64, X86::GPRRegBank},   // :3
+    // FR32/64, xmm registers
+    {0, 32, X86::VECRRegBank},  // :4
+    {0, 64, X86::VECRRegBank},  // :5
+    // VR128/256/512
+    {0, 128, X86::VECRRegBank}, // :6
+    {0, 256, X86::VECRRegBank}, // :7
+    {0, 512, X86::VECRRegBank}, // :8
 };
 
 enum PartialMappingIdx {
@@ -31,6 +38,11 @@
   PMI_GPR16,
   PMI_GPR32,
   PMI_GPR64,
+  PMI_FP32,
+  PMI_FP64,
+  PMI_VEC128,
+  PMI_VEC256,
+  PMI_VEC512
 };
 
 #define INSTR_3OP(INFO) INFO, INFO, INFO,
@@ -44,17 +56,27 @@
   INSTR_3OP(BREAKDOWN(PMI_GPR8, 1))  // 0: GPR_8
   INSTR_3OP(BREAKDOWN(PMI_GPR16, 1)) // 3: GPR_16
   INSTR_3OP(BREAKDOWN(PMI_GPR32, 1)) // 6: GPR_32
-  INSTR_3OP(BREAKDOWN(PMI_GPR64, 1)) // 9: GPR_64
+  INSTR_3OP(BREAKDOWN(PMI_GPR64, 1))  // 9: GPR_64
+  INSTR_3OP(BREAKDOWN(PMI_FP32, 1))   // 12: Fp32
+  INSTR_3OP(BREAKDOWN(PMI_FP64, 1))   // 15: Fp64
+  INSTR_3OP(BREAKDOWN(PMI_VEC128, 1)) // 18: Vec128
+  INSTR_3OP(BREAKDOWN(PMI_VEC256, 1)) // 21: Vec256
+  INSTR_3OP(BREAKDOWN(PMI_VEC512, 1)) // 24: Vec512
 };
 #undef INSTR_3OP
 #undef BREAKDOWN
 
 enum ValueMappingIdx {
   VMI_None = -1,
-  VMI_3OpsGpr8Idx = 0,
-  VMI_3OpsGpr16Idx = 3,
-  VMI_3OpsGpr32Idx = 6,
-  VMI_3OpsGpr64Idx = 9,
+  VMI_3OpsGpr8Idx = PMI_GPR8 * 3,
+  VMI_3OpsGpr16Idx = PMI_GPR16 * 3,
+  VMI_3OpsGpr32Idx = PMI_GPR32 * 3,
+  VMI_3OpsGpr64Idx = PMI_GPR64 * 3,
+  VMI_3OpsFp32Idx = PMI_FP32 * 3,
+  VMI_3OpsFp64Idx = PMI_FP64 * 3,
+  VMI_3OpsVec128Idx = PMI_VEC128 * 3,
+  VMI_3OpsVec256Idx = PMI_VEC256 * 3,
+  VMI_3OpsVec512Idx = PMI_VEC512 * 3,
 };
 } // End llvm namespace.
Index: lib/Target/X86/X86InstructionSelector.h
===================================================================
--- lib/Target/X86/X86InstructionSelector.h
+++ lib/Target/X86/X86InstructionSelector.h
@@ -22,6 +22,9 @@
 class X86RegisterInfo;
 class X86Subtarget;
 class X86TargetMachine;
+class LLT;
+class RegisterBank;
+class MachineRegisterInfo;
 
 class X86InstructionSelector : public InstructionSelector {
 public:
@@ -35,6 +38,14 @@
   /// the patterns that don't require complex C++.
   bool selectImpl(MachineInstr &I) const;
 
+  // TODO: remove after selectImpl supports patterns with predicates.
+  unsigned getFAddOp(LLT &Ty, const RegisterBank &RB) const;
+  unsigned getFSubOp(LLT &Ty, const RegisterBank &RB) const;
+  unsigned getAddOp(LLT &Ty, const RegisterBank &RB) const;
+  unsigned getSubOp(LLT &Ty, const RegisterBank &RB) const;
+  bool selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
+  const X86Subtarget &STI;
   const X86InstrInfo &TII;
   const X86RegisterInfo &TRI;
   const X86RegisterBankInfo &RBI;
Index: lib/Target/X86/X86InstructionSelector.cpp
===================================================================
--- lib/Target/X86/X86InstructionSelector.cpp
+++ lib/Target/X86/X86InstructionSelector.cpp
@@ -39,7 +39,7 @@
 
 X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI,
                                                const X86RegisterBankInfo &RBI)
-    : InstructionSelector(), TII(*STI.getInstrInfo()),
+    : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()),
       TRI(*STI.getRegisterInfo()), RBI(RBI) {}
 
 // FIXME: This should be target-independent, inferred from the types declared
@@ -47,11 +47,23 @@
 static const TargetRegisterClass *
 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) {
   if (RB.getID() == X86::GPRRegBankID) {
-    if (Ty.getSizeInBits() <= 32)
+    if (Ty.getSizeInBits() == 32)
       return &X86::GR32RegClass;
     if (Ty.getSizeInBits() == 64)
       return &X86::GR64RegClass;
   }
+  if (RB.getID() == X86::VECRRegBankID) {
+    if (Ty.getSizeInBits() == 32)
+      return &X86::FR32XRegClass;
+    if (Ty.getSizeInBits() == 64)
+      return &X86::FR64XRegClass;
+    if (Ty.getSizeInBits() == 128)
+      return &X86::VR128XRegClass;
+    if (Ty.getSizeInBits() == 256)
+      return &X86::VR256XRegClass;
+    if (Ty.getSizeInBits() == 512)
+      return &X86::VR512RegClass;
+  }
 
   llvm_unreachable("Unknown RegBank!");
 }
@@ -89,6 +101,9 @@
     assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values.");
     RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
     break;
+  case X86::VECRRegBankID:
+    RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
+    break;
   default:
     llvm_unreachable("Unknown RegBank!");
   }
@@ -96,10 +111,13 @@
   // No need to constrain SrcReg. It will get constrained when
   // we hit another of its use or its defs.
   // Copies do not have constraints.
-  if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
-    DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
-                 << " operand\n");
-    return false;
+  const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
+  if (!OldRC || !RC->hasSubClassEq(OldRC)) {
+    if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
+      DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
+                   << " operand\n");
+      return false;
+    }
   }
   I.setDesc(TII.get(X86::COPY));
   return true;
 }
@@ -127,5 +145,152 @@
   assert(I.getNumOperands() == I.getNumExplicitOperands() &&
          "Generic instruction has unexpected implicit operands\n");
 
+  // TODO: This should be implemented by tblgen; patterns with predicates are not supported yet.
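+  // Until then, try the hand-written selector below first and fall back to
+  // the generated selectImpl() when it does not apply.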
+  if (selectBinaryOp(I, MRI))
+    return true;
+
   return selectImpl(I);
 }
+
+unsigned X86InstructionSelector::getFAddOp(LLT &Ty,
+                                           const RegisterBank &RB) const {
+
+  if (X86::VECRRegBankID != RB.getID())
+    return TargetOpcode::G_FADD;
+
+  if (Ty == LLT::scalar(32)) {
+    if (STI.hasAVX512()) {
+      return X86::VADDSSZrr;
+    } else if (STI.hasAVX()) {
+      return X86::VADDSSrr;
+    } else if (STI.hasSSE1()) {
+      return X86::ADDSSrr;
+    }
+  } else if (Ty == LLT::scalar(64)) {
+    if (STI.hasAVX512()) {
+      return X86::VADDSDZrr;
+    } else if (STI.hasAVX()) {
+      return X86::VADDSDrr;
+    } else if (STI.hasSSE2()) {
+      return X86::ADDSDrr;
+    }
+  } else if (Ty == LLT::vector(4, 32)) {
+    if ((STI.hasAVX512()) && (STI.hasVLX())) {
+      return X86::VADDPSZ128rr;
+    } else if (STI.hasAVX()) {
+      return X86::VADDPSrr;
+    } else if (STI.hasSSE1()) {
+      return X86::ADDPSrr;
+    }
+  }
+
+  return TargetOpcode::G_FADD;
+}
+
+unsigned X86InstructionSelector::getFSubOp(LLT &Ty,
+                                           const RegisterBank &RB) const {
+
+  if (X86::VECRRegBankID != RB.getID())
+    return TargetOpcode::G_FSUB;
+
+  if (Ty == LLT::scalar(32)) {
+    if (STI.hasAVX512()) {
+      return X86::VSUBSSZrr;
+    } else if (STI.hasAVX()) {
+      return X86::VSUBSSrr;
+    } else if (STI.hasSSE1()) {
+      return X86::SUBSSrr;
+    }
+  } else if (Ty == LLT::scalar(64)) {
+    if (STI.hasAVX512()) {
+      return X86::VSUBSDZrr;
+    } else if (STI.hasAVX()) {
+      return X86::VSUBSDrr;
+    } else if (STI.hasSSE2()) {
+      return X86::SUBSDrr;
+    }
+  } else if (Ty == LLT::vector(4, 32)) {
+    if ((STI.hasAVX512()) && (STI.hasVLX())) {
+      return X86::VSUBPSZ128rr;
+    } else if (STI.hasAVX()) {
+      return X86::VSUBPSrr;
+    } else if (STI.hasSSE1()) {
+      return X86::SUBPSrr;
+    }
+  }
+
+  return TargetOpcode::G_FSUB;
+}
+
+unsigned X86InstructionSelector::getAddOp(LLT &Ty,
+                                          const RegisterBank &RB) const {
+
+  if (X86::VECRRegBankID != RB.getID())
+    return TargetOpcode::G_ADD;
+
+  if (Ty == LLT::vector(4, 32)) {
+    if (STI.hasAVX512() && STI.hasVLX()) {
+      return X86::VPADDDZ128rr;
+    } else if (STI.hasAVX()) {
+      return X86::VPADDDrr;
+    } else if (STI.hasSSE2()) {
+      return X86::PADDDrr;
+    }
+  }
+
+  return TargetOpcode::G_ADD;
+}
+
+unsigned X86InstructionSelector::getSubOp(LLT &Ty,
+                                          const RegisterBank &RB) const {
+
+  if (X86::VECRRegBankID != RB.getID())
+    return TargetOpcode::G_SUB;
+
+  if (Ty == LLT::vector(4, 32)) {
+    if (STI.hasAVX512() && STI.hasVLX()) {
+      return X86::VPSUBDZ128rr;
+    } else if (STI.hasAVX()) {
+      return X86::VPSUBDrr;
+    } else if (STI.hasSSE2()) {
+      return X86::PSUBDrr;
+    }
+  }
+
+  return TargetOpcode::G_SUB;
+}
+
+bool X86InstructionSelector::selectBinaryOp(MachineInstr &I,
+                                            MachineRegisterInfo &MRI) const {
+
+  LLT Ty = MRI.getType(I.getOperand(0).getReg());
+  const unsigned DefReg = I.getOperand(0).getReg();
+  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+  unsigned NewOpc = I.getOpcode();
+
+  switch (I.getOpcode()) {
+  case TargetOpcode::G_FADD:
+    NewOpc = getFAddOp(Ty, RB);
+    break;
+  case TargetOpcode::G_FSUB:
+    NewOpc = getFSubOp(Ty, RB);
+    break;
+  case TargetOpcode::G_ADD:
+    NewOpc = getAddOp(Ty, RB);
+    break;
+  case TargetOpcode::G_SUB:
+    NewOpc = getSubOp(Ty, RB);
+    break;
+  default:
+    break;
+  }
+
+  if (NewOpc == I.getOpcode())
+    return false;
+
+  I.setDesc(TII.get(NewOpc));
+
+  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
Index: lib/Target/X86/X86LegalizerInfo.h =================================================================== --- lib/Target/X86/X86LegalizerInfo.h +++ lib/Target/X86/X86LegalizerInfo.h @@ -34,6 +34,8 @@ private: void setLegalizerInfo32bit(); void
setLegalizerInfo64bit(); + void setLegalizerInfoSSE1(); + void setLegalizerInfoSSE2(); }; } // End llvm namespace. #endif Index: lib/Target/X86/X86LegalizerInfo.cpp =================================================================== --- lib/Target/X86/X86LegalizerInfo.cpp +++ lib/Target/X86/X86LegalizerInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/Target/TargetOpcodes.h" using namespace llvm; +using namespace TargetOpcode; #ifndef LLVM_BUILD_GLOBAL_ISEL #error "You shouldn't build this" @@ -28,6 +29,8 @@ setLegalizerInfo32bit(); setLegalizerInfo64bit(); + setLegalizerInfoSSE1(); + setLegalizerInfoSSE2(); computeTables(); } @@ -39,8 +42,8 @@ const LLT s32 = LLT::scalar(32); for (auto Ty : {s8, s16, s32}) { - setAction({TargetOpcode::G_ADD, Ty}, Legal); - setAction({TargetOpcode::G_SUB, Ty}, Legal); + setAction({G_ADD, Ty}, Legal); + setAction({G_SUB, Ty}, Legal); } } @@ -51,6 +54,36 @@ const LLT s64 = LLT::scalar(64); - setAction({TargetOpcode::G_ADD, s64}, Legal); - setAction({TargetOpcode::G_SUB, s64}, Legal); + setAction({G_ADD, s64}, Legal); + setAction({G_SUB, s64}, Legal); +} + +void X86LegalizerInfo::setLegalizerInfoSSE1() { + if (!Subtarget.hasSSE1()) + return; + + const LLT s32 = LLT::scalar(32); + const LLT v4s32 = LLT::vector(4, 32); + + for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) + for (auto Ty : {s32, v4s32}) + setAction({BinOp, Ty}, Legal); +} + +void X86LegalizerInfo::setLegalizerInfoSSE2() { + if (!Subtarget.hasSSE2()) + return; + + const LLT s64 = LLT::scalar(64); + const LLT v4s32 = LLT::vector(4, 32); + const LLT v2s64 = LLT::vector(2, 64); + + for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) + for (auto Ty : {s64, v2s64}) + setAction({BinOp, Ty}, Legal); + + for (unsigned BinOp : {G_ADD, G_SUB}) + for (auto Ty : {v4s32}) + setAction({BinOp, Ty}, Legal); + } Index: lib/Target/X86/X86RegisterBankInfo.cpp =================================================================== --- lib/Target/X86/X86RegisterBankInfo.cpp +++ lib/Target/X86/X86RegisterBankInfo.cpp @@ -54,6 +54,13 @@ X86::GR64RegClass.hasSubClassEq(&RC)) return getRegBank(X86::GPRRegBankID); + if (X86::FR32XRegClass.hasSubClassEq(&RC) || + X86::FR64XRegClass.hasSubClassEq(&RC) || + X86::VR128XRegClass.hasSubClassEq(&RC) || + X86::VR256XRegClass.hasSubClassEq(&RC) || + X86::VR512RegClass.hasSubClassEq(&RC)) + return getRegBank(X86::VECRRegBankID); + llvm_unreachable("Unsupported register kind yet."); } @@ -71,26 +78,51 @@ llvm_unreachable("Unsupported operand maping yet."); ValueMappingIdx ValMapIdx = VMI_None; - if (!isFP) { + + if (Ty.isScalar()) { + if (!isFP) { + switch (Ty.getSizeInBits()) { + case 8: + ValMapIdx = VMI_3OpsGpr8Idx; + break; + case 16: + ValMapIdx = VMI_3OpsGpr16Idx; + break; + case 32: + ValMapIdx = VMI_3OpsGpr32Idx; + break; + case 64: + ValMapIdx = VMI_3OpsGpr64Idx; + break; + default: + llvm_unreachable("Unsupported register size."); + } + } else { + switch (Ty.getSizeInBits()) { + case 32: + ValMapIdx = VMI_3OpsFp32Idx; + break; + case 64: + ValMapIdx = VMI_3OpsFp64Idx; + break; + default: + llvm_unreachable("Unsupported register size."); + } + } + } else { switch (Ty.getSizeInBits()) { - case 8: - ValMapIdx = VMI_3OpsGpr8Idx; + case 128: + ValMapIdx = VMI_3OpsVec128Idx; break; - case 16: - ValMapIdx = VMI_3OpsGpr16Idx; + case 256: + ValMapIdx = VMI_3OpsVec256Idx; break; - case 32: - ValMapIdx = VMI_3OpsGpr32Idx; - break; - case 64: - ValMapIdx = VMI_3OpsGpr64Idx; + case 512: + ValMapIdx = VMI_3OpsVec512Idx; break; default: llvm_unreachable("Unsupported register size."); - 
break; } - } else { - llvm_unreachable("Floating point not supported yet."); } return InstructionMapping{DefaultMappingID, 1, &ValMappings[ValMapIdx], @@ -114,6 +146,12 @@ case TargetOpcode::G_SUB: return getOperandsMapping(MI, false); break; + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + return getOperandsMapping(MI, true); + break; default: return InstructionMapping{}; } Index: lib/Target/X86/X86RegisterBanks.td =================================================================== --- lib/Target/X86/X86RegisterBanks.td +++ lib/Target/X86/X86RegisterBanks.td @@ -12,3 +12,6 @@ /// General Purpose Registers: RAX, RCX,... def GPRRegBank : RegisterBank<"GPR", [GR64]>; + +/// Floating Point/Vector Registers +def VECRRegBank : RegisterBank<"VECR", [VR512]>; Index: test/CodeGen/X86/GlobalISel/X86-regbankselect.mir =================================================================== --- test/CodeGen/X86/GlobalISel/X86-regbankselect.mir +++ test/CodeGen/X86/GlobalISel/X86-regbankselect.mir @@ -5,27 +5,47 @@ source_filename = "tmp.ll" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--linux-gnu" - + define i8 @test_add_i8(i8 %arg1, i8 %arg2) { %ret = add i8 %arg1, %arg2 ret i8 %ret } - + define i16 @test_add_i16(i16 %arg1, i16 %arg2) { %ret = add i16 %arg1, %arg2 ret i16 %ret } - + define i32 @test_add_i32(i32 %arg1, i32 %arg2) { %ret = add i32 %arg1, %arg2 ret i32 %ret } - + define i64 @test_add_i64(i64 %arg1, i64 %arg2) { %ret = add i64 %arg1, %arg2 ret i64 %ret } + define float @test_add_float(float %arg1, float %arg2) { + %ret = fadd float %arg1, %arg2 + ret float %ret + } + + define double @test_add_double(double %arg1, double %arg2) { + %ret = fadd double %arg1, %arg2 + ret double %ret + } + + define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) { + %ret = add <4 x i32> %arg1, %arg2 + ret <4 x i32> %ret + } + + define <4 x float> @test_add_v4f32(<4 x float> %arg1, <4 x float> %arg2) { + %ret = fadd <4 x float> %arg1, %arg2 + ret <4 x float> %ret + } + ... --- name: test_add_i8 @@ -39,14 +59,14 @@ # CHECK: - { id: 0, class: gpr } # CHECK: - { id: 1, class: gpr } # CHECK: - { id: 2, class: gpr } -registers: +registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } body: | bb.1 (%ir-block.0): liveins: %edi, %esi - + %0(s8) = COPY %edi %1(s8) = COPY %esi %2(s8) = G_ADD %0, %1 @@ -66,14 +86,14 @@ # CHECK: - { id: 0, class: gpr } # CHECK: - { id: 1, class: gpr } # CHECK: - { id: 2, class: gpr } -registers: +registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } body: | bb.1 (%ir-block.0): liveins: %edi, %esi - + %0(s16) = COPY %edi %1(s16) = COPY %esi %2(s16) = G_ADD %0, %1 @@ -93,14 +113,14 @@ # CHECK: - { id: 0, class: gpr } # CHECK: - { id: 1, class: gpr } # CHECK: - { id: 2, class: gpr } -registers: +registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } body: | bb.1 (%ir-block.0): liveins: %edi, %esi - + %0(s32) = COPY %edi %1(s32) = COPY %esi %2(s32) = G_ADD %0, %1 @@ -120,14 +140,14 @@ # CHECK: - { id: 0, class: gpr } # CHECK: - { id: 1, class: gpr } # CHECK: - { id: 2, class: gpr } -registers: +registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } body: | bb.1 (%ir-block.0): liveins: %rdi, %rsi - + %0(s64) = COPY %rdi %1(s64) = COPY %rsi %2(s64) = G_ADD %0, %1 @@ -135,3 +155,112 @@ RET 0, implicit %rax ... 
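+# The new cases below check that scalar FP and vector values are mapped to
+# the vecr bank (xmm registers) rather than gpr.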
+--- +name: test_add_float +alignment: 4 +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +# CHECK-LABEL: name: test_add_float +# CHECK: registers: +# CHECK: - { id: 0, class: vecr } +# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 2, class: vecr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(s32) = COPY %xmm0 + %1(s32) = COPY %xmm1 + %2(s32) = G_FADD %0, %1 + %xmm0 = COPY %2(s32) + RET 0, implicit %xmm0 + +... +--- +name: test_add_double +alignment: 4 +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +# CHECK-LABEL: name: test_add_double +# CHECK: registers: +# CHECK: - { id: 0, class: vecr } +# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 2, class: vecr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(s64) = COPY %xmm0 + %1(s64) = COPY %xmm1 + %2(s64) = G_FADD %0, %1 + %xmm0 = COPY %2(s64) + RET 0, implicit %xmm0 + +... +--- +name: test_add_v4i32 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +# CHECK-LABEL: name: test_add_v4i32 +# CHECK: registers: +# CHECK: - { id: 0, class: vecr } +# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 2, class: vecr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<4 x s32>) = COPY %xmm0 + %1(<4 x s32>) = COPY %xmm1 + %2(<4 x s32>) = G_ADD %0, %1 + %xmm0 = COPY %2(<4 x s32>) + RET 0, implicit %xmm0 + +... +--- +name: test_add_v4f32 +alignment: 4 +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +# CHECK-LABEL: name: test_add_v4f32 +# CHECK: registers: +# CHECK: - { id: 0, class: vecr } +# CHECK: - { id: 1, class: vecr } +# CHECK: - { id: 2, class: vecr } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<4 x s32>) = COPY %xmm0 + %1(<4 x s32>) = COPY %xmm1 + %2(<4 x s32>) = G_FADD %0, %1 + %xmm0 = COPY %2(<4 x s32>) + RET 0, implicit %xmm0 + +... 
+
Index: test/CodeGen/X86/GlobalISel/binop-isel.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/binop-isel.ll
+++ test/CodeGen/X86/GlobalISel/binop-isel.ll
@@ -1,42 +1,157 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX
+; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512F
+; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512VL
 
 define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
-; CHECK-LABEL: test_add_i64:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    leaq (%rsi,%rdi), %rax
-; CHECK-NEXT:    retq
+; ALL-LABEL: test_add_i64:
+; ALL:       # BB#0:
+; ALL-NEXT:    leaq (%rsi,%rdi), %rax
+; ALL-NEXT:    retq
   %ret = add i64 %arg1, %arg2
   ret i64 %ret
 }
 
 define i32 @test_add_i32(i32 %arg1, i32 %arg2) {
-; CHECK-LABEL: test_add_i32:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI
-; CHECK-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI
-; CHECK-NEXT:    leal (%rsi,%rdi), %eax
-; CHECK-NEXT:    retq
+; ALL-LABEL: test_add_i32:
+; ALL:       # BB#0:
+; ALL-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI
+; ALL-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI
+; ALL-NEXT:    leal (%rsi,%rdi), %eax
+; ALL-NEXT:    retq
   %ret = add i32 %arg1, %arg2
   ret i32 %ret
 }
 
 define i64 @test_sub_i64(i64 %arg1, i64 %arg2) {
-; CHECK-LABEL: test_sub_i64:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    subq %rsi, %rdi
-; CHECK-NEXT:    movq %rdi, %rax
-; CHECK-NEXT:    retq
+; ALL-LABEL: test_sub_i64:
+; ALL:       # BB#0:
+; ALL-NEXT:    subq %rsi, %rdi
+; ALL-NEXT:    movq %rdi, %rax
+; ALL-NEXT:    retq
   %ret = sub i64 %arg1, %arg2
   ret i64 %ret
 }
 
 define i32 @test_sub_i32(i32 %arg1, i32 %arg2) {
-; CHECK-LABEL: test_sub_i32:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    subl %esi, %edi
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    retq
+; ALL-LABEL: test_sub_i32:
+; ALL:       # BB#0:
+; ALL-NEXT:    subl %esi, %edi
+; ALL-NEXT:    movl %edi, %eax
+; ALL-NEXT:    retq
   %ret = sub i32 %arg1, %arg2
   ret i32 %ret
 }
+
+define float @test_add_float(float %arg1, float %arg2) {
+; SSE-LABEL: test_add_float:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; ALL_AVX-LABEL: test_add_float:
+; ALL_AVX:       # BB#0:
+; ALL_AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; ALL_AVX-NEXT:    retq
+  %ret = fadd float %arg1, %arg2
+  ret float %ret
+}
+
+define double @test_add_double(double %arg1, double %arg2) {
+; SSE-LABEL: test_add_double:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; ALL_AVX-LABEL: test_add_double:
+; ALL_AVX:       # BB#0:
+; ALL_AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
+; ALL_AVX-NEXT:    retq
+  %ret = fadd double %arg1, %arg2
+  ret double %ret
+}
+
+define float @test_sub_float(float %arg1, float %arg2) {
+; SSE-LABEL: test_sub_float:
+; SSE:       # BB#0:
+; SSE-NEXT:    subss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; ALL_AVX-LABEL: test_sub_float:
+; ALL_AVX:       # BB#0:
+; ALL_AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
+; ALL_AVX-NEXT:    retq
+  %ret = fsub float %arg1, %arg2
+  ret float %ret
+}
+
+define double @test_sub_double(double %arg1, double %arg2) {
+; SSE-LABEL: test_sub_double:
+; SSE:       # BB#0:
+; SSE-NEXT:    subsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; ALL_AVX-LABEL: test_sub_double: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; ALL_AVX-NEXT: retq + %ret = fsub double %arg1, %arg2 + ret double %ret +} + +define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) { +; SSE-LABEL: test_add_v4i32: +; SSE: # BB#0: +; SSE-NEXT: paddd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_add_v4i32: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; ALL_AVX-NEXT: retq + %ret = add <4 x i32> %arg1, %arg2 + ret <4 x i32> %ret +} + +define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) { +; SSE-LABEL: test_sub_v4i32: +; SSE: # BB#0: +; SSE-NEXT: psubd %xmm1, %xmm0 +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_sub_v4i32: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 +; ALL_AVX-NEXT: retq + %ret = sub <4 x i32> %arg1, %arg2 + ret <4 x i32> %ret +} + +define <4 x float> @test_add_v4f32(<4 x float> %arg1, <4 x float> %arg2) { +; SSE-LABEL: test_add_v4f32: +; SSE: # BB#0: +; SSE-NEXT: addps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_add_v4f32: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; ALL_AVX-NEXT: retq + %ret = fadd <4 x float> %arg1, %arg2 + ret <4 x float> %ret +} + +define <4 x float> @test_sub_v4f32(<4 x float> %arg1, <4 x float> %arg2) { +; SSE-LABEL: test_sub_v4f32: +; SSE: # BB#0: +; SSE-NEXT: subps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; ALL_AVX-LABEL: test_sub_v4f32: +; ALL_AVX: # BB#0: +; ALL_AVX-NEXT: vsubps %xmm1, %xmm0, %xmm0 +; ALL_AVX-NEXT: retq + %ret = fsub <4 x float> %arg1, %arg2 + ret <4 x float> %ret +} Index: test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll =================================================================== --- test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll +++ test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll @@ -5,15 +5,15 @@ @a7_8bit = external global i8 @a8_8bit = external global i8 -define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, +define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7, i8 %arg8) { ; ALL-LABEL: name: test_i8_args_8 -; X64: fixedStack: +; X64: fixedStack: ; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 1, alignment: 8, isImmutable: true, isAliased: false ; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 1, alignment: 16, isImmutable: true, isAliased: false -; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d +; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d ; X64: [[ARG1:%[0-9]+]](s8) = COPY %edi ; X64-NEXT: %{{[0-9]+}}(s8) = COPY %esi ; X64-NEXT: %{{[0-9]+}}(s8) = COPY %edx @@ -25,7 +25,7 @@ ; X64-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] ; X64-NEXT: [[ARG8:%[0-9]+]](s8) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0) -; X32: fixedStack: +; X32: fixedStack: ; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 1, alignment: 4, isImmutable: true, isAliased: false } ; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 1, alignment: 8, isImmutable: true, isAliased: false } ; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 1, alignment: 4, isImmutable: true, isAliased: false } @@ -40,7 +40,7 @@ ; X32-NEXT: [[ARG2:%[0-9]+]](s8) = G_LOAD [[ARG2_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK4]], align 0) ; X32-NEXT: [[ARG3_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] ; X32-NEXT: [[ARG3:%[0-9]+]](s8) = G_LOAD [[ARG3_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0) -; X32-NEXT: [[ARG4_ADDR:%[0-9]+]](p0) = 
G_FRAME_INDEX %fixed-stack.[[STACK12]] +; X32-NEXT: [[ARG4_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK12]] ; X32-NEXT: [[ARG4:%[0-9]+]](s8) = G_LOAD [[ARG4_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK12]], align 0) ; X32-NEXT: [[ARG5_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]] ; X32-NEXT: [[ARG5:%[0-9]+]](s8) = G_LOAD [[ARG5_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK16]], align 0) @@ -53,7 +53,7 @@ ; ALL-NEXT: [[GADDR_A1:%[0-9]+]](p0) = G_GLOBAL_VALUE @a1_8bit ; ALL-NEXT: [[GADDR_A7:%[0-9]+]](p0) = G_GLOBAL_VALUE @a7_8bit -; ALL-NEXT: [[GADDR_A8:%[0-9]+]](p0) = G_GLOBAL_VALUE @a8_8bit +; ALL-NEXT: [[GADDR_A8:%[0-9]+]](p0) = G_GLOBAL_VALUE @a8_8bit ; ALL-NEXT: G_STORE [[ARG1]](s8), [[GADDR_A1]](p0) :: (store 1 into @a1_8bit) ; ALL-NEXT: G_STORE [[ARG7]](s8), [[GADDR_A7]](p0) :: (store 1 into @a7_8bit) ; ALL-NEXT: G_STORE [[ARG8]](s8), [[GADDR_A8]](p0) :: (store 1 into @a8_8bit) @@ -71,15 +71,15 @@ @a7_32bit = external global i32 @a8_32bit = external global i32 -define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, +define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8) { ; ALL-LABEL: name: test_i32_args_8 -; X64: fixedStack: +; X64: fixedStack: ; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false ; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false -; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d +; X64: liveins: %ecx, %edi, %edx, %esi, %r8d, %r9d ; X64: [[ARG1:%[0-9]+]](s32) = COPY %edi ; X64-NEXT: %{{[0-9]+}}(s32) = COPY %esi ; X64-NEXT: %{{[0-9]+}}(s32) = COPY %edx @@ -91,7 +91,7 @@ ; X64-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] ; X64-NEXT: [[ARG8:%[0-9]+]](s32) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0) -; X32: fixedStack: +; X32: fixedStack: ; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false } ; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false } ; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false } @@ -102,24 +102,24 @@ ; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] ; X32-NEXT: [[ARG1:%[0-9]+]](s32) = G_LOAD [[ARG1_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK0]], align 0) -; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] +; X32-NEXT: [[ARG2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK4]] ; X32-NEXT: [[ARG2:%[0-9]+]](s32) = G_LOAD [[ARG2_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK4]], align 0) -; X32-NEXT: [[ARG3_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] +; X32-NEXT: [[ARG3_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] ; X32-NEXT: [[ARG3:%[0-9]+]](s32) = G_LOAD [[ARG3_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK8]], align 0) -; X32-NEXT: [[ARG4_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK12]] +; X32-NEXT: [[ARG4_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK12]] ; X32-NEXT: [[ARG4:%[0-9]+]](s32) = G_LOAD [[ARG4_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK12]], align 0) -; X32-NEXT: [[ARG5_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]] +; X32-NEXT: 
[[ARG5_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]] ; X32-NEXT: [[ARG5:%[0-9]+]](s32) = G_LOAD [[ARG5_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK16]], align 0) -; X32-NEXT: [[ARG6_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK20]] +; X32-NEXT: [[ARG6_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK20]] ; X32-NEXT: [[ARG6:%[0-9]+]](s32) = G_LOAD [[ARG6_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK20]], align 0) -; X32-NEXT: [[ARG7_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK24]] +; X32-NEXT: [[ARG7_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK24]] ; X32-NEXT: [[ARG7:%[0-9]+]](s32) = G_LOAD [[ARG7_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK24]], align 0) -; X32-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK28]] +; X32-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK28]] ; X32-NEXT: [[ARG8:%[0-9]+]](s32) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 4 from %fixed-stack.[[STACK28]], align 0) ; ALL-NEXT: [[GADDR_A1:%[0-9]+]](p0) = G_GLOBAL_VALUE @a1_32bit ; ALL-NEXT: [[GADDR_A7:%[0-9]+]](p0) = G_GLOBAL_VALUE @a7_32bit -; ALL-NEXT: [[GADDR_A8:%[0-9]+]](p0) = G_GLOBAL_VALUE @a8_32bit +; ALL-NEXT: [[GADDR_A8:%[0-9]+]](p0) = G_GLOBAL_VALUE @a8_32bit ; ALL-NEXT: G_STORE [[ARG1]](s32), [[GADDR_A1]](p0) :: (store 4 into @a1_32bit) ; ALL-NEXT: G_STORE [[ARG7]](s32), [[GADDR_A7]](p0) :: (store 4 into @a7_32bit) ; ALL-NEXT: G_STORE [[ARG8]](s32), [[GADDR_A8]](p0) :: (store 4 into @a8_32bit) @@ -129,7 +129,7 @@ entry: store i32 %arg1, i32* @a1_32bit store i32 %arg7, i32* @a7_32bit - store i32 %arg8, i32* @a8_32bit + store i32 %arg8, i32* @a8_32bit ret i32 %arg1 } @@ -137,11 +137,11 @@ @a7_64bit = external global i64 @a8_64bit = external global i64 -define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, +define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8) { ; ALL-LABEL: name: test_i64_args_8 -; X64: fixedStack: +; X64: fixedStack: ; X64: id: [[STACK8:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false ; X64: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false ; X64: liveins: %rcx, %rdi, %rdx, %rsi, %r8, %r9 @@ -156,22 +156,22 @@ ; X64-NEXT: [[ARG8_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] ; X64-NEXT: [[ARG8:%[0-9]+]](s64) = G_LOAD [[ARG8_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0) -; X32: fixedStack: -; X32: id: [[STACK60:[0-9]+]], offset: 60, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK56:[0-9]+]], offset: 56, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK52:[0-9]+]], offset: 52, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK48:[0-9]+]], offset: 48, size: 4, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK44:[0-9]+]], offset: 44, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK40:[0-9]+]], offset: 40, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK36:[0-9]+]], offset: 36, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK32:[0-9]+]], offset: 32, size: 4, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, 
alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false } -; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false } -; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false } -; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } +; X32: fixedStack: +; X32: id: [[STACK60:[0-9]+]], offset: 60, size: 4, alignment: 4, isImmutable: true, isAliased: false } +; X32: id: [[STACK56:[0-9]+]], offset: 56, size: 4, alignment: 8, isImmutable: true, isAliased: false } +; X32: id: [[STACK52:[0-9]+]], offset: 52, size: 4, alignment: 4, isImmutable: true, isAliased: false } +; X32: id: [[STACK48:[0-9]+]], offset: 48, size: 4, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK44:[0-9]+]], offset: 44, size: 4, alignment: 4, isImmutable: true, isAliased: false } +; X32: id: [[STACK40:[0-9]+]], offset: 40, size: 4, alignment: 8, isImmutable: true, isAliased: false } +; X32: id: [[STACK36:[0-9]+]], offset: 36, size: 4, alignment: 4, isImmutable: true, isAliased: false } +; X32: id: [[STACK32:[0-9]+]], offset: 32, size: 4, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK28:[0-9]+]], offset: 28, size: 4, alignment: 4, isImmutable: true, isAliased: false } +; X32: id: [[STACK24:[0-9]+]], offset: 24, size: 4, alignment: 8, isImmutable: true, isAliased: false } +; X32: id: [[STACK20:[0-9]+]], offset: 20, size: 4, alignment: 4, isImmutable: true, isAliased: false } +; X32: id: [[STACK16:[0-9]+]], offset: 16, size: 4, alignment: 16, isImmutable: true, isAliased: false } +; X32: id: [[STACK12:[0-9]+]], offset: 12, size: 4, alignment: 4, isImmutable: true, isAliased: false } +; X32: id: [[STACK8:[0-9]+]], offset: 8, size: 4, alignment: 8, isImmutable: true, isAliased: false } +; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } ; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } ; X32: [[ARG1L_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] @@ -233,20 +233,20 @@ entry: store i64 %arg1, i64* @a1_64bit store i64 %arg7, i64* @a7_64bit - store i64 %arg8, i64* @a8_64bit + store i64 %arg8, i64* @a8_64bit ret i64 %arg1 } define float @test_float_args(float %arg1, float %arg2) { -; ALL-LABEL:name: test_float_args +; ALL-LABEL:name: test_float_args -; X64: liveins: %xmm0, %xmm1 +; X64: liveins: %xmm0, %xmm1 ; X64: [[ARG1:%[0-9]+]](s32) = COPY %xmm0 ; X64-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %xmm1 ; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](s32) ; X64-NEXT: RET 0, implicit %xmm0 -; X32: fixedStack: +; X32: fixedStack: ; X32: id: [[STACK4:[0-9]+]], offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } ; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 4, alignment: 16, isImmutable: true, isAliased: false } ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] @@ -255,19 +255,19 @@ ; X32-NEXT: [[ARG2:%[0-9]+]](s32) = G_LOAD [[ARG2_ADDR:%[0-9]+]](p0) :: (invariant load 4 from %fixed-stack.[[STACK4]], align 0) ; X32-NEXT: %fp0 = COPY [[ARG2:%[0-9]+]](s32) ; X32-NEXT: RET 0, implicit %fp0 - + ret float %arg2 } define double @test_double_args(double %arg1, double %arg2) { -; ALL-LABEL:name: test_double_args -; 
X64: liveins: %xmm0, %xmm1 +; ALL-LABEL:name: test_double_args +; X64: liveins: %xmm0, %xmm1 ; X64: [[ARG1:%[0-9]+]](s64) = COPY %xmm0 ; X64-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %xmm1 ; X64-NEXT: %xmm0 = COPY [[ARG2:%[0-9]+]](s64) ; X64-NEXT: RET 0, implicit %xmm0 -; X32: fixedStack: +; X32: fixedStack: ; X32: id: [[STACK4:[0-9]+]], offset: 8, size: 8, alignment: 8, isImmutable: true, isAliased: false } ; X32: id: [[STACK0:[0-9]+]], offset: 0, size: 8, alignment: 16, isImmutable: true, isAliased: false } ; X32: [[ARG1_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] Index: test/CodeGen/X86/GlobalISel/legalize-add.mir =================================================================== --- test/CodeGen/X86/GlobalISel/legalize-add.mir +++ test/CodeGen/X86/GlobalISel/legalize-add.mir @@ -1,11 +1,11 @@ -# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s --- | ; ModuleID = '' source_filename = "" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--linux-gnu" - + define i32 @test_add_i32(i32 %arg1, i32 %arg2) { %ret = add i32 %arg1, %arg2 ret i32 %ret @@ -19,7 +19,7 @@ regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } @@ -27,7 +27,7 @@ bb.1 (%ir-block.0): liveins: %edi, %esi ; CHECK-LABEL: name: test_add_i32 - ; CHECK: [[VAL1:%.*]](s32) = COPY %edi + ; CHECK: [[VAL1:%.*]](s32) = COPY %edi ; CHECK: [[VAL2:%.*]](s32) = COPY %esi ; CHECK: [[RES:%.*]](s32) = G_ADD [[VAL1:%.*]], [[VAL2:%.*]] Index: test/CodeGen/X86/GlobalISel/legalize-sub.mir =================================================================== --- test/CodeGen/X86/GlobalISel/legalize-sub.mir +++ test/CodeGen/X86/GlobalISel/legalize-sub.mir @@ -1,11 +1,11 @@ -# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=legalizer %s -o - | FileCheck %s --- | ; ModuleID = '' source_filename = "" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--linux-gnu" - + define i32 @test_sub_i32(i32 %arg1, i32 %arg2) { %ret = sub i32 %arg1, %arg2 ret i32 %ret @@ -19,7 +19,7 @@ regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } @@ -27,7 +27,7 @@ bb.1 (%ir-block.0): liveins: %edi, %esi ; CHECK-LABEL: name: test_sub_i32 - ; CHECK: [[VAL1:%.*]](s32) = COPY %edi + ; CHECK: [[VAL1:%.*]](s32) = COPY %edi ; CHECK: [[VAL2:%.*]](s32) = COPY %esi ; CHECK: [[RES:%.*]](s32) = G_SUB [[VAL1:%.*]], [[VAL2:%.*]] Index: test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir =================================================================== --- test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir +++ test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir @@ -1,4 +1,7 @@ -# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=SSE +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL 
--check-prefix=NO_AVX512VL --check-prefix=NO_AVX512F --check-prefix=AVX +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NO_AVX512VL --check-prefix=AVX512ALL --check-prefix=AVX512F +# RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=AVX512ALL --check-prefix=AVX512VL --- | define i64 @test_add_i64(i64 %arg1, i64 %arg2) { @@ -15,33 +18,72 @@ %ret = sub i64 %arg1, %arg2 ret i64 %ret } - + define i32 @test_sub_i32(i32 %arg1, i32 %arg2) { %ret = sub i32 %arg1, %arg2 ret i32 %ret } + define float @test_add_float(float %arg1, float %arg2) { + %ret = fadd float %arg1, %arg2 + ret float %ret + } + + define double @test_add_double(double %arg1, double %arg2) { + %ret = fadd double %arg1, %arg2 + ret double %ret + } + + define float @test_sub_float(float %arg1, float %arg2) { + %ret = fsub float %arg1, %arg2 + ret float %ret + } + + define double @test_sub_double(double %arg1, double %arg2) { + %ret = fsub double %arg1, %arg2 + ret double %ret + } + + define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) { + %ret = add <4 x i32> %arg1, %arg2 + ret <4 x i32> %ret + } + + define <4 x i32> @test_sub_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) { + %ret = sub <4 x i32> %arg1, %arg2 + ret <4 x i32> %ret + } + + define <4 x float> @test_add_v4f32(<4 x float> %arg1, <4 x float> %arg2) { + %ret = fadd <4 x float> %arg1, %arg2 + ret <4 x float> %ret + } + + define <4 x float> @test_sub_v4f32(<4 x float> %arg1, <4 x float> %arg2) { + %ret = fsub <4 x float> %arg1, %arg2 + ret <4 x float> %ret + } ... 
--- name: test_add_i64 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } -# CHECK-NEXT: - { id: 1, class: gr64 } -# CHECK-NEXT: - { id: 2, class: gr64 } -registers: +# ALL: registers: +# ALL-NEXT: - { id: 0, class: gr64 } +# ALL-NEXT: - { id: 1, class: gr64 } +# ALL-NEXT: - { id: 2, class: gr64 } +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: %0 = COPY %rdi -# CHECK-NEXT: %1 = COPY %rsi -# CHECK-NEXT: %2 = ADD64rr %0, %1 +# ALL: %0 = COPY %rdi +# ALL-NEXT: %1 = COPY %rsi +# ALL-NEXT: %2 = ADD64rr %0, %1 body: | bb.1 (%ir-block.0): liveins: %edi, %esi - + %0(s64) = COPY %rdi %1(s64) = COPY %rsi %2(s64) = G_ADD %0, %1 @@ -52,21 +94,21 @@ name: test_add_i32 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr32 } -registers: +# ALL: registers: +# ALL-NEXT: - { id: 0, class: gr32 } +# ALL-NEXT: - { id: 1, class: gr32 } +# ALL-NEXT: - { id: 2, class: gr32 } +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: %0 = COPY %edi -# CHECK-NEXT: %1 = COPY %esi -# CHECK-NEXT: %2 = ADD32rr %0, %1 +# ALL: %0 = COPY %edi +# ALL-NEXT: %1 = COPY %esi +# ALL-NEXT: %2 = ADD32rr %0, %1 body: | bb.1 (%ir-block.0): liveins: %edi, %esi - + %0(s32) = COPY %edi %1(s32) = COPY %esi %2(s32) = G_ADD %0, %1 @@ -77,21 +119,21 @@ name: test_sub_i64 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr64 } -# CHECK-NEXT: - { id: 1, class: gr64 } -# CHECK-NEXT: - { id: 2, class: gr64 } -registers: +# ALL: registers: +# ALL-NEXT: - { id: 0, class: gr64 } +# ALL-NEXT: - { id: 1, class: gr64 } +# ALL-NEXT: - { id: 2, class: gr64 } +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: %0 = COPY %rdi -# CHECK-NEXT: %1 = COPY %rsi -# CHECK-NEXT: %2 = SUB64rr %0, %1 +# ALL: %0 = COPY %rdi +# ALL-NEXT: %1 = COPY %rsi +# ALL-NEXT: %2 = SUB64rr %0, %1 body: | bb.1 (%ir-block.0): liveins: %edi, %esi - + %0(s64) = COPY %rdi %1(s64) = COPY %rsi %2(s64) = G_SUB %0, %1 @@ -102,23 +144,299 @@ name: test_sub_i32 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gr32 } -# CHECK-NEXT: - { id: 1, class: gr32 } -# CHECK-NEXT: - { id: 2, class: gr32 } -registers: +# ALL: registers: +# ALL-NEXT: - { id: 0, class: gr32 } +# ALL-NEXT: - { id: 1, class: gr32 } +# ALL-NEXT: - { id: 2, class: gr32 } +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: %0 = COPY %edi -# CHECK-NEXT: %1 = COPY %esi -# CHECK-NEXT: %2 = SUB32rr %0, %1 +# ALL: %0 = COPY %edi +# ALL-NEXT: %1 = COPY %esi +# ALL-NEXT: %2 = SUB32rr %0, %1 body: | bb.1 (%ir-block.0): liveins: %edi, %esi - + %0(s32) = COPY %edi %1(s32) = COPY %esi %2(s32) = G_SUB %0, %1 ... 
+ +--- +name: test_add_float +alignment: 4 +legalized: true +regBankSelected: true +selected: false +tracksRegLiveness: true +# ALL: registers: +# NO_AVX512F-NEXT: - { id: 0, class: fr32 } +# NO_AVX512F-NEXT: - { id: 1, class: fr32 } +# NO_AVX512F-NEXT: - { id: 2, class: fr32 } +# AVX512ALL-NEXT: - { id: 0, class: fr32x } +# AVX512ALL-NEXT: - { id: 1, class: fr32x } +# AVX512ALL-NEXT: - { id: 2, class: fr32x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %0 = COPY %xmm0 +# ALL-NEXT: %1 = COPY %xmm1 +# SSE-NEXT: %2 = ADDSSrr %0, %1 +# AVX-NEXT: %2 = VADDSSrr %0, %1 +# AVX512F-NEXT: %2 = VADDSSZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(s32) = COPY %xmm0 + %1(s32) = COPY %xmm1 + %2(s32) = G_FADD %0, %1 + %xmm0 = COPY %2(s32) + RET 0, implicit %xmm0 + +... +--- +name: test_add_double +alignment: 4 +legalized: true +regBankSelected: true +selected: false +tracksRegLiveness: true +# ALL: registers: +# NO_AVX512F-NEXT: - { id: 0, class: fr64 } +# NO_AVX512F-NEXT: - { id: 1, class: fr64 } +# NO_AVX512F-NEXT: - { id: 2, class: fr64 } +# AVX512ALL-NEXT: - { id: 0, class: fr64x } +# AVX512ALL-NEXT: - { id: 1, class: fr64x } +# AVX512ALL-NEXT: - { id: 2, class: fr64x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %0 = COPY %xmm0 +# ALL-NEXT: %1 = COPY %xmm1 +# SSE-NEXT: %2 = ADDSDrr %0, %1 +# AVX-NEXT: %2 = VADDSDrr %0, %1 +# AVX512F-NEXT: %2 = VADDSDZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(s64) = COPY %xmm0 + %1(s64) = COPY %xmm1 + %2(s64) = G_FADD %0, %1 + %xmm0 = COPY %2(s64) + RET 0, implicit %xmm0 + +... +--- +name: test_sub_float +alignment: 4 +legalized: true +regBankSelected: true +selected: false +tracksRegLiveness: true +# ALL: registers: +# NO_AVX512F-NEXT: - { id: 0, class: fr32 } +# NO_AVX512F-NEXT: - { id: 1, class: fr32 } +# NO_AVX512F-NEXT: - { id: 2, class: fr32 } +# AVX512ALL-NEXT: - { id: 0, class: fr32x } +# AVX512ALL-NEXT: - { id: 1, class: fr32x } +# AVX512ALL-NEXT: - { id: 2, class: fr32x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %0 = COPY %xmm0 +# ALL-NEXT: %1 = COPY %xmm1 +# SSE-NEXT: %2 = SUBSSrr %0, %1 +# AVX-NEXT: %2 = VSUBSSrr %0, %1 +# AVX512F-NEXT: %2 = VSUBSSZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(s32) = COPY %xmm0 + %1(s32) = COPY %xmm1 + %2(s32) = G_FSUB %0, %1 + %xmm0 = COPY %2(s32) + RET 0, implicit %xmm0 + +... +--- +name: test_sub_double +alignment: 4 +legalized: true +regBankSelected: true +selected: false +tracksRegLiveness: true +# ALL: registers: +# NO_AVX512F-NEXT: - { id: 0, class: fr64 } +# NO_AVX512F-NEXT: - { id: 1, class: fr64 } +# NO_AVX512F-NEXT: - { id: 2, class: fr64 } +# AVX512ALL-NEXT: - { id: 0, class: fr64x } +# AVX512ALL-NEXT: - { id: 1, class: fr64x } +# AVX512ALL-NEXT: - { id: 2, class: fr64x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %0 = COPY %xmm0 +# ALL-NEXT: %1 = COPY %xmm1 +# SSE-NEXT: %2 = SUBSDrr %0, %1 +# AVX-NEXT: %2 = VSUBSDrr %0, %1 +# AVX512F-NEXT: %2 = VSUBSDZrr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(s64) = COPY %xmm0 + %1(s64) = COPY %xmm1 + %2(s64) = G_FSUB %0, %1 + %xmm0 = COPY %2(s64) + RET 0, implicit %xmm0 +... 
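+# For the 128-bit vector cases below, targets without avx512vl constrain the
+# operands to vr128 and select the non-Z128 opcodes; with avx512vl they use
+# vr128x and the Z128 forms.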
+--- +name: test_add_v4i32 +alignment: 4 +legalized: true +regBankSelected: true +selected: false +tracksRegLiveness: true +# ALL: registers: +# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128x } +# AVX512VL-NEXT: - { id: 1, class: vr128x } +# AVX512VL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %0 = COPY %xmm0 +# ALL-NEXT: %1 = COPY %xmm1 +# SSE-NEXT: %2 = PADDDrr %0, %1 +# AVX-NEXT: %2 = VPADDDrr %0, %1 +# AVX512F-NEXT: %2 = VPADDDrr %0, %1 +# AVX512VL-NEXT: %2 = VPADDDZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<4 x s32>) = COPY %xmm0 + %1(<4 x s32>) = COPY %xmm1 + %2(<4 x s32>) = G_ADD %0, %1 + %xmm0 = COPY %2(<4 x s32>) + RET 0, implicit %xmm0 + +... +--- +name: test_sub_v4i32 +alignment: 4 +legalized: true +regBankSelected: true +selected: false +tracksRegLiveness: true +# ALL: registers: +# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128x } +# AVX512VL-NEXT: - { id: 1, class: vr128x } +# AVX512VL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %0 = COPY %xmm0 +# ALL-NEXT: %1 = COPY %xmm1 +# SSE-NEXT: %2 = PSUBDrr %0, %1 +# AVX-NEXT: %2 = VPSUBDrr %0, %1 +# AVX512F-NEXT: %2 = VPSUBDrr %0, %1 +# AVX512VL-NEXT: %2 = VPSUBDZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<4 x s32>) = COPY %xmm0 + %1(<4 x s32>) = COPY %xmm1 + %2(<4 x s32>) = G_SUB %0, %1 + %xmm0 = COPY %2(<4 x s32>) + RET 0, implicit %xmm0 + +... +--- +name: test_add_v4f32 +alignment: 4 +legalized: true +regBankSelected: true +selected: false +tracksRegLiveness: true +# ALL: registers: +# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128x } +# AVX512VL-NEXT: - { id: 1, class: vr128x } +# AVX512VL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %0 = COPY %xmm0 +# ALL-NEXT: %1 = COPY %xmm1 +# SSE-NEXT: %2 = ADDPSrr %0, %1 +# AVX-NEXT: %2 = VADDPSrr %0, %1 +# AVX512F-NEXT: %2 = VADDPSrr %0, %1 +# AVX512VL-NEXT: %2 = VADDPSZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<4 x s32>) = COPY %xmm0 + %1(<4 x s32>) = COPY %xmm1 + %2(<4 x s32>) = G_FADD %0, %1 + %xmm0 = COPY %2(<4 x s32>) + RET 0, implicit %xmm0 + +... 
+--- +name: test_sub_v4f32 +alignment: 4 +legalized: true +regBankSelected: true +selected: false +tracksRegLiveness: true +# ALL: registers: +# NO_AVX512VL-NEXT: - { id: 0, class: vr128 } +# NO_AVX512VL-NEXT: - { id: 1, class: vr128 } +# NO_AVX512VL-NEXT: - { id: 2, class: vr128 } +# AVX512VL-NEXT: - { id: 0, class: vr128x } +# AVX512VL-NEXT: - { id: 1, class: vr128x } +# AVX512VL-NEXT: - { id: 2, class: vr128x } +registers: + - { id: 0, class: vecr } + - { id: 1, class: vecr } + - { id: 2, class: vecr } +# ALL: %0 = COPY %xmm0 +# ALL-NEXT: %1 = COPY %xmm1 +# SSE-NEXT: %2 = SUBPSrr %0, %1 +# AVX-NEXT: %2 = VSUBPSrr %0, %1 +# AVX512F-NEXT: %2 = VSUBPSrr %0, %1 +# AVX512VL-NEXT: %2 = VSUBPSZ128rr %0, %1 +body: | + bb.1 (%ir-block.0): + liveins: %xmm0, %xmm1 + + %0(<4 x s32>) = COPY %xmm0 + %1(<4 x s32>) = COPY %xmm1 + %2(<4 x s32>) = G_FSUB %0, %1 + %xmm0 = COPY %2(<4 x s32>) + RET 0, implicit %xmm0 + +...
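
Note: each of the new MIR cases can be exercised standalone with the same
invocation its RUN lines use, e.g.:

  llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel \
      -run-pass=instruction-select -verify-machineinstrs \
      test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir -o -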