Index: lib/Target/X86/X86RegisterBankInfo.h =================================================================== --- lib/Target/X86/X86RegisterBankInfo.h +++ lib/Target/X86/X86RegisterBankInfo.h @@ -49,12 +49,32 @@ static InstructionMapping getSameOperandsMapping(const MachineInstr &MI, bool isFP); + /// Track the bank of each instruction operand(register) + /// Fills \p OpRegBankIdx with one PartialMappingIdx per operand. + static void + getInstrPartialMappingIdxs(const MachineInstr &MI, + const MachineRegisterInfo &MRI, const bool isFP, + SmallVectorImpl &OpRegBankIdx); + + /// Construct the instruction ValueMapping from PartialMappingIdxs + /// \return true if mapping succeeded. + static bool + getInstrValueMapping(const MachineInstr &MI, + const SmallVectorImpl &OpRegBankIdx, + SmallVectorImpl &OpdsMapping); + public: X86RegisterBankInfo(const TargetRegisterInfo &TRI); const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC) const override; + InstructionMappings + getInstrAlternativeMappings(const MachineInstr &MI) const override; + + /// See RegisterBankInfo::applyMapping. 
+ void applyMappingImpl(const OperandsMapper &OpdMapper) const override; + InstructionMapping getInstrMapping(const MachineInstr &MI) const override; }; Index: lib/Target/X86/X86RegisterBankInfo.cpp =================================================================== --- lib/Target/X86/X86RegisterBankInfo.cpp +++ lib/Target/X86/X86RegisterBankInfo.cpp @@ -105,6 +105,39 @@ return PMI_None; } +void X86RegisterBankInfo::getInstrPartialMappingIdxs( + const MachineInstr &MI, const MachineRegisterInfo &MRI, const bool isFP, + SmallVectorImpl &OpRegBankIdx) { + + unsigned NumOperands = MI.getNumOperands(); + for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { + auto &MO = MI.getOperand(Idx); + if (!MO.isReg()) + OpRegBankIdx[Idx] = PMI_None; + else + OpRegBankIdx[Idx] = getPartialMappingIdx(MRI.getType(MO.getReg()), isFP); + } +} + +bool X86RegisterBankInfo::getInstrValueMapping( + const MachineInstr &MI, + const SmallVectorImpl &OpRegBankIdx, + SmallVectorImpl &OpdsMapping) { + + unsigned NumOperands = MI.getNumOperands(); + for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { + if (!MI.getOperand(Idx).isReg()) + continue; + + auto Mapping = getValueMapping(OpRegBankIdx[Idx], 1); + if (!Mapping->isValid()) + return false; + + OpdsMapping[Idx] = Mapping; + } + return true; +} + RegisterBankInfo::InstructionMapping X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, bool isFP) { const MachineFunction &MF = *MI.getParent()->getParent(); @@ -151,33 +184,60 @@ } unsigned NumOperands = MI.getNumOperands(); - unsigned Cost = 1; // set dafault cost - // Track the bank of each register. 
+ // Track the bank of each register, use NotFP mapping (all scalars in GPRs) SmallVector OpRegBankIdx(NumOperands); - for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { - auto &MO = MI.getOperand(Idx); - if (!MO.isReg()) - continue; - - // As a top-level guess, use NotFP mapping (all scalars in GPRs) - OpRegBankIdx[Idx] = getPartialMappingIdx(MRI.getType(MO.getReg()), false); - } + getInstrPartialMappingIdxs(MI, MRI, /* isFP */ false, OpRegBankIdx); // Finally construct the computed mapping. - RegisterBankInfo::InstructionMapping Mapping = - InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands}; SmallVector OpdsMapping(NumOperands); - for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { - if (MI.getOperand(Idx).isReg()) { - auto Mapping = getValueMapping(OpRegBankIdx[Idx], 1); - if (!Mapping->isValid()) - return InstructionMapping(); + if (!getInstrValueMapping(MI, OpRegBankIdx, OpdsMapping)) + return InstructionMapping(); - OpdsMapping[Idx] = Mapping; - } - } + return InstructionMapping{DefaultMappingID, /* Cost */ 1, + getOperandsMapping(OpdsMapping), NumOperands}; +} + +void X86RegisterBankInfo::applyMappingImpl( + const OperandsMapper &OpdMapper) const { + return applyDefaultMapping(OpdMapper); +} + +RegisterBankInfo::InstructionMappings +X86RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const { + + const MachineFunction &MF = *MI.getParent()->getParent(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; + switch (MI.getOpcode()) { + case TargetOpcode::G_LOAD: + case TargetOpcode::G_STORE: { + // Try to map 32/64-bit scalars to PMI_FP32/PMI_FP64. + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); + if (Size != 32 && Size != 64) + break; + + unsigned NumOperands = MI.getNumOperands(); + + // Track the bank of each 
register, use FP mapping (all scalars in VEC) + SmallVector OpRegBankIdx(NumOperands); + getInstrPartialMappingIdxs(MI, MRI, /* isFP */ true, OpRegBankIdx); + + // Finally construct the computed mapping. + SmallVector OpdsMapping(NumOperands); + if (!getInstrValueMapping(MI, OpRegBankIdx, OpdsMapping)) + break; + + RegisterBankInfo::InstructionMapping Mapping = InstructionMapping{ + /*ID*/ 1, /*Cost*/ 1, getOperandsMapping(OpdsMapping), NumOperands}; + InstructionMappings AltMappings; + AltMappings.emplace_back(std::move(Mapping)); + return AltMappings; + } + default: + break; + } + return RegisterBankInfo::getInstrAlternativeMappings(MI); } Index: test/CodeGen/X86/GlobalISel/X86-regbankselect.mir =================================================================== --- test/CodeGen/X86/GlobalISel/X86-regbankselect.mir +++ test/CodeGen/X86/GlobalISel/X86-regbankselect.mir @@ -1,4 +1,5 @@ -# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=regbankselect %s -o - | FileCheck %s +# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -run-pass=regbankselect %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=FAST +# RUN: llc -mtriple=x86_64-linux-gnu -global-isel -regbankselect-greedy -run-pass=regbankselect %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=GREEDY --- | ; ModuleID = 'tmp.ll' @@ -36,14 +37,14 @@ ret double %ret } - define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) { - %ret = add <4 x i32> %arg1, %arg2 - ret <4 x i32> %ret + define <4 x i32> @test_add_v4i32(<4 x i32> %arg1, <4 x i32> %arg2) { + %ret = add <4 x i32> %arg1, %arg2 + ret <4 x i32> %ret } - define <4 x float> @test_add_v4f32(<4 x float> %arg1, <4 x float> %arg2) { - %ret = fadd <4 x float> %arg1, %arg2 - ret <4 x float> %ret + define <4 x float> @test_add_v4f32(<4 x float> %arg1, <4 x float> %arg2) { + %ret = fadd <4 x float> %arg1, %arg2 + ret <4 x float> %ret } define i8 @test_load_i8(i8* %p1) { @@ -536,9 +537,13 @@ selected: false # CHECK-LABEL: name: 
test_store_float # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } + +# FAST-NEXT: - { id: 0, class: vecr } +# FAST-NEXT: - { id: 1, class: gpr } +# FAST-NEXT: - { id: 2, class: gpr } + +# GREEDY-NEXT: - { id: 0, class: vecr } +# GREEDY-NEXT: - { id: 1, class: gpr } registers: - { id: 0, class: _ } @@ -549,8 +554,13 @@ %0(s32) = COPY %xmm0 %1(p0) = COPY %rdi - ; CHECK: %2(s32) = COPY %0(s32) - ; CHECK: G_STORE %2(s32), %1(p0) :: (store 4 into %ir.p1) + ; CHECK: %1(p0) = COPY %rdi + + ; FAST-NEXT: %2(s32) = COPY %0(s32) + ; FAST-NEXT: G_STORE %2(s32), %1(p0) :: (store 4 into %ir.p1) + + ; GREEDY-NEXT: G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1) + G_STORE %0(s32), %1(p0) :: (store 4 into %ir.p1) %rax = COPY %1(p0) RET 0, implicit %rax @@ -564,9 +574,13 @@ selected: false # CHECK-LABEL: name: test_store_double # CHECK: registers: -# CHECK: - { id: 0, class: vecr } -# CHECK: - { id: 1, class: gpr } -# CHECK: - { id: 2, class: gpr } + +# FAST-NEXT: - { id: 0, class: vecr } +# FAST-NEXT: - { id: 1, class: gpr } +# FAST-NEXT: - { id: 2, class: gpr } + +# GREEDY-NEXT: - { id: 0, class: vecr } +# GREEDY-NEXT: - { id: 1, class: gpr } registers: - { id: 0, class: _ } @@ -577,8 +591,14 @@ %0(s64) = COPY %xmm0 %1(p0) = COPY %rdi - ; CHECK: %2(s64) = COPY %0(s64) - ; CHECK: G_STORE %2(s64), %1(p0) :: (store 8 into %ir.p1) + + ; CHECK: %1(p0) = COPY %rdi + + ; FAST-NEXT: %2(s64) = COPY %0(s64) + ; FAST-NEXT: G_STORE %2(s64), %1(p0) :: (store 8 into %ir.p1) + + ; GREEDY-NEXT: G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1) + G_STORE %0(s64), %1(p0) :: (store 8 into %ir.p1) %rax = COPY %1(p0) RET 0, implicit %rax Index: test/CodeGen/X86/GlobalISel/binop-isel.ll =================================================================== --- test/CodeGen/X86/GlobalISel/binop-isel.ll +++ test/CodeGen/X86/GlobalISel/binop-isel.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512F -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512VL +; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512F +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512VL define i64 @test_add_i64(i64 %arg1, i64 %arg2) { ; ALL-LABEL: test_add_i64: Index: test/CodeGen/X86/GlobalISel/memop-isel.ll =================================================================== --- test/CodeGen/X86/GlobalISel/memop-isel.ll +++ test/CodeGen/X86/GlobalISel/memop-isel.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX 
--check-prefix=AVX512F -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=AVX512VL +; RUN: llc -mtriple=x86_64-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE_FAST +; RUN: llc -mtriple=x86_64-linux-gnu -regbankselect-greedy -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE_GREEDY +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=ALL_AVX_FAST --check-prefix=AVX_FAST +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx -regbankselect-greedy -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=ALL_AVX_GREEDY --check-prefix=AVX_GREEDY +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=ALL_AVX_FAST --check-prefix=AVX512F_FAST +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -regbankselect-greedy -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=ALL_AVX_GREEDY --check-prefix=AVX512F_GREEDY +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=ALL_AVX_FAST --check-prefix=AVX512VL_FAST +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+avx512f -mattr=+avx512vl -regbankselect-greedy -global-isel < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=ALL_AVX --check-prefix=ALL_AVX_GREEDY --check-prefix=AVX512VL_GREEDY define i8 @test_load_i8(i8 * %p1) { @@ -122,37 +126,63 @@ } define float * @test_store_float(float %val, float * %p1) { -; SSE-LABEL: test_store_float: -; SSE: # BB#0: -; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: movl %eax, (%rdi) -; SSE-NEXT: movq %rdi, %rax -; SSE-NEXT: retq ; -; 
ALL_AVX-LABEL: test_store_float: -; ALL_AVX: # BB#0: -; ALL_AVX-NEXT: vmovd %xmm0, %eax -; ALL_AVX-NEXT: movl %eax, (%rdi) -; ALL_AVX-NEXT: movq %rdi, %rax -; ALL_AVX-NEXT: retq +; SSE_FAST-LABEL: test_store_float: +; SSE_FAST: # BB#0: +; SSE_FAST-NEXT: movd %xmm0, %eax +; SSE_FAST-NEXT: movl %eax, (%rdi) +; SSE_FAST-NEXT: movq %rdi, %rax +; SSE_FAST-NEXT: retq +; +; SSE_GREEDY-LABEL: test_store_float: +; SSE_GREEDY: # BB#0: +; SSE_GREEDY-NEXT: movss %xmm0, (%rdi) +; SSE_GREEDY-NEXT: movq %rdi, %rax +; SSE_GREEDY-NEXT: retq +; +; ALL_AVX_FAST-LABEL: test_store_float: +; ALL_AVX_FAST: # BB#0: +; ALL_AVX_FAST-NEXT: vmovd %xmm0, %eax +; ALL_AVX_FAST-NEXT: movl %eax, (%rdi) +; ALL_AVX_FAST-NEXT: movq %rdi, %rax +; ALL_AVX_FAST-NEXT: retq +; +; ALL_AVX_GREEDY-LABEL: test_store_float: +; ALL_AVX_GREEDY: # BB#0: +; ALL_AVX_GREEDY-NEXT: vmovss %xmm0, (%rdi) +; ALL_AVX_GREEDY-NEXT: movq %rdi, %rax +; ALL_AVX_GREEDY-NEXT: retq store float %val, float* %p1 ret float * %p1; } define double * @test_store_double(double %val, double * %p1) { -; SSE-LABEL: test_store_double: -; SSE: # BB#0: -; SSE-NEXT: movd %xmm0, %rax -; SSE-NEXT: movq %rax, (%rdi) -; SSE-NEXT: movq %rdi, %rax -; SSE-NEXT: retq ; -; ALL_AVX-LABEL: test_store_double: -; ALL_AVX: # BB#0: -; ALL_AVX-NEXT: vmovq %xmm0, %rax -; ALL_AVX-NEXT: movq %rax, (%rdi) -; ALL_AVX-NEXT: movq %rdi, %rax -; ALL_AVX-NEXT: retq +; SSE_FAST-LABEL: test_store_double: +; SSE_FAST: # BB#0: +; SSE_FAST-NEXT: movd %xmm0, %rax +; SSE_FAST-NEXT: movq %rax, (%rdi) +; SSE_FAST-NEXT: movq %rdi, %rax +; SSE_FAST-NEXT: retq +; +; SSE_GREEDY-LABEL: test_store_double: +; SSE_GREEDY: # BB#0: +; SSE_GREEDY-NEXT: movsd %xmm0, (%rdi) +; SSE_GREEDY-NEXT: movq %rdi, %rax +; SSE_GREEDY-NEXT: retq +; +; ALL_AVX_FAST-LABEL: test_store_double: +; ALL_AVX_FAST: # BB#0: +; ALL_AVX_FAST-NEXT: vmovq %xmm0, %rax +; ALL_AVX_FAST-NEXT: movq %rax, (%rdi) +; ALL_AVX_FAST-NEXT: movq %rdi, %rax +; ALL_AVX_FAST-NEXT: retq +; +; ALL_AVX_GREEDY-LABEL: test_store_double: 
+; ALL_AVX_GREEDY: # BB#0: +; ALL_AVX_GREEDY-NEXT: vmovsd %xmm0, (%rdi) +; ALL_AVX_GREEDY-NEXT: movq %rdi, %rax +; ALL_AVX_GREEDY-NEXT: retq store double %val, double* %p1 ret double * %p1; } Index: test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir =================================================================== --- test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir +++ test/CodeGen/X86/GlobalISel/x86_64-instructionselect.mir @@ -63,7 +63,7 @@ %ret = fsub <4 x float> %arg1, %arg2 ret <4 x float> %ret } - + define i8 @test_load_i8(i8* %p1) { %r = load i8, i8* %p1 ret i8 %r @@ -123,7 +123,7 @@ store double %val, double* %p1 ret double* %p1 } - + ... ---