Index: llvm/include/llvm/CodeGen/GlobalISel/Utils.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -380,5 +380,10 @@
 /// Returns true if the given block should be optimized for size.
 bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI,
                       BlockFrequencyInfo *BFI);
+
+/// \returns the intrinsic ID for a G_INTRINSIC or G_INTRINSIC_W_SIDE_EFFECTS
+/// instruction \p MI.
+unsigned getIntrinsicID(const MachineInstr &MI);
+
 } // End namespace llvm.
 #endif
Index: llvm/lib/CodeGen/GlobalISel/Utils.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -989,3 +989,12 @@
   return F.hasOptSize() || F.hasMinSize() ||
          llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
 }
+
+unsigned llvm::getIntrinsicID(const MachineInstr &MI) {
+#ifndef NDEBUG
+  unsigned Opc = MI.getOpcode();
+  assert(Opc == TargetOpcode::G_INTRINSIC ||
+         Opc == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+#endif
+  return MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID();
+}
Index: llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBank.h"
 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -25,6 +26,7 @@
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
 #include <cassert>
@@ -466,11 +468,24 @@
                              getValueMapping(RBIdx, Size), NumOperands);
 }
 
+/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
+static bool isFPIntrinsic(unsigned ID) {
+  // TODO: Add more intrinsics.
+  switch (ID) {
+  default:
+    return false;
+  case Intrinsic::aarch64_neon_uaddlv:
+    return true;
+  }
+}
+
 bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
                                                const MachineRegisterInfo &MRI,
                                                const TargetRegisterInfo &TRI,
                                                unsigned Depth) const {
   unsigned Op = MI.getOpcode();
+  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(getIntrinsicID(MI)))
+    return true;
 
   // Do we have an explicit floating point instruction?
   if (isPreISelGenericFloatingPointOpcode(Op))
@@ -915,6 +930,20 @@
     // Assign them FPR for now.
     OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR, PMI_FirstFPR};
     break;
+  case TargetOpcode::G_INTRINSIC: {
+    // Check if we know that the intrinsic has any constraints on its register
+    // banks. If it does, then update the mapping accordingly.
+    unsigned ID = getIntrinsicID(MI);
+    unsigned Idx = 0;
+    if (!isFPIntrinsic(ID))
+      break;
+    for (const auto &Op : MI.explicit_operands()) {
+      if (Op.isReg())
+        OpRegBankIdx[Idx] = PMI_FirstFPR;
+      ++Idx;
+    }
+    break;
+  }
   }
 
   // Finally construct the computed mapping.
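For context, a minimal usage sketch of the new helper follows. The isUADDLV() wrapper is hypothetical and only illustrates how a caller might dispatch on the ID returned by getIntrinsicID(); it is not part of the patch.

    // Hypothetical caller (illustration only, assuming the patch above).
    #include "llvm/CodeGen/GlobalISel/Utils.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetOpcodes.h"
    #include "llvm/IR/IntrinsicsAArch64.h"

    static bool isUADDLV(const llvm::MachineInstr &MI) {
      if (MI.getOpcode() != llvm::TargetOpcode::G_INTRINSIC)
        return false;
      // getIntrinsicID() reads the intrinsic operand, which immediately
      // follows the instruction's explicit defs.
      return llvm::getIntrinsicID(MI) ==
             llvm::Intrinsic::aarch64_neon_uaddlv;
    }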
Index: llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/regbank-intrinsic.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify register banks for intrinsics with known constraints. (E.g. all
+# operands must be FPRs.)
+#
+
+...
+---
+name: uaddlv_fpr
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_fpr
+    ; CHECK: liveins: $q0
+    ; CHECK: %copy:fpr(<16 x s8>) = COPY $q0
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    ; CHECK: $w0 = COPY %intrin(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:_(<16 x s8>) = COPY $q0
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: uaddlv_fpr_load
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: uaddlv_fpr_load
+    ; CHECK: liveins: $x0
+    ; CHECK: %ptr:gpr(p0) = COPY $x0
+    ; CHECK: %load:fpr(<2 x s32>) = G_LOAD %ptr(p0) :: (load 8)
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %load(<2 x s32>)
+    ; CHECK: $w0 = COPY %intrin(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %ptr:_(p0) = COPY $x0
+    %load:_(<2 x s32>) = G_LOAD %ptr :: (load 8)
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %load(<2 x s32>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: uaddlv_fpr_store
+alignment: 4
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: uaddlv_fpr_store
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %copy:gpr(<2 x s32>) = COPY $x0
+    ; CHECK: %ptr:gpr(p0) = COPY $x0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr(<2 x s32>) = COPY %copy(<2 x s32>)
+    ; CHECK: %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), [[COPY]](<2 x s32>)
+    ; CHECK: G_STORE %intrin(s32), %ptr(p0) :: (store 4)
+    %copy:_(<2 x s32>) = COPY $x0
+    %ptr:_(p0) = COPY $x0
+    %intrin:_(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<2 x s32>)
+    G_STORE %intrin, %ptr :: (store 4)
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-intrinsic-uaddlv.mir
@@ -0,0 +1,109 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: uaddlv_v8s8
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: uaddlv_v8s8
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: [[UADDLVv8i8v:%[0-9]+]]:fpr16 = UADDLVv8i8v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv8i8v]], %subreg.hsub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<8 x s8>) = COPY $d0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<8 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: uaddlv_v16s8
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v16s8
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv16i8v:%[0-9]+]]:fpr16 = UADDLVv16i8v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv16i8v]], %subreg.hsub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<16 x s8>) = COPY $q0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<16 x s8>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: uaddlv_v4s16
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: uaddlv_v4s16
+    ; CHECK: %copy:fpr64 = COPY $d0
+    ; CHECK: [[UADDLVv4i16v:%[0-9]+]]:fpr32 = UADDLVv4i16v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv4i16v]], %subreg.ssub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<4 x s16>) = COPY $d0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<4 x s16>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: uaddlv_v8s16
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v8s16
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv8i16v:%[0-9]+]]:fpr32 = UADDLVv8i16v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv8i16v]], %subreg.ssub
+    ; CHECK: %intrin:fpr32 = COPY [[INSERT_SUBREG]].ssub
+    ; CHECK: $w0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:fpr(<8 x s16>) = COPY $q0
+    %intrin:fpr(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<8 x s16>)
+    $w0 = COPY %intrin(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: uaddlv_v4s32
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $q0
+
+    ; CHECK-LABEL: name: uaddlv_v4s32
+    ; CHECK: %copy:fpr128 = COPY $q0
+    ; CHECK: [[UADDLVv4i32v:%[0-9]+]]:fpr64 = UADDLVv4i32v %copy
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[UADDLVv4i32v]], %subreg.dsub
+    ; CHECK: %intrin:fpr64 = COPY [[INSERT_SUBREG]].dsub
+    ; CHECK: $x0 = COPY %intrin
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:fpr(<4 x s32>) = COPY $q0
+    %intrin:fpr(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.uaddlv), %copy(<4 x s32>)
+    $x0 = COPY %intrin(s64)
+    RET_ReallyLR implicit $x0
+...
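The TODO in isFPIntrinsic() leaves the switch open for follow-ups. A sketch of how a later patch might extend it is below; treating aarch64_neon_saddlv as FPR-only is an assumption made here for illustration, not something this patch establishes.

    /// \returns true if a given intrinsic \p ID only uses and defines FPRs.
    static bool isFPIntrinsic(unsigned ID) {
      switch (ID) {
      default:
        return false;
      case Intrinsic::aarch64_neon_uaddlv:
      // Assumption (illustrative): saddlv likewise only touches vector regs.
      case Intrinsic::aarch64_neon_saddlv:
        return true;
      }
    }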