diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -292,6 +292,13 @@
   return BinaryOpc_match<LHS, RHS, false>(Opcode, L, R);
 }
 
+template <typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, TargetOpcode::G_EXTRACT_VECTOR_ELT, false>
+m_GExtVecElt(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, TargetOpcode::G_EXTRACT_VECTOR_ELT, false>(L,
+                                                                             R);
+}
+
 template <typename LHS, typename RHS>
 inline BinaryOp_match<LHS, RHS, TargetOpcode::G_ADD, true>
 m_GAdd(const LHS &L, const RHS &R) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -151,6 +151,10 @@
                                const RegisterBank &RB,
                                MachineIRBuilder &MIRBuilder) const;
 
+  MachineInstr *emitUSMOV(unsigned DstSize, Register Src0, Register DefReg,
+                          bool IsSigned, int64_t Lane,
+                          MachineRegisterInfo &MRI, MachineInstr &I);
+
   /// Emit a sequence of instructions representing a constant \p CV for a
   /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
   ///
@@ -2145,6 +2149,56 @@
     I.eraseFromParent();
     return true;
   }
+
+  case TargetOpcode::G_SEXT: {
+    unsigned Opcode = I.getOpcode();
+    const Register DefReg = I.getOperand(0).getReg();
+    Register SrcReg = I.getOperand(1).getReg();
+    const LLT DstTy = MRI.getType(DefReg);
+    const LLT SrcTy = MRI.getType(SrcReg);
+    unsigned DstSize = DstTy.getSizeInBits();
+    unsigned SrcSize = SrcTy.getSizeInBits();
+
+    if (DstTy.isVector())
+      return false; // Should be handled by imported patterns.
+
+    assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
+               AArch64::GPRRegBankID &&
+           "Unexpected ext regbank");
+
+    MachineInstr *ExtI;
+
+    if (DstSize == 64) {
+      // FIXME: Can we avoid manually doing this?
+      if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
+        LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
+                          << " operand\n");
+        return false;
+      }
+      SrcReg =
+          MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
+              .addImm(0)
+              .addUse(SrcReg)
+              .addImm(AArch64::sub_32)
+              .getReg(0);
+      Register Src0;
+      if (mi_match(I.getOperand(1).getReg(), MRI,
+                   m_Copy(m_GExtVecElt(m_Reg(Src0), m_SpecificICst(1))))) {
+
+        ExtI = emitUSMOV(DstSize, Src0, DefReg, true, 0, MRI, I);
+      } else
+        ExtI = MIB.buildInstr(AArch64::SBFMXri, {DefReg}, {SrcReg})
+                   .addImm(0)
+                   .addImm(SrcSize - 1);
+    } else {
+      return false;
+    }
+
+    constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+    I.eraseFromParent();
+    return true;
+  }
+
   case TargetOpcode::G_BR:
     return false;
   case TargetOpcode::G_SHL:
@@ -3097,10 +3151,21 @@
           SubregToRegSrc = OrDst;
         }
 
-        MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
-            .addImm(0)
-            .addUse(SubregToRegSrc)
-            .addImm(AArch64::sub_32);
+        Register Src0;
+        if (mi_match(I.getOperand(1).getReg(), MRI,
+                     m_Copy(m_GExtVecElt(m_Reg(Src0), m_SpecificICst(1))))) {
+          Register New = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+
+          ExtI = emitUSMOV(DstSize, Src0, New, IsSigned, 0, MRI, I);
+          MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
+              .addImm(0)
+              .addUse(New)
+              .addImm(AArch64::sub_32);
+        } else
+          MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
+              .addImm(0)
+              .addUse(SubregToRegSrc)
+              .addImm(AArch64::sub_32);
 
         if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
                                           MRI)) {
@@ -3128,6 +3193,7 @@
                             << " operand\n");
           return false;
         }
+
        SrcReg =
            MIB.buildInstr(AArch64::SUBREG_TO_REG, {&AArch64::GPR64RegClass}, {})
                .addImm(0)
@@ -3135,16 +3201,26 @@
                .addImm(AArch64::sub_32)
                .getReg(0);
       }
-
-      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
-                            {DefReg}, {SrcReg})
-                 .addImm(0)
-                 .addImm(SrcSize - 1);
+      Register Src0;
+      if (mi_match(I.getOperand(1).getReg(), MRI,
+                   m_Copy(m_GExtVecElt(m_Reg(Src0), m_SpecificICst(1))))) {
+        ExtI = emitUSMOV(DstSize, Src0, DefReg, IsSigned, 0, MRI, I);
+      } else
+        ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
+                              {DefReg}, {SrcReg})
+                   .addImm(0)
+                   .addImm(SrcSize - 1);
     } else if (DstSize <= 32) {
-      ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
-                            {DefReg}, {SrcReg})
-                 .addImm(0)
-                 .addImm(SrcSize - 1);
+      Register Src0;
+      int64_t Lane;
+      if (mi_match(I.getOperand(1).getReg(), MRI,
+                   m_Copy(m_GExtVecElt(m_Reg(Src0), m_ICst(Lane)))))
+        ExtI = emitUSMOV(DstSize, Src0, DefReg, IsSigned, Lane, MRI, I);
+      else
+        ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
+                              {DefReg}, {SrcReg})
+                   .addImm(0)
+                   .addImm(SrcSize - 1);
     } else {
       return false;
     }
@@ -4783,6 +4859,35 @@
   return InsElt;
 }
 
+MachineInstr *AArch64InstructionSelector::emitUSMOV(
+    unsigned DstSize, Register Src0, Register DefReg, bool IsSigned,
+    int64_t Lane, MachineRegisterInfo &MRI, MachineInstr &I) {
+  MachineInstr *RetInstr = nullptr;
+
+  const LLT &VecTy = MRI.getType(Src0);
+
+  if (VecTy.getSizeInBits() != 128) {
+    const MachineInstr *ScalarToVector = emitScalarToVector(
+        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
+    if (!ScalarToVector)
+      return RetInstr;
+    Src0 = ScalarToVector->getOperand(0).getReg();
+  }
+
+  if (DstSize == 64)
+    RetInstr =
+        MIB.buildInstr(IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16,
+                       {DefReg}, {Src0})
+            .addImm(Lane);
+  else
+    RetInstr =
+        MIB.buildInstr(IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16,
+                       {DefReg}, {Src0})
+            .addImm(Lane);
+
+  return RetInstr;
+}
+
 bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
                                                  MachineRegisterInfo &MRI) {
   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -625,8 +625,9 @@
       .legalIf([=](const LegalityQuery &Query) {
         const LLT &VecTy = Query.Types[1];
         return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
-               VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
-               VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0;
+               VecTy == v8s8 || VecTy == v4s32 || VecTy == v2s64 ||
+               VecTy == v2s32 || VecTy == v16s8 || VecTy == v2s32 ||
+               VecTy == v2p0;
       })
       .minScalarOrEltIf(
           [=](const LegalityQuery &Query) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt-with-extend.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt-with-extend.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-extract-vector-elt-with-extend.mir
@@ -0,0 +1,319 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: si64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: si64
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[COPY]], 1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[CPYi32_]]
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.sub_32
+    ; CHECK: [[SMOVvi16to64_:%[0-9]+]]:gpr64 = SMOVvi16to64 [[COPY]], 0
+    ; CHECK: $x0 = COPY [[SMOVvi16to64_]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %3:gpr(s64) = G_CONSTANT i64 1
+    %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<4 x s32>), %3(s64)
+    %5:gpr(s32) = COPY %2(s32)
+    %4:gpr(s64) = G_SEXT %5(s32)
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: si64_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: si64_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[INSERT_SUBREG]], 1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[CPYi32_]]
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY1]], %subreg.sub_32
+    ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY]], %subreg.dsub
+    ; CHECK: [[SMOVvi16to64_:%[0-9]+]]:gpr64 = SMOVvi16to64 [[INSERT_SUBREG1]], 0
+    ; CHECK: $x0 = COPY [[SMOVvi16to64_]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:fpr(<2 x s32>) = COPY $d0
+    %3:gpr(s64) = G_CONSTANT i64 1
+    %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64)
+    %5:gpr(s32) = COPY %2(s32)
+    %4:gpr(s64) = G_SEXT %5(s32)
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: zi64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: zi64
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[COPY]], 1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[CPYi32_]]
+    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[COPY1]], 0
+    ; CHECK: [[UMOVvi16_:%[0-9]+]]:gpr32 = UMOVvi16 [[COPY]], 0
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[UMOVvi16_]], %subreg.sub_32
+    ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:fpr(<4 x s32>) = COPY $q0
+    %3:gpr(s64) = G_CONSTANT i64 1
+    %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<4 x s32>), %3(s64)
+    %5:gpr(s32) = COPY %2(s32)
+    %4:gpr(s64) = G_ZEXT %5(s32)
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: zi64_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: zi64_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[CPYi32_:%[0-9]+]]:fpr32 = CPYi32 [[INSERT_SUBREG]], 1
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[CPYi32_]]
+    ; CHECK: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[COPY1]], 0
+    ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY]], %subreg.dsub
+    ; CHECK: [[UMOVvi16_:%[0-9]+]]:gpr32 = UMOVvi16 [[INSERT_SUBREG1]], 0
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[UMOVvi16_]], %subreg.sub_32
+    ; CHECK: $x0 = COPY [[SUBREG_TO_REG]]
+    ; CHECK: RET_ReallyLR implicit $x0
+    %0:fpr(<2 x s32>) = COPY $d0
+    %3:gpr(s64) = G_CONSTANT i64 1
+    %2:fpr(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %3(s64)
+    %5:gpr(s32) = COPY %2(s32)
+    %4:gpr(s64) = G_ZEXT %5(s32)
+    $x0 = COPY %4(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: si32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: si32
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[SMOVvi16to32_:%[0-9]+]]:gpr32 = SMOVvi16to32 [[COPY]], 1
+    ; CHECK: $w0 = COPY [[SMOVvi16to32_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<8 x s16>) = COPY $q0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<8 x s16>), %4(s64)
+    %6:gpr(s16) = COPY %3(s16)
+    %5:gpr(s32) = G_SEXT %6(s16)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: zi32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: zi32
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[UMOVvi16_:%[0-9]+]]:gpr32 = UMOVvi16 [[COPY]], 1
+    ; CHECK: $w0 = COPY [[UMOVvi16_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<8 x s16>) = COPY $q0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<8 x s16>), %4(s64)
+    %6:gpr(s16) = COPY %3(s16)
+    %5:gpr(s32) = G_ZEXT %6(s16)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: si32_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: si32_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[SMOVvi16to32_:%[0-9]+]]:gpr32 = SMOVvi16to32 [[INSERT_SUBREG]], 1
+    ; CHECK: $w0 = COPY [[SMOVvi16to32_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<4 x s16>) = COPY $d0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %4(s64)
+    %6:gpr(s16) = COPY %3(s16)
+    %5:gpr(s32) = G_SEXT %6(s16)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: zi32_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: zi32_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[UMOVvi16_:%[0-9]+]]:gpr32 = UMOVvi16 [[INSERT_SUBREG]], 1
+    ; CHECK: $w0 = COPY [[UMOVvi16_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<4 x s16>) = COPY $d0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s16) = G_EXTRACT_VECTOR_ELT %0(<4 x s16>), %4(s64)
+    %6:gpr(s16) = COPY %3(s16)
+    %5:gpr(s32) = G_ZEXT %6(s16)
+    $w0 = COPY %5(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: si16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: si16
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[SMOVvi16to32_:%[0-9]+]]:gpr32 = SMOVvi16to32 [[COPY]], 1
+    ; CHECK: $w0 = COPY [[SMOVvi16to32_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<16 x s8>) = COPY $q0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<16 x s8>), %4(s64)
+    %7:gpr(s8) = COPY %3(s8)
+    %6:gpr(s32) = G_SEXT %7(s8)
+    $w0 = COPY %6(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: zi16
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $w0
+
+    ; CHECK-LABEL: name: zi16
+    ; CHECK: liveins: $q0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK: [[UMOVvi16_:%[0-9]+]]:gpr32 = UMOVvi16 [[COPY]], 1
+    ; CHECK: $w0 = COPY [[UMOVvi16_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<16 x s8>) = COPY $q0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<16 x s8>), %4(s64)
+    %7:gpr(s8) = COPY %3(s8)
+    %6:gpr(s32) = G_ZEXT %7(s8)
+    $w0 = COPY %6(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: si16_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: si16_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[SMOVvi16to32_:%[0-9]+]]:gpr32 = SMOVvi16to32 [[INSERT_SUBREG]], 1
+    ; CHECK: $w0 = COPY [[SMOVvi16to32_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<8 x s8>) = COPY $d0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<8 x s8>), %4(s64)
+    %7:gpr(s8) = COPY %3(s8)
+    %6:gpr(s32) = G_SEXT %7(s8)
+    $w0 = COPY %6(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: zi16_2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $w0
+
+    ; CHECK-LABEL: name: zi16_2
+    ; CHECK: liveins: $d0, $w0
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.dsub
+    ; CHECK: [[UMOVvi16_:%[0-9]+]]:gpr32 = UMOVvi16 [[INSERT_SUBREG]], 1
+    ; CHECK: $w0 = COPY [[UMOVvi16_]]
+    ; CHECK: RET_ReallyLR implicit $w0
+    %0:fpr(<8 x s8>) = COPY $d0
+    %4:gpr(s64) = G_CONSTANT i64 1
+    %3:fpr(s8) = G_EXTRACT_VECTOR_ELT %0(<8 x s8>), %4(s64)
+    %7:gpr(s8) = COPY %3(s8)
+    %6:gpr(s32) = G_ZEXT %7(s8)
+    $w0 = COPY %6(s32)
+    RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir
@@ -340,9 +340,8 @@
 
     ; CHECK-LABEL: name: sext_s64_from_s32
    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
-    ; CHECK: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_32
-    ; CHECK: [[SBFMXri:%[0-9]+]]:gpr64 = SBFMXri [[INSERT_SUBREG]], 0, 31
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], %subreg.sub_32
+    ; CHECK: [[SBFMXri:%[0-9]+]]:gpr64 = SBFMXri [[SUBREG_TO_REG]], 0, 31
     ; CHECK: $x0 = COPY [[SBFMXri]]
     %0(s32) = COPY $w0
     %1(s64) = G_SEXT %0
diff --git a/llvm/test/CodeGen/AArch64/sext-zext.ll b/llvm/test/CodeGen/AArch64/sext-zext.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sext-zext.ll
@@ -0,0 +1,197 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s --check-prefixes=CHECK-ISEL
+; RUN: llc -mtriple=aarch64-eabi -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK-GLOBAL
+
+define i64 @si64(<4 x i32> %0, i32 %1) {
+; CHECK-ISEL-LABEL: si64:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    smov x0, v0.s[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: si64:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    smov x0, v0.h[0]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <4 x i32> %0, i64 1
+  %s = sext i32 %3 to i64
+  ret i64 %s
+}
+
+define i64 @zi64(<4 x i32> %0, i32 %1) {
+; CHECK-ISEL-LABEL: zi64:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    mov w0, v0.s[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: zi64:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    umov w0, v0.h[0]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <4 x i32> %0, i64 1
+  %s = zext i32 %3 to i64
+  ret i64 %s
+}
+
+define i64 @si64_2(<2 x i32> %0, i32 %1) {
+; CHECK-ISEL-LABEL: si64_2:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-ISEL-NEXT:    smov x0, v0.s[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: si64_2:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GLOBAL-NEXT:    smov x0, v0.h[0]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <2 x i32> %0, i64 1
+  %s = sext i32 %3 to i64
+  ret i64 %s
+}
+
+define i64 @zi64_2(<2 x i32> %0, i32 %1) {
+; CHECK-ISEL-LABEL: zi64_2:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-ISEL-NEXT:    mov w0, v0.s[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: zi64_2:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GLOBAL-NEXT:    umov w0, v0.h[0]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <2 x i32> %0, i64 1
+  %z = zext i32 %3 to i64
+  ret i64 %z
+}
+
+
+define i32 @si32(<8 x i16> %0, i16 %1) {
+; CHECK-ISEL-LABEL: si32:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    smov w0, v0.h[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: si32:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    smov w0, v0.h[1]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <8 x i16> %0, i32 1
+  %s = sext i16 %3 to i32
+  ret i32 %s
+}
+
+define i32 @si32_4(<4 x i16> %0, i16 %1) {
+; CHECK-ISEL-LABEL: si32_4:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-ISEL-NEXT:    smov w0, v0.h[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: si32_4:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GLOBAL-NEXT:    smov w0, v0.h[1]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <4 x i16> %0, i32 1
+  %s = sext i16 %3 to i32
+  ret i32 %s
+}
+
+define i32 @zi32(<8 x i16> %0, i16 %1) {
+; CHECK-ISEL-LABEL: zi32:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    umov w0, v0.h[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: zi32:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    umov w0, v0.h[1]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <8 x i16> %0, i32 1
+  %s = zext i16 %3 to i32
+  ret i32 %s
+}
+
+define i32 @zi32_4(<4 x i16> %0, i16 %1) {
+; CHECK-ISEL-LABEL: zi32_4:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-ISEL-NEXT:    umov w0, v0.h[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: zi32_4:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GLOBAL-NEXT:    umov w0, v0.h[1]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <4 x i16> %0, i32 1
+  %z = zext i16 %3 to i32
+  ret i32 %z
+}
+
+define i16 @si16(<16 x i8> %0, i8 %1) {
+; CHECK-ISEL-LABEL: si16:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    smov w0, v0.b[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: si16:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    smov w0, v0.h[1]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <16 x i8> %0, i16 1
+  %s = sext i8 %3 to i16
+  ret i16 %s
+}
+
+define i16 @zi16(<16 x i8> %0, i8 %1) {
+; CHECK-ISEL-LABEL: zi16:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    umov w0, v0.b[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: zi16:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    umov w0, v0.h[1]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <16 x i8> %0, i16 1
+  %z = zext i8 %3 to i16
+  ret i16 %z
+}
+
+define i16 @si16_8(<8 x i8> %0, i8 %1) {
+; CHECK-ISEL-LABEL: si16_8:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-ISEL-NEXT:    smov w0, v0.b[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: si16_8:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GLOBAL-NEXT:    smov w0, v0.h[1]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <8 x i8> %0, i16 1
+  %s = sext i8 %3 to i16
+  ret i16 %s
+}
+
+define i16 @zi16_8(<8 x i8> %0, i8 %1) {
+; CHECK-ISEL-LABEL: zi16_8:
+; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-ISEL-NEXT:    umov w0, v0.b[1]
+; CHECK-ISEL-NEXT:    ret
+;
+; CHECK-GLOBAL-LABEL: zi16_8:
+; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GLOBAL-NEXT:    umov w0, v0.h[1]
+; CHECK-GLOBAL-NEXT:    ret
+  %3 = extractelement <8 x i8> %0, i16 1
+  %z = zext i8 %3 to i16
+  ret i16 %z
+}
+