diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
 
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
 
 namespace llvm {
@@ -45,6 +46,7 @@
   bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
                                   MachineIRBuilder &MIRBuilder,
                                   GISelChangeObserver &Observer) const;
+  bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
   const AArch64Subtarget *ST;
 };
 } // End llvm namespace.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -24,6 +24,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Type.h"
 #include <initializer_list>
+#include "llvm/Support/MathExtras.h"
 
 #define DEBUG_TYPE "aarch64-legalinfo"
 
@@ -373,7 +374,8 @@
       .minScalarOrEltIf(
           [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; },
           0, s64)
-      .widenScalarOrEltToNextPow2(1);
+      .widenScalarOrEltToNextPow2(1)
+      .clampNumElements(0, v2s32, v4s32);
 
   getActionDefinitionsBuilder(G_FCMP)
       .legalFor({{s32, s32}, {s32, s64}})
@@ -412,7 +414,16 @@
       .legalIf(ExtLegalFunc)
       .clampScalar(0, s64, s64); // Just for s128, others are handled above.
 
-  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
+  getActionDefinitionsBuilder(G_TRUNC)
+      .minScalarOrEltIf(
+          [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
+          0, s8)
+      .customIf([=](const LegalityQuery &Query) {
+        LLT DstTy = Query.Types[0];
+        LLT SrcTy = Query.Types[1];
+        return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
+      })
+      .alwaysLegal();
 
   getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower();
 
@@ -709,11 +720,60 @@
     return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
   case TargetOpcode::G_GLOBAL_VALUE:
     return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
+  case TargetOpcode::G_TRUNC:
+    return legalizeVectorTrunc(MI, Helper);
   }
 
   llvm_unreachable("expected switch to return");
 }
 
+static void extractParts(Register Reg, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
+                         SmallVectorImpl<Register> &VRegs) {
+  for (int I = 0; I < NumParts; ++I)
+    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
+  MIRBuilder.buildUnmerge(VRegs, Reg);
+}
+
+bool AArch64LegalizerInfo::legalizeVectorTrunc(
+    MachineInstr &MI, LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  // Similar to how operand splitting is done in SelectionDAG, we can handle
+  // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
+  // %inlo(<4 x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
+  // %lo16(<4 x s16>) = G_TRUNC %inlo
+  // %hi16(<4 x s16>) = G_TRUNC %inhi
+  // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
+  // %res(<8 x s8>) = G_TRUNC %in16
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(SrcReg);
+  assert(isPowerOf2_32(DstTy.getSizeInBits()) &&
+         isPowerOf2_32(SrcTy.getSizeInBits()));
+
+  // Split input type.
+  LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2);
+  // First, split the source into two smaller vectors.
+  SmallVector<Register, 4> SplitSrcs;
+  extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
+
+  // Truncate the splits into intermediate narrower elements.
+  LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
+  for (unsigned I = 0; I < SplitSrcs.size(); ++I)
+    SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
+
+  auto Concat = MIRBuilder.buildConcatVectors(
+      DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
+
+  Helper.Observer.changingInstr(MI);
+  MI.getOperand(1).setReg(Concat.getReg(0));
+  Helper.Observer.changedInstr(MI);
+  return true;
+}
+
 bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
     MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
     GISelChangeObserver &Observer) const {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir
@@ -1920,3 +1920,79 @@
     RET_ReallyLR implicit $d0
 
 ...
+---
+name: icmp_8xs1
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+  - { reg: '$q1' }
+  - { reg: '$q2' }
+  - { reg: '$q3' }
+body: |
+  bb.1:
+    liveins: $q0, $q1, $q2, $q3
+
+    ; CHECK-LABEL: name: icmp_8xs1
+    ; CHECK: liveins: $q0, $q1, $q2, $q3
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3
+    ; CHECK: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[COPY2]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY1]](<4 x s32>), [[COPY3]]
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
+    ; CHECK: $d0 = COPY [[TRUNC2]](<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %2:_(<4 x s32>) = COPY $q0
+    %3:_(<4 x s32>) = COPY $q1
+    %0:_(<8 x s32>) = G_CONCAT_VECTORS %2(<4 x s32>), %3(<4 x s32>)
+    %4:_(<4 x s32>) = COPY $q2
+    %5:_(<4 x s32>) = COPY $q3
+    %1:_(<8 x s32>) = G_CONCAT_VECTORS %4(<4 x s32>), %5(<4 x s32>)
+    %6:_(<8 x s1>) = G_ICMP intpred(eq), %0(<8 x s32>), %1
+    %7:_(<8 x s8>) = G_ANYEXT %6(<8 x s1>)
+    $d0 = COPY %7(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...
+---
+name: icmp_8xs32
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$q0' }
+  - { reg: '$q1' }
+  - { reg: '$q2' }
+  - { reg: '$q3' }
+body: |
+  bb.1:
+    liveins: $q0, $q1, $q2, $q3
+
+    ; CHECK-LABEL: name: icmp_8xs32
+    ; CHECK: liveins: $q0, $q1, $q2, $q3
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3
+    ; CHECK: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[COPY2]]
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY1]](<4 x s32>), [[COPY3]]
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
+    ; CHECK: $d0 = COPY [[TRUNC2]](<8 x s8>)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %2:_(<4 x s32>) = COPY $q0
+    %3:_(<4 x s32>) = COPY $q1
+    %0:_(<8 x s32>) = G_CONCAT_VECTORS %2(<4 x s32>), %3(<4 x s32>)
+    %4:_(<4 x s32>) = COPY $q2
+    %5:_(<4 x s32>) = COPY $q3
+    %1:_(<8 x s32>) = G_CONCAT_VECTORS %4(<4 x s32>), %5(<4 x s32>)
+    %6:_(<8 x s32>) = G_ICMP intpred(eq), %0(<8 x s32>), %1
+    %7:_(<8 x s8>) = G_TRUNC %6(<8 x s32>)
+    $d0 = COPY %7(<8 x s8>)
+    RET_ReallyLR implicit $d0
+...