diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -368,6 +368,8 @@
   LegalizeResult lowerBitreverse(MachineInstr &MI);
   LegalizeResult lowerReadWriteRegister(MachineInstr &MI);
   LegalizeResult lowerSMULH_UMULH(MachineInstr &MI);
+  LegalizeResult lowerSelect(MachineInstr &MI);
+
 };
 
 /// Helper function that creates a libcall to the given \p Name using the given
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3110,6 +3110,8 @@
     MI.eraseFromParent();
    return Legalized;
   }
+  case G_SELECT:
+    return lowerSelect(MI);
   }
 }
 
@@ -6176,3 +6178,26 @@
   MI.eraseFromParent();
   return Legalized;
 }
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
+  // Implement vector G_SELECT in terms of XOR, AND, OR.
+  Register DstReg = MI.getOperand(0).getReg();
+  Register MaskReg = MI.getOperand(1).getReg();
+  Register Op1Reg = MI.getOperand(2).getReg();
+  Register Op2Reg = MI.getOperand(3).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT MaskTy = MRI.getType(MaskReg);
+  LLT Op1Ty = MRI.getType(Op1Reg);
+  if (!DstTy.isVector())
+    return UnableToLegalize;
+
+  if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits())
+    return UnableToLegalize;
+
+  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
+  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
+  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
+  MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
+  MI.eraseFromParent();
+  return Legalized;
+}
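The new hunks implement the classic branchless select identity, `Dst = (Op1 & Mask) | (Op2 & ~Mask)`, which is only a true element-wise select when every mask lane is all-ones or all-zeros; the AArch64 ruleset below arranges for that by widening and sign-extending the condition first. A standalone one-lane model of the sequence `lowerSelect` emits (a sketch for illustration, not part of the patch):

```cpp
#include <cstdint>

// One 64-bit lane of the G_XOR/G_AND/G_AND/G_OR sequence built by
// lowerSelect. Assumes Mask is 0 (select Op2) or ~0ULL (select Op1);
// a lane mixing set and clear bits would blend the two operands
// bitwise rather than select one of them.
uint64_t selectViaBitmask(uint64_t Mask, uint64_t Op1, uint64_t Op2) {
  uint64_t NotMask = ~Mask;              // G_XOR Mask, -1 (buildNot)
  return (Op1 & Mask) | (Op2 & NotMask); // two G_ANDs feeding a G_OR
}
```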
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -419,14 +419,12 @@
   getActionDefinitionsBuilder(G_BRCOND).legalFor({s1, s8, s16, s32});
   getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});
 
-  // Select
-  // FIXME: We can probably do a bit better than just scalarizing vector
-  // selects.
   getActionDefinitionsBuilder(G_SELECT)
       .legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
       .clampScalar(0, s32, s64)
       .widenScalarToNextPow2(0)
-      .scalarize(0);
+      .minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
+      .lowerIf(isVector(0));
 
   // Pointer-handling
   getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
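Two rules cooperate on the AArch64 side. `minScalarEltSameAsIf` first widens the condition vector's elements to the destination's element width (for example `<16 x s1>` to `<16 x s8>` in the test added below), so the mask and the data operands have equal bit width by the time `lowerIf` routes the instruction into `lowerSelect`; the `all(...)` guard keeps the widening away from splat selects, whose condition is a scalar `s1` that must stay scalar. A rough model of the predicate combinator (an illustrative assumption, simplified from the `LegalityPredicates` namespace in `LegalizerInfo.h`):

```cpp
#include <functional>

struct LegalityQuery; // opaque stand-in for llvm::LegalityQuery

using LegalityPredicate = std::function<bool(const LegalityQuery &)>;

// all(P0, P1): accept a query only when both predicates accept it. This
// is how the "destination is a vector" and "condition is a vector"
// tests are combined in the ruleset above.
LegalityPredicate all(LegalityPredicate P0, LegalityPredicate P1) {
  return [=](const LegalityQuery &Q) { return P0(Q) && P1(Q); };
}
```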
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
@@ -16,15 +16,18 @@
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
     ; CHECK: [[ICMP:%[0-9]+]]:_(<2 x s64>) = G_ICMP intpred(sgt), [[COPY]](<2 x s64>), [[BUILD_VECTOR]]
-    ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ICMP]](<2 x s64>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s64)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s64)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; CHECK: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
-    ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[UV2]], [[UV4]]
-    ; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC1]](s1), [[UV3]], [[UV5]]
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64)
-    ; CHECK: $q0 = COPY [[BUILD_VECTOR1]](<2 x s64>)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY [[ICMP]](<2 x s64>)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
+    ; CHECK: [[SHL:%[0-9]+]]:_(<2 x s64>) = G_SHL [[COPY2]], [[BUILD_VECTOR1]](<2 x s64>)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(<2 x s64>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<2 x s64>)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
+    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s64>) = G_XOR [[ASHR]], [[BUILD_VECTOR2]]
+    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[COPY1]], [[ASHR]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[COPY]], [[XOR]]
+    ; CHECK: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]]
+    ; CHECK: $q0 = COPY [[OR]](<2 x s64>)
     ; CHECK: RET_ReallyLR implicit $q0
     %0:_(<2 x s64>) = COPY $q0
     %1:_(<2 x s64>) = COPY $q1
@@ -51,15 +54,18 @@
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
     ; CHECK: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(sgt), [[COPY]](<2 x s32>), [[BUILD_VECTOR]]
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ICMP]](<2 x s32>)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[UV]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[UV1]](s32)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
-    ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
-    ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV2]], [[UV4]]
-    ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC1]](s1), [[UV3]], [[UV5]]
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32)
-    ; CHECK: $d0 = COPY [[BUILD_VECTOR1]](<2 x s32>)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[ICMP]](<2 x s32>)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[COPY2]], [[BUILD_VECTOR1]](<2 x s32>)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(<2 x s32>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<2 x s32>)
+    ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[ASHR]], [[BUILD_VECTOR2]]
+    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY1]], [[ASHR]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[XOR]]
+    ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
+    ; CHECK: $d0 = COPY [[OR]](<2 x s32>)
     ; CHECK: RET_ReallyLR implicit $d0
     %0:_(<2 x s32>) = COPY $d0
     %1:_(<2 x s32>) = COPY $d1
@@ -71,3 +77,40 @@
     RET_ReallyLR implicit $d0
 
 ...
+---
+name: v16s8
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $q0, $q1
+
+    ; CHECK-LABEL: name: v16s8
+    ; CHECK: liveins: $q0, $q1
+    ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1
+    ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(sgt), [[COPY]](<16 x s8>), [[BUILD_VECTOR]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(<16 x s8>) = COPY [[ICMP]](<16 x s8>)
+    ; CHECK: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 7
+    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8), [[C1]](s8)
+    ; CHECK: [[SHL:%[0-9]+]]:_(<16 x s8>) = G_SHL [[COPY2]], [[BUILD_VECTOR1]](<16 x s8>)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(<16 x s8>) = G_ASHR [[SHL]], [[BUILD_VECTOR1]](<16 x s8>)
+    ; CHECK: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1
+    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<16 x s8>) = G_XOR [[ASHR]], [[BUILD_VECTOR2]]
+    ; CHECK: [[AND:%[0-9]+]]:_(<16 x s8>) = G_AND [[COPY1]], [[ASHR]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(<16 x s8>) = G_AND [[COPY]], [[XOR]]
+    ; CHECK: [[OR:%[0-9]+]]:_(<16 x s8>) = G_OR [[AND]], [[AND1]]
+    ; CHECK: $q0 = COPY [[OR]](<16 x s8>)
+    ; CHECK: RET_ReallyLR implicit $q0
+    %0:_(<16 x s8>) = COPY $q0
+    %1:_(<16 x s8>) = COPY $q1
+    %3:_(s8) = G_CONSTANT i8 0
+    %2:_(<16 x s8>) = G_BUILD_VECTOR %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8)
+    %4:_(<16 x s1>) = G_ICMP intpred(sgt), %0(<16 x s8>), %2
+    %5:_(<16 x s8>) = G_SELECT %4(<16 x s1>), %1, %0
+    $q0 = COPY %5(<16 x s8>)
+    RET_ReallyLR implicit $q0
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -288,8 +288,8 @@
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_SELECT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. the first uncovered type index: 2, OK
-# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_UADDO (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
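The interesting part of the updated checks is the `G_SHL`/`G_ASHR` pair: only the low bit of each widened condition lane is defined, so before the mask math the legalizer replicates that bit across the lane by shifting it to the sign position and arithmetically shifting it back (by 63, 31, or 7 for `s64`, `s32`, `s8` lanes), the usual expansion of a sign-extend-in-register. A one-lane model for the `s8` case (standalone sketch, not from the patch):

```cpp
#include <cstdint>

// Model of the G_SHL + G_ASHR by 7 pair on one s8 lane: replicate bit 0
// across the byte, mapping lanes with bit 0 clear to 0x00 and lanes with
// bit 0 set to 0xFF. The casts force the arithmetic (sign-propagating)
// right shift that the expansion relies on.
int8_t signExtendI1Lane(uint8_t AnyExtLane) {
  int8_t Shifted = static_cast<int8_t>(AnyExtLane << 7); // G_SHL by 7
  return static_cast<int8_t>(Shifted >> 7);              // G_ASHR by 7
}
```

The legalizer-info-validation update is mechanical: once a ruleset uses a user-defined predicate, as `lowerIf(isVector(0))` does, the debug-time coverage checks report SKIPPED instead of scanning the type and imm indices.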
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -958,8 +958,7 @@
 
 ; GISEL: neg.2s
 ; GISEL: cmge.2s
-; GISEL: fcsel
-; GISEL: fcsel
+; GISEL: bif.8b
   %tmp1neg = sub <2 x i32> zeroinitializer, %a
   %b = icmp sge <2 x i32> %a, zeroinitializer
   %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
@@ -974,10 +973,7 @@
 ; For GlobalISel, this generates terrible code until we can pattern match this to abs.
 ; GISEL-DAG: neg.4h
 ; GISEL-DAG: cmgt.4h
-; GISEL: csel
-; GISEL: csel
-; GISEL: csel
-; GISEL: csel
+; GISEL: bif.8b
   %tmp1neg = sub <4 x i16> zeroinitializer, %a
   %b = icmp sgt <4 x i16> %a, zeroinitializer
   %abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg
@@ -1000,10 +996,7 @@
 ; DAG-NEXT: ret
 
 ; GISEL: cmge.4s
-; GISEL: fcsel
-; GISEL: fcsel
-; GISEL: fcsel
-; GISEL: fcsel
+; GISEL: bif.16b
   %tmp1neg = sub <4 x i32> zeroinitializer, %a
   %b = icmp sge <4 x i32> %a, zeroinitializer
   %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
@@ -1017,14 +1010,7 @@
 
 ; GISEL-DAG: cmgt.8h
 ; GISEL-DAG: neg.8h
-; GISEL: csel
-; GISEL: csel
-; GISEL: csel
-; GISEL: csel
-; GISEL: csel
-; GISEL: csel
-; GISEL: csel
-; GISEL: csel
+; GISEL: bif.16b
   %tmp1neg = sub <8 x i16> zeroinitializer, %a
   %b = icmp sgt <8 x i16> %a, zeroinitializer
   %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
@@ -1033,8 +1019,11 @@
 
 define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
 ; CHECK-LABEL: abspattern6:
-; CHECK: abs.16b
-; CHECK-NEXT: ret
+; DAG: abs.16b
+; DAG-NEXT: ret
+
+; GISEL: cmgt.16b
+; GISEL: bit.16b
   %tmp1neg = sub <16 x i8> zeroinitializer, %a
   %b = icmp slt <16 x i8> %a, zeroinitializer
   %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
@@ -1048,8 +1037,7 @@
 
 ; GISEL: neg.2d
 ; GISEL: cmge.2d
-; GISEL: fcsel
-; GISEL: fcsel
+; GISEL: bit.16b
   %tmp1neg = sub <2 x i64> zeroinitializer, %a
   %b = icmp sle <2 x i64> %a, zeroinitializer
   %abs = select <2 x i1> %b, <2 x i64> %tmp1neg, <2 x i64> %a
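With the select now lowered to mask arithmetic, the instruction selector can fold the whole AND/AND/OR pattern into a single NEON bitwise-select instruction, which is why the per-element `csel`/`fcsel` checks collapse to one `bif`/`bit` check; the `.8b`/`.16b` suffixes pick the 64-bit or 128-bit register form. One-lane models of the three variants (standard Armv8 NEON semantics; all compute the same select and differ only in which operand is tied to the destination register):

```cpp
#include <cstdint>

// BSL Vd, Vn, Vm: the destination register holds the mask.
uint64_t bsl(uint64_t D, uint64_t N, uint64_t M) { return (N & D) | (M & ~D); }

// BIT Vd, Vn, Vm: insert N's bits into D where the mask M is set.
uint64_t bit(uint64_t D, uint64_t N, uint64_t M) { return (N & M) | (D & ~M); }

// BIF Vd, Vn, Vm: insert N's bits into D where the mask M is clear.
uint64_t bif(uint64_t D, uint64_t N, uint64_t M) { return (N & ~M) | (D & M); }
```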