diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1424,8 +1424,100 @@ if (const SelectInst *SI = dyn_cast(&U)) Flags = MachineInstr::copyFlagsFromInstruction(*SI); + Optional Opc; + bool IsUnaryAbs = false; + bool Negate = false; + if (EnableOpts) { + Value *LHS, *RHS; + auto SPR = matchSelectPattern(const_cast(&U), LHS, RHS); + switch (SPR.Flavor) { + case SPF_UMAX: + Opc = TargetOpcode::G_UMAX; + break; + case SPF_UMIN: + Opc = TargetOpcode::G_UMIN; + break; + case SPF_SMAX: + Opc = TargetOpcode::G_SMAX; + break; + case SPF_SMIN: + Opc = TargetOpcode::G_SMIN; + break; + case SPF_FMINNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: + llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: + Opc = TargetOpcode::G_FMINIMUM; + break; + // Treat ANY like OTHER + case SPNB_RETURNS_ANY: + case SPNB_RETURNS_OTHER: + Opc = TargetOpcode::G_FMINNUM; + break; + } + break; + case SPF_FMAXNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: + llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: + Opc = TargetOpcode::G_FMAXIMUM; + break; + // Treat ANY like OTHER + case SPNB_RETURNS_ANY: + case SPNB_RETURNS_OTHER: + Opc = TargetOpcode::G_FMAXNUM; + break; + } + break; + case SPF_NABS: + Negate = true; + LLVM_FALLTHROUGH; + case SPF_ABS: + IsUnaryAbs = true; + Opc = TargetOpcode::G_ABS; + break; + default: + break; + } + + auto hasOnlySelectUsers = [](const Value *Cond) { + return llvm::all_of(Cond->users(), + [](const Value *V) { return isa(V); }); + }; + + if (!IsUnaryAbs && Opc && + // If the underlying comparison instruction is used by any other + // instruction, the consumed instructions won't be destroyed, so it is + // not profitable to convert to a min/max. 
+ hasOnlySelectUsers(cast(&U)->getCondition())) { + Op0Regs = getOrCreateVRegs(*LHS); + Op1Regs = getOrCreateVRegs(*RHS); + } + + if (IsUnaryAbs) { + Op0Regs = getOrCreateVRegs(*LHS); + } + } + for (unsigned i = 0; i < ResRegs.size(); ++i) { - MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i], Flags); + if (!Opc.hasValue()) + MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i], Flags); + // convert to abs or abs + neg + else if (IsUnaryAbs) { + LLT Ty = MRI->getType(ResRegs[i]); + Register ABSOutReg = + Negate ? MRI->createGenericVirtualRegister(Ty) : ResRegs[i]; + MIRBuilder.buildInstr(*Opc, {ABSOutReg}, {Op0Regs[i]}); + if (Negate) { + MIRBuilder.buildInstr(TargetOpcode::G_SUB, {ResRegs[i]}, + {MIRBuilder.buildConstant(Ty, 0), ABSOutReg}); + } + // convert to min/max + } else { + MIRBuilder.buildInstr(*Opc, {ResRegs[i]}, {Op0Regs[i], Op1Regs[i]}); + } } return true; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -1742,7 +1742,7 @@ ; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] ; CHECK-NOLSE-O1-NEXT: sxtb w9, w8 ; CHECK-NOLSE-O1-NEXT: cmp w9, w1, sxtb -; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, le +; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, lt ; CHECK-NOLSE-O1-NEXT: stxrb w10, w9, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB33_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -1883,7 +1883,7 @@ ; CHECK-NOLSE-O1-NEXT: ldaxrb w8, [x0] ; CHECK-NOLSE-O1-NEXT: and w10, w8, #0xff ; CHECK-NOLSE-O1-NEXT: cmp w10, w9 -; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, ls +; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, lo ; CHECK-NOLSE-O1-NEXT: stlxrb w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB35_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -2423,7 +2423,7 @@ ; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] ; CHECK-NOLSE-O1-NEXT: sxth w9, w8 ; CHECK-NOLSE-O1-NEXT: cmp w9, w1, 
sxth -; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, le +; CHECK-NOLSE-O1-NEXT: csel w9, w8, w1, lt ; CHECK-NOLSE-O1-NEXT: stxrh w10, w9, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB43_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end @@ -2564,7 +2564,7 @@ ; CHECK-NOLSE-O1-NEXT: ldaxrh w8, [x0] ; CHECK-NOLSE-O1-NEXT: and w10, w8, #0xffff ; CHECK-NOLSE-O1-NEXT: cmp w10, w9 -; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, ls +; CHECK-NOLSE-O1-NEXT: csel w10, w10, w9, lo ; CHECK-NOLSE-O1-NEXT: stlxrh w11, w10, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w11, LBB45_1 ; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-select.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-select.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-select.ll @@ -0,0 +1,131 @@ +; RUN: llc -O0 -global-isel -mtriple=aarch64-unknown-unknown -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O0 +; RUN: llc -O2 -global-isel -mtriple=aarch64-unknown-unknown -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O2 + +define i32 @max_s32(i32 %a, i32 %b) { +; CHECK-LABEL: name: max_s32 +; CHECK-O0: [[LHS:%.*]]:_(s32) = COPY $w0 +; CHECK-O0: [[RHS:%.*]]:_(s32) = COPY $w1 +; CHECK-O0: [[CMP:%.*]]:_(s1) = G_ICMP intpred(sge), [[LHS]](s32), [[RHS]] +; CHECK-O0: [[RES:%.*]]:_(s32) = G_SELECT [[CMP]](s1), [[LHS]], [[RHS]] +; CHECK-O0: $w0 = COPY [[RES]] +; CHECK-O0: RET_ReallyLR implicit $w0 + +; CHECK-O2: [[LHS:%.*]]:_(s32) = COPY $w0 +; CHECK-O2: [[RHS:%.*]]:_(s32) = COPY $w1 +; CHECK-O2: [[UNUSED:%.*]]:_(s1) = G_ICMP intpred(sge), [[LHS]](s32), [[RHS]] +; CHECK-O2: [[RES:%.*]]:_(s32) = G_SMAX [[LHS]], [[RHS]] +; CHECK-O2: $w0 = COPY [[RES]] +; CHECK-O2: RET_ReallyLR implicit $w0 + %cmp = icmp sge i32 %a, %b + %res = select i1 %cmp, i32 %a, i32 %b + ret i32 %res +} + +; CHECK-LABEL: name: max_f32 +; CHECK-O0: [[LHS:%.*]]:_(s32) = COPY $s0 +; 
CHECK-O0: [[RHS:%.*]]:_(s32) = G_FCONSTANT float 1.000000e+00 +; CHECK-O0: [[CMP:%.*]]:_(s1) = G_FCMP floatpred(ogt), [[LHS]](s32), [[RHS]] +; CHECK-O0: [[RES:%.*]]:_(s32) = G_SELECT [[CMP]](s1), [[LHS]], [[RHS]] +; CHECK-O0: $s0 = COPY [[RES]] +; CHECK-O0: RET_ReallyLR implicit $s0 + +; CHECK-O2: [[LHS:%.*]]:_(s32) = COPY $s0 +; CHECK-O2: [[RHS:%.*]]:_(s32) = G_FCONSTANT float 1.000000e+00 +; CHECK-O2: [[UNUSED:%.*]]:_(s1) = G_FCMP floatpred(ogt), [[LHS]](s32), [[RHS]] +; CHECK-O2: [[RES:%.*]]:_(s32) = G_FMAXNUM [[LHS]], [[RHS]] +; CHECK-O2: $s0 = COPY [[RES]] +; CHECK-O2: RET_ReallyLR implicit $s0 +define float @max_f32(float %a) { + %cmp = fcmp ogt float %a, 1.000000e+00 + %narrow.sel = select i1 %cmp, float %a, float 1.000000e+00 + ret float %narrow.sel +} + +; CHECK-LABEL: name: min_s32 +; CHECK-O0: [[LHS:%.*]]:_(s32) = COPY $w0 +; CHECK-O0: [[RHS:%.*]]:_(s32) = COPY $w1 +; CHECK-O0: [[CMP:%.*]]:_(s1) = G_ICMP intpred(sge), [[LHS]](s32), [[RHS]] +; CHECK-O0: [[RES:%.*]]:_(s32) = G_SELECT [[CMP]](s1), [[RHS]], [[LHS]] +; CHECK-O0: $w0 = COPY [[RES]] +; CHECK-O0: RET_ReallyLR implicit $w0 + +; CHECK-O2: [[LHS:%.*]]:_(s32) = COPY $w0 +; CHECK-O2: [[RHS:%.*]]:_(s32) = COPY $w1 +; CHECK-O2: [[UNUSED:%.*]]:_(s1) = G_ICMP intpred(sge), [[LHS]](s32), [[RHS]] +; CHECK-O2: [[RES:%.*]]:_(s32) = G_SMIN [[LHS]], [[RHS]] +; CHECK-O2: $w0 = COPY [[RES]] +; CHECK-O2: RET_ReallyLR implicit $w0 +define i32 @min_s32(i32 %a, i32 %b) { + %cmp = icmp sge i32 %a, %b + %res = select i1 %cmp, i32 %b, i32 %a + ret i32 %res +} + +; CHECK-LABEL: name: min_f32 +; CHECK-O0: [[LHS:%.*]]:_(s32) = COPY $s0 +; CHECK-O0: [[RHS:%.*]]:_(s32) = G_FCONSTANT float 1.000000e+00 +; CHECK-O0: [[CMP:%.*]]:_(s1) = G_FCMP floatpred(ole), [[LHS]](s32), [[RHS]] +; CHECK-O0: [[RES:%.*]]:_(s32) = G_SELECT [[CMP]](s1), [[LHS]], [[RHS]] +; CHECK-O0: $s0 = COPY [[RES]] +; CHECK-O0: RET_ReallyLR implicit $s0 + +; CHECK-O2: [[LHS:%.*]]:_(s32) = COPY $s0 +; CHECK-O2: [[RHS:%.*]]:_(s32) = G_FCONSTANT float 
1.000000e+00 +; CHECK-O2: [[UNUSED:%.*]]:_(s1) = G_FCMP floatpred(ole), [[LHS]](s32), [[RHS]] +; CHECK-O2: [[RES:%.*]]:_(s32) = G_FMINNUM [[LHS]], [[RHS]] +; CHECK-O2: $s0 = COPY [[RES]] +; CHECK-O2: RET_ReallyLR implicit $s0 +define float @min_f32(float %a) { + %cmp = fcmp ole float %a, 1.000000e+00 + %narrow.sel = select i1 %cmp, float %a, float 1.000000e+00 + ret float %narrow.sel +} + +; CHECK-LABEL: name: abs_s32 +; CHECK-O0: [[LHS:%.*]]:_(s32) = COPY $w0 +; CHECK-O0: [[ZERO:%.*]]:_(s32) = G_CONSTANT i32 0 +; CHECK-O0: [[RHS:%.*]]:_(s32) = G_SUB [[ZERO]], [[LHS]] +; CHECK-O0: [[CMP:%.*]]:_(s1) = G_ICMP intpred(slt), [[LHS]](s32), [[ZERO]] +; CHECK-O0: [[RES:%.*]]:_(s32) = G_SELECT [[CMP]](s1), [[RHS]], [[LHS]] +; CHECK-O0: $w0 = COPY [[RES]] +; CHECK-O0: RET_ReallyLR implicit $w0 + +; CHECK-O2: [[LHS:%.*]]:_(s32) = COPY $w0 +; CHECK-O2: [[ZERO:%.*]]:_(s32) = G_CONSTANT i32 0 +; CHECK-O2: [[UNUSED_RHS:%.*]]:_(s32) = G_SUB [[ZERO]], [[LHS]] +; CHECK-O2: [[UNUSED_CMP:%.*]]:_(s1) = G_ICMP intpred(slt), [[LHS]](s32), [[ZERO]] +; CHECK-O2: [[RES:%.*]]:_(s32) = G_ABS [[LHS]] +; CHECK-O2: $w0 = COPY [[RES]] +; CHECK-O2: RET_ReallyLR implicit $w0 +define i32 @abs_s32(i32 %a) { + %tmp1neg = sub i32 0, %a + %b = icmp slt i32 %a, 0 + %abs = select i1 %b, i32 %tmp1neg, i32 %a + ret i32 %abs +} + +; CHECK-LABEL: name: abs_neg +; CHECK-O0: [[LHS:%.*]]:_(s32) = COPY $w0 +; CHECK-O0: [[ZERO:%.*]]:_(s32) = G_CONSTANT i32 0 +; CHECK-O0: [[RHS:%.*]]:_(s32) = G_SUB [[ZERO]], [[LHS]] +; CHECK-O0: [[CMP:%.*]]:_(s1) = G_ICMP intpred(slt), [[LHS]](s32), [[ZERO]] +; CHECK-O0: [[RES:%.*]]:_(s32) = G_SELECT [[CMP]](s1), [[LHS]], [[RHS]] +; CHECK-O0: $w0 = COPY [[RES]] +; CHECK-O0: RET_ReallyLR implicit $w0 + +; CHECK-O2: [[LHS:%.*]]:_(s32) = COPY $w0 +; CHECK-O2: [[ZERO:%.*]]:_(s32) = G_CONSTANT i32 0 +; CHECK-O2: [[UNUSED_RHS:%.*]]:_(s32) = G_SUB [[ZERO]], [[LHS]] +; CHECK-O2: [[UNUSED_CMP:%.*]]:_(s1) = G_ICMP intpred(slt), [[LHS]](s32), [[ZERO]] +; CHECK-O2: [[ABS_RES:%.*]]:_(s32) = 
G_ABS [[LHS]] +; CHECK-O2: [[ZERO:%.*]]:_(s32) = G_CONSTANT i32 0 +; CHECK-O2: [[RES:%.*]]:_(s32) = G_SUB [[ZERO]], [[ABS_RES]] +; CHECK-O2: $w0 = COPY [[RES]] +; CHECK-O2: RET_ReallyLR implicit $w0 +define i32 @abs_neg(i32 %a) { + %tmp1neg = sub i32 0, %a + %b = icmp slt i32 %a, 0 + %abs = select i1 %b, i32 %a, i32 %tmp1neg + ret i32 %abs +} + diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -301,18 +301,11 @@ ; ; GISEL-LABEL: uabdl4s_rdx_i32: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 -; GISEL-NEXT: usubl.4s v0, v0, v1 -; GISEL-NEXT: cmgt.4s v1, v2, v0 -; GISEL-NEXT: neg.4s v2, v0 -; GISEL-NEXT: shl.4s v1, v1, #31 -; GISEL-NEXT: sshr.4s v1, v1, #31 -; GISEL-NEXT: bit.16b v0, v2, v1 +; GISEL-NEXT: uabdl.4s v0, v0, v1 ; GISEL-NEXT: addv.4s s0, v0 ; GISEL-NEXT: fmov w0, s0 ; GISEL-NEXT: ret -; GISel doesn't match this pattern yet. 
%aext = zext <4 x i16> %a to <4 x i32> %bext = zext <4 x i16> %b to <4 x i32> %abdiff = sub nsw <4 x i32> %aext, %bext @@ -374,13 +367,7 @@ ; ; GISEL-LABEL: uabdl2d_rdx_i64: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 -; GISEL-NEXT: usubl.2d v0, v0, v1 -; GISEL-NEXT: cmgt.2d v1, v2, v0 -; GISEL-NEXT: neg.2d v2, v0 -; GISEL-NEXT: shl.2d v1, v1, #63 -; GISEL-NEXT: sshr.2d v1, v1, #63 -; GISEL-NEXT: bit.16b v0, v2, v1 +; GISEL-NEXT: uabdl.2d v0, v0, v1 ; GISEL-NEXT: addp.2d d0, v0 ; GISEL-NEXT: fmov x0, d0 ; GISEL-NEXT: ret @@ -1572,12 +1559,7 @@ ; ; GISEL-LABEL: abspattern1: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.2s v2, v0 -; GISEL-NEXT: cmge.2s v1, v0, v1 -; GISEL-NEXT: shl.2s v1, v1, #31 -; GISEL-NEXT: sshr.2s v1, v1, #31 -; GISEL-NEXT: bif.8b v0, v2, v1 +; GISEL-NEXT: abs.2s v0, v0 ; GISEL-NEXT: ret %tmp1neg = sub <2 x i32> zeroinitializer, %a @@ -1594,14 +1576,8 @@ ; ; GISEL-LABEL: abspattern2: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.4h v2, v0 -; GISEL-NEXT: cmgt.4h v1, v0, v1 -; GISEL-NEXT: shl.4h v1, v1, #15 -; GISEL-NEXT: sshr.4h v1, v1, #15 -; GISEL-NEXT: bif.8b v0, v2, v1 +; GISEL-NEXT: abs.4h v0, v0 ; GISEL-NEXT: ret -; For GlobalISel, this generates terrible code until we can pattern match this to abs. 
%tmp1neg = sub <4 x i16> zeroinitializer, %a %b = icmp sgt <4 x i16> %a, zeroinitializer @@ -1617,12 +1593,7 @@ ; ; GISEL-LABEL: abspattern3: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.8b v2, v0 -; GISEL-NEXT: cmgt.8b v1, v1, v0 -; GISEL-NEXT: shl.8b v1, v1, #7 -; GISEL-NEXT: sshr.8b v1, v1, #7 -; GISEL-NEXT: bit.8b v0, v2, v1 +; GISEL-NEXT: abs.8b v0, v0 ; GISEL-NEXT: ret %tmp1neg = sub <8 x i8> zeroinitializer, %a @@ -1639,12 +1610,7 @@ ; ; GISEL-LABEL: abspattern4: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.4s v2, v0 -; GISEL-NEXT: cmge.4s v1, v0, v1 -; GISEL-NEXT: shl.4s v1, v1, #31 -; GISEL-NEXT: sshr.4s v1, v1, #31 -; GISEL-NEXT: bif.16b v0, v2, v1 +; GISEL-NEXT: abs.4s v0, v0 ; GISEL-NEXT: ret %tmp1neg = sub <4 x i32> zeroinitializer, %a @@ -1661,12 +1627,7 @@ ; ; GISEL-LABEL: abspattern5: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.8h v2, v0 -; GISEL-NEXT: cmgt.8h v1, v0, v1 -; GISEL-NEXT: shl.8h v1, v1, #15 -; GISEL-NEXT: sshr.8h v1, v1, #15 -; GISEL-NEXT: bif.16b v0, v2, v1 +; GISEL-NEXT: abs.8h v0, v0 ; GISEL-NEXT: ret %tmp1neg = sub <8 x i16> zeroinitializer, %a @@ -1683,12 +1644,7 @@ ; ; GISEL-LABEL: abspattern6: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.16b v2, v0 -; GISEL-NEXT: cmgt.16b v1, v1, v0 -; GISEL-NEXT: shl.16b v1, v1, #7 -; GISEL-NEXT: sshr.16b v1, v1, #7 -; GISEL-NEXT: bit.16b v0, v2, v1 +; GISEL-NEXT: abs.16b v0, v0 ; GISEL-NEXT: ret %tmp1neg = sub <16 x i8> zeroinitializer, %a @@ -1712,6 +1668,7 @@ ; GISEL-NEXT: sshr.2d v1, v1, #63 ; GISEL-NEXT: bit.16b v0, v2, v1 ; GISEL-NEXT: ret +; the difference from dag-combine %tmp1neg = sub <2 x i64> zeroinitializer, %a %b = icmp sle <2 x i64> %a, zeroinitializer @@ -1727,14 +1684,10 @@ ; ; GISEL-LABEL: uabd_i32: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: ssubl.2d v0, v0, v1 -; GISEL-NEXT: cmgt.2d v1, v2, 
v0 -; GISEL-NEXT: neg.2d v2, v0 -; GISEL-NEXT: shl.2d v1, v1, #63 -; GISEL-NEXT: sshr.2d v1, v1, #63 -; GISEL-NEXT: bit.16b v0, v2, v1 +; GISEL-NEXT: abs.2d v0, v0 ; GISEL-NEXT: ret +; the difference from dag-combine %aext = sext <2 x i32> %a to <2 x i64> %bext = sext <2 x i32> %b to <2 x i64> %abdiff = sub nsw <2 x i64> %aext, %bext