diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2499,6 +2499,74 @@
   }
 }
 
+// Canonicalize a select between X and its negation, keyed on an fcmp of X
+// against +/-0.0, into fabs(X) or -fabs(X). Signed zero is the hazard: the
+// negated arm must be 'fsub +0.0, X', or, when it is an fneg, the select
+// must carry nsz. Returns the replacement, or nullptr if nothing matches.
+static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
+                                             InstCombinerImpl &IC) {
+  Value *Cond = SI.getCondition();
+
+  // Emits fabs(V) through the builder, passing &SI as the trailing argument.
+  auto BuildFabs = [&](Value *V) -> Value * {
+    return IC.Builder.CreateUnaryIntrinsic(Intrinsic::fabs, V, &SI);
+  };
+
+  // Swap == false: the false arm is X, the true arm is the negated value.
+  // Swap == true:  the true arm is X, the false arm is the negated value.
+  for (bool Swap : {false, true}) {
+    Value *X = Swap ? SI.getTrueValue() : SI.getFalseValue();
+    Value *NegX = Swap ? SI.getFalseValue() : SI.getTrueValue();
+
+    CmpInst::Predicate Pred;
+    if (!match(Cond, m_FCmp(Pred, m_Specific(X), m_AnyZeroFP())))
+      continue;
+
+    // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X), when 'Swap' is false
+    // (X > +/-0.0) ? X : (0.0 - X) --> fabs(X), when 'Swap' is true
+    if (match(NegX, m_FSub(m_PosZeroFP(), m_Specific(X)))) {
+      bool PredFits =
+          Swap ? (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)
+               : (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE);
+      if (PredFits)
+        return IC.replaceInstUsesWith(SI, BuildFabs(X));
+    }
+
+    // Everything below needs an fneg arm and nsz on the select.
+    // When 'Swap' is false:
+    //   (X < +/-0.0) ? -X : X or (X <= +/-0.0) ? -X : X --> fabs(X)
+    //   (X > +/-0.0) ? -X : X or (X >= +/-0.0) ? -X : X --> -fabs(X)
+    // When 'Swap' is true:
+    //   (X > +/-0.0) ? X : -X or (X >= +/-0.0) ? X : -X --> fabs(X)
+    //   (X < +/-0.0) ? X : -X or (X <= +/-0.0) ? X : -X --> -fabs(X)
+    if (!SI.hasNoSignedZeros() || !match(NegX, m_FNeg(m_Specific(X))))
+      return nullptr;
+
+    // Swapping the predicate maps the 'Swap' form onto the cases below.
+    switch (Swap ? FCmpInst::getSwappedPredicate(Pred) : Pred) {
+    case FCmpInst::FCMP_OLT:
+    case FCmpInst::FCMP_OLE:
+    case FCmpInst::FCMP_ULT:
+    case FCmpInst::FCMP_ULE:
+      return IC.replaceInstUsesWith(SI, BuildFabs(X));
+    case FCmpInst::FCMP_OGT:
+    case FCmpInst::FCMP_OGE:
+    case FCmpInst::FCMP_UGT:
+    case FCmpInst::FCMP_UGE: {
+      Instruction *NegFabs = UnaryOperator::CreateFNeg(BuildFabs(X));
+      // Copy the select's fast-math flags onto the new fneg.
+      NegFabs->setFastMathFlags(SI.getFastMathFlags());
+      return NegFabs;
+    }
+    default:
+      // Any other predicate: no fold for this orientation.
+      break;
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
@@ -2531,8 +2599,6 @@
   if (Instruction *I = canonicalizeScalarSelectOfVecs(SI, *this))
     return I;
 
-  CmpInst::Predicate Pred;
-
   // Avoid potential infinite loops by checking for non-constant condition.
   // TODO: Can we assert instead by improving canonicalizeSelectToShuffle()?
   // Scalar select must have simplified?
@@ -2773,42 +2839,9 @@
     }
   }
 
-  // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
-  // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.
-  // (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X)
-  if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
-      match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(FalseVal))) &&
-      (Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) {
-    Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI);
-    return replaceInstUsesWith(SI, Fabs);
-  }
-  // (X > +/-0.0) ? 
X : (0.0 - X) --> fabs(X) - if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) && - match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(TrueVal))) && - (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) { - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI); - return replaceInstUsesWith(SI, Fabs); - } - // With nnan and nsz: - // (X < +/-0.0) ? -X : X --> fabs(X) - // (X <= +/-0.0) ? -X : X --> fabs(X) - if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) && - match(TrueVal, m_FNeg(m_Specific(FalseVal))) && SI.hasNoSignedZeros() && - (Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE || - Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE)) { - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI); - return replaceInstUsesWith(SI, Fabs); - } - // With nnan and nsz: - // (X > +/-0.0) ? X : -X --> fabs(X) - // (X >= +/-0.0) ? X : -X --> fabs(X) - if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) && - match(FalseVal, m_FNeg(m_Specific(TrueVal))) && SI.hasNoSignedZeros() && - (Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE || - Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE)) { - Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI); - return replaceInstUsesWith(SI, Fabs); - } + // Fold selecting to fabs. + if (Instruction *Fabs = foldSelectWithFCmpToFabs(SI, *this)) + return Fabs; // See if we are selecting two values based on a comparison of the two values. if (ICmpInst *ICI = dyn_cast(CondVal)) diff --git a/llvm/test/Transforms/InstCombine/fneg-fabs.ll b/llvm/test/Transforms/InstCombine/fneg-fabs.ll --- a/llvm/test/Transforms/InstCombine/fneg-fabs.ll +++ b/llvm/test/Transforms/InstCombine/fneg-fabs.ll @@ -20,9 +20,8 @@ ; One test where the neg has fmfs. 
define double @select_nsz_nfabs_lt_fmfProp(double %x) { ; CHECK-LABEL: @select_nsz_nfabs_lt_fmfProp( -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[NEGX:%.*]] = fneg fast double [[X]] -; CHECK-NEXT: [[SEL:%.*]] = select nsz i1 [[CMP]], double [[X]], double [[NEGX]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SEL:%.*]] = fneg nsz double [[TMP1]] ; CHECK-NEXT: ret double [[SEL]] ; %cmp = fcmp olt double %x, 0.000000e+00 @@ -34,9 +33,8 @@ ; Tests with various predicate types. define double @select_nsz_nfabs_olt(double %x) { ; CHECK-LABEL: @select_nsz_nfabs_olt( -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[NEGX:%.*]] = fneg double [[X]] -; CHECK-NEXT: [[SEL:%.*]] = select nsz i1 [[CMP]], double [[X]], double [[NEGX]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SEL:%.*]] = fneg nsz double [[TMP1]] ; CHECK-NEXT: ret double [[SEL]] ; %cmp = fcmp olt double %x, 0.000000e+00 @@ -47,9 +45,8 @@ define double @select_nsz_nfabs_ult(double %x) { ; CHECK-LABEL: @select_nsz_nfabs_ult( -; CHECK-NEXT: [[CMP:%.*]] = fcmp ult double [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[NEGX:%.*]] = fneg double [[X]] -; CHECK-NEXT: [[SEL:%.*]] = select nsz i1 [[CMP]], double [[X]], double [[NEGX]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SEL:%.*]] = fneg nsz double [[TMP1]] ; CHECK-NEXT: ret double [[SEL]] ; %cmp = fcmp ult double %x, 0.000000e+00 @@ -60,9 +57,8 @@ define double @select_nsz_nfabs_ole(double %x) { ; CHECK-LABEL: @select_nsz_nfabs_ole( -; CHECK-NEXT: [[CMP:%.*]] = fcmp ole double [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[NEGX:%.*]] = fneg double [[X]] -; CHECK-NEXT: [[SEL:%.*]] = select nsz i1 [[CMP]], double [[X]], double [[NEGX]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SEL:%.*]] = fneg nsz 
double [[TMP1]] ; CHECK-NEXT: ret double [[SEL]] ; %cmp = fcmp ole double %x, 0.000000e+00 @@ -73,9 +69,8 @@ define double @select_nsz_nfabs_ule(double %x) { ; CHECK-LABEL: @select_nsz_nfabs_ule( -; CHECK-NEXT: [[CMP:%.*]] = fcmp ule double [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[NEGX:%.*]] = fneg double [[X]] -; CHECK-NEXT: [[SEL:%.*]] = select nsz i1 [[CMP]], double [[X]], double [[NEGX]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SEL:%.*]] = fneg nsz double [[TMP1]] ; CHECK-NEXT: ret double [[SEL]] ; %cmp = fcmp ule double %x, 0.000000e+00 @@ -103,9 +98,8 @@ ; One test where the neg has fmfs. define double @select_nsz_nfabs_gt_fmfProp(double %x) { ; CHECK-LABEL: @select_nsz_nfabs_gt_fmfProp( -; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[NEGX:%.*]] = fneg fast double [[X]] -; CHECK-NEXT: [[SEL:%.*]] = select nsz i1 [[CMP]], double [[NEGX]], double [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SEL:%.*]] = fneg nsz double [[TMP1]] ; CHECK-NEXT: ret double [[SEL]] ; %cmp = fcmp ogt double %x, 0.000000e+00 @@ -117,9 +111,8 @@ ; Tests with various predicate types. 
define double @select_nsz_nfabs_ogt(double %x) { ; CHECK-LABEL: @select_nsz_nfabs_ogt( -; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[NEGX:%.*]] = fneg double [[X]] -; CHECK-NEXT: [[SEL:%.*]] = select nsz i1 [[CMP]], double [[NEGX]], double [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SEL:%.*]] = fneg nsz double [[TMP1]] ; CHECK-NEXT: ret double [[SEL]] ; %cmp = fcmp ogt double %x, 0.000000e+00 @@ -130,9 +123,8 @@ define double @select_nsz_nfabs_ugt(double %x) { ; CHECK-LABEL: @select_nsz_nfabs_ugt( -; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt double [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[NEGX:%.*]] = fneg double [[X]] -; CHECK-NEXT: [[SEL:%.*]] = select nsz i1 [[CMP]], double [[NEGX]], double [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SEL:%.*]] = fneg nsz double [[TMP1]] ; CHECK-NEXT: ret double [[SEL]] ; %cmp = fcmp ugt double %x, 0.000000e+00 @@ -143,9 +135,8 @@ define double @select_nsz_nfabs_oge(double %x) { ; CHECK-LABEL: @select_nsz_nfabs_oge( -; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[NEGX:%.*]] = fneg double [[X]] -; CHECK-NEXT: [[SEL:%.*]] = select nsz i1 [[CMP]], double [[NEGX]], double [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SEL:%.*]] = fneg nsz double [[TMP1]] ; CHECK-NEXT: ret double [[SEL]] ; %cmp = fcmp oge double %x, 0.000000e+00 @@ -156,9 +147,8 @@ define double @select_nsz_nfabs_uge(double %x) { ; CHECK-LABEL: @select_nsz_nfabs_uge( -; CHECK-NEXT: [[CMP:%.*]] = fcmp uge double [[X:%.*]], 0.000000e+00 -; CHECK-NEXT: [[NEGX:%.*]] = fneg double [[X]] -; CHECK-NEXT: [[SEL:%.*]] = select nsz i1 [[CMP]], double [[NEGX]], double [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = call nsz double @llvm.fabs.f64(double [[X:%.*]]) +; CHECK-NEXT: [[SEL:%.*]] = fneg nsz double [[TMP1]] ; CHECK-NEXT: ret double [[SEL]] ; 
%cmp = fcmp uge double %x, 0.000000e+00 diff --git a/llvm/test/Transforms/InstCombine/fneg.ll b/llvm/test/Transforms/InstCombine/fneg.ll --- a/llvm/test/Transforms/InstCombine/fneg.ll +++ b/llvm/test/Transforms/InstCombine/fneg.ll @@ -714,13 +714,10 @@ ret float %fneg1 } -; TODO: This should reduce to fneg-of-fabs. - define float @fnabs(float %a) { ; CHECK-LABEL: @fnabs( -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[A:%.*]], 0.000000e+00 -; CHECK-NEXT: [[A_NEG:%.*]] = fneg fast float [[A]] -; CHECK-NEXT: [[FNEG1:%.*]] = select fast i1 [[CMP]], float [[A]], float [[A_NEG]] +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.fabs.f32(float [[A:%.*]]) +; CHECK-NEXT: [[FNEG1:%.*]] = fneg fast float [[TMP1]] ; CHECK-NEXT: ret float [[FNEG1]] ; %fneg = fneg float %a @@ -732,6 +729,19 @@ define float @fnabs_1(float %a) { ; CHECK-LABEL: @fnabs_1( +; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.fabs.f32(float [[A:%.*]]) +; CHECK-NEXT: [[FNEG1:%.*]] = fneg fast float [[TMP1]] +; CHECK-NEXT: ret float [[FNEG1]] +; + %fneg = fneg float %a + %cmp = fcmp ogt float %a, %fneg + %sel = select i1 %cmp, float %a, float %fneg + %fneg1 = fneg fast float %sel + ret float %fneg1 +} + +define float @fnabs_2(float %a) { +; CHECK-LABEL: @fnabs_2( ; CHECK-NEXT: [[TMP1:%.*]] = call nsz float @llvm.fabs.f32(float [[A:%.*]]) ; CHECK-NEXT: [[FNEG1:%.*]] = fneg float [[TMP1]] ; CHECK-NEXT: ret float [[FNEG1]]