Index: llvm/include/llvm/Analysis/ValueTracking.h
===================================================================
--- llvm/include/llvm/Analysis/ValueTracking.h
+++ llvm/include/llvm/Analysis/ValueTracking.h
@@ -243,6 +243,12 @@
     return (KnownFPClasses & Mask) == fcNone;
   }
 
+  /// Return true if no information is recorded: every class is still possible
+  /// and SignBit is empty/false (presumably an unset optional).
+  bool isUnknown() const {
+    return KnownFPClasses == fcAllFlags && !SignBit;
+  }
+
   /// Return true if it's known this can never be a nan.
   bool isKnownNeverNaN() const {
     return isKnownNever(fcNan);
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -4105,12 +4105,32 @@
   return KnownFromAssume;
 }
 
+// Forward declaration: the convenience overload below calls the
+// DemandedElts form, which is defined after it.
+void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
+                         FPClassTest InterestedClasses, KnownFPClass &Known,
+                         unsigned Depth, const Query &Q,
+                         const TargetLibraryInfo *TLI);
+
+/// Overload that builds DemandedElts itself: all ones for a fixed-width
+/// vector, otherwise a single set bit.
+static void computeKnownFPClass(const Value *V, KnownFPClass &Known,
+                                FPClassTest InterestedClasses, unsigned Depth,
+                                const Query &Q, const TargetLibraryInfo *TLI) {
+  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
+  APInt DemandedElts =
+      FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
+  computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q, TLI);
+}
+
 // TODO: Merge implementations of CannotBeNegativeZero,
 // cannotBeOrderedLessThanZero into here.
 void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
                          FPClassTest InterestedClasses, KnownFPClass &Known,
                          unsigned Depth, const Query &Q,
                          const TargetLibraryInfo *TLI) {
+  assert(Known.isUnknown() && "should not be called with known information");
+
   if (!DemandedElts) {
     // No demanded elts, better to assume we don't know anything.
     Known.resetAll();
@@ -4545,6 +4565,44 @@
 
     break;
   }
+  case Instruction::InsertElement: {
+    // Scalable vectors are not handled here; leave Known as it was passed in.
+    if (isa<ScalableVectorType>(Op->getType()))
+      return;
+
+    const Value *Vec = Op->getOperand(0);
+    const Value *Elt = Op->getOperand(1);
+    auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2));
+    // Early out if the index is non-constant or out-of-range.
+    unsigned NumElts = DemandedElts.getBitWidth();
+    if (!CIdx || CIdx->getValue().uge(NumElts))
+      return;
+
+    unsigned EltIdx = CIdx->getZExtValue();
+    // Do we demand the inserted element?
+    if (DemandedElts[EltIdx]) {
+      computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q, TLI);
+      // If nothing is known about the inserted element, early out.
+      if (Known.isUnknown())
+        break;
+    } else {
+      // The inserted element is not demanded, so it contributes no classes to
+      // the union with the base vector below.
+      Known.KnownFPClasses = fcNone;
+    }
+
+    // We don't need the base vector element that has been inserted.
+    APInt DemandedVecElts = DemandedElts;
+    DemandedVecElts.clearBit(EltIdx);
+    if (!!DemandedVecElts) {
+      KnownFPClass Known2;
+      computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
+                          Depth + 1, Q, TLI);
+      Known |= Known2;
+    }
+
+    break;
+  }
   case Instruction::ExtractValue:
   case Instruction::Freeze: {
     computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
Index: llvm/test/Transforms/Attributor/nofpclass.ll
===================================================================
--- llvm/test/Transforms/Attributor/nofpclass.ll
+++ llvm/test/Transforms/Attributor/nofpclass.ll
@@ -908,3 +908,117 @@
   %elt = extractelement <2 x float> %freeze, i32 0
   ret float %elt
 }
+
+define <4 x float> @insertelement_constant_chain() {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define nofpclass(nan ninf nzero sub) <4 x float> @insertelement_constant_chain
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <4 x float> poison, float 1.000000e+00, i32 0
+; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <4 x float> [[INS_0]], float 0.000000e+00, i32 1
+; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float -9.000000e+00, i32 2
+; CHECK-NEXT:    [[INS_3:%.*]] = insertelement <4 x float> [[INS_2]], float 0x7FF0000000000000, i32 3
+; CHECK-NEXT:    ret <4 x float> [[INS_3]]
+;
+  %ins.0 = insertelement <4 x float> poison, float 1.0, i32 0
+  %ins.1 = insertelement <4 x float> %ins.0, float 0.0, i32 1
+  %ins.2 = insertelement <4 x float> %ins.1, float -9.0, i32 2
+  %ins.3 = insertelement <4 x float> %ins.2, float 0x7FF0000000000000, i32 3
+  ret <4 x float> %ins.3
+}
+
+define <vscale x 4 x float> @insertelement_scalable_constant_chain() {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define <vscale x 4 x float> @insertelement_scalable_constant_chain
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <vscale x 4 x float> poison, float 1.000000e+00, i32 0
+; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <vscale x 4 x float> [[INS_0]], float 0.000000e+00, i32 1
+; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <vscale x 4 x float> [[INS_1]], float -9.000000e+00, i32 2
+; CHECK-NEXT:    [[INS_3:%.*]] = insertelement <vscale x 4 x float> [[INS_2]], float 0x7FF0000000000000, i32 3
+; CHECK-NEXT:    ret <vscale x 4 x float> [[INS_3]]
+;
+  %ins.0 = insertelement <vscale x 4 x float> poison, float 1.0, i32 0
+  %ins.1 = insertelement <vscale x 4 x float> %ins.0, float 0.0, i32 1
+  %ins.2 = insertelement <vscale x 4 x float> %ins.1, float -9.0, i32 2
+  %ins.3 = insertelement <vscale x 4 x float> %ins.2, float 0x7FF0000000000000, i32 3
+  ret <vscale x 4 x float> %ins.3
+}
+
+define <4 x float> @insertelement_unknown_base(<4 x float> %arg0) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define <4 x float> @insertelement_unknown_base
+; CHECK-SAME: (<4 x float> [[ARG0:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[INSERT:%.*]] = insertelement <4 x float> [[ARG0]], float 0.000000e+00, i32 1
+; CHECK-NEXT:    ret <4 x float> [[INSERT]]
+;
+  %insert = insertelement <4 x float> %arg0, float 0.0, i32 1
+  ret <4 x float> %insert
+}
+
+define float @insertelement_extractelement_same(<4 x float> %arg0) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define nofpclass(nan inf nzero sub norm) float @insertelement_extractelement_same
+; CHECK-SAME: (<4 x float> [[ARG0:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[INSERT:%.*]] = insertelement <4 x float> [[ARG0]], float 0.000000e+00, i32 1
+; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[INSERT]], i32 1
+; CHECK-NEXT:    ret float [[EXTRACT]]
+;
+  %insert = insertelement <4 x float> %arg0, float 0.0, i32 1
+  %extract = extractelement <4 x float> %insert, i32 1
+  ret float %extract
+}
+
+define float @insertelement_extractelement_different(<4 x float> nofpclass(zero) %arg0) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define nofpclass(zero) float @insertelement_extractelement_different
+; CHECK-SAME: (<4 x float> nofpclass(zero) [[ARG0:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[INSERT:%.*]] = insertelement <4 x float> [[ARG0]], float 0.000000e+00, i32 1
+; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[INSERT]], i32 2
+; CHECK-NEXT:    ret float [[EXTRACT]]
+;
+  %insert = insertelement <4 x float> %arg0, float 0.0, i32 1
+  %extract = extractelement <4 x float> %insert, i32 2
+  ret float %extract
+}
+
+define float @insertelement_extractelement_unknown(<4 x float> nofpclass(zero) %arg0, i32 %idx) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define nofpclass(nzero) float @insertelement_extractelement_unknown
+; CHECK-SAME: (<4 x float> nofpclass(zero) [[ARG0:%.*]], i32 [[IDX:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[INSERT:%.*]] = insertelement <4 x float> [[ARG0]], float 0.000000e+00, i32 1
+; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[INSERT]], i32 [[IDX]]
+; CHECK-NEXT:    ret float [[EXTRACT]]
+;
+  %insert = insertelement <4 x float> %arg0, float 0.0, i32 1
+  %extract = extractelement <4 x float> %insert, i32 %idx
+  ret float %extract
+}
+
+define <4 x float> @insertelement_index_oob_chain() {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define <4 x float> @insertelement_index_oob_chain
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    [[INSERT:%.*]] = insertelement <4 x float> zeroinitializer, float 0x7FF0000000000000, i32 4
+; CHECK-NEXT:    ret <4 x float> [[INSERT]]
+;
+  %insert = insertelement <4 x float> zeroinitializer, float 0x7FF0000000000000, i32 4
+  ret <4 x float> %insert
+}
+
+define <2 x float> @multiple_extractelement(<4 x float> nofpclass(zero) %arg0) {
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none)
+; CHECK-LABEL: define nofpclass(zero) <2 x float> @multiple_extractelement
+; CHECK-SAME: (<4 x float> nofpclass(zero) [[ARG0:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[INSERT:%.*]] = insertelement <4 x float> [[ARG0]], float 0.000000e+00, i32 1
+; CHECK-NEXT:    [[EXTRACT2:%.*]] = extractelement <4 x float> [[INSERT]], i32 2
+; CHECK-NEXT:    [[EXTRACT3:%.*]] = extractelement <4 x float> [[INSERT]], i32 3
+; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <2 x float> poison, float [[EXTRACT3]], i32 0
+; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <2 x float> [[INS_0]], float [[EXTRACT2]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[INS_1]]
+;
+  %insert = insertelement <4 x float> %arg0, float 0.0, i32 1
+  %extract2 = extractelement <4 x float> %insert, i32 2
+  %extract3 = extractelement <4 x float> %insert, i32 3
+  %ins.0 = insertelement <2 x float> poison, float %extract3, i32 0
+  %ins.1 = insertelement <2 x float> %ins.0, float %extract2, i32 1
+  ret <2 x float> %ins.1
+}