Index: llvm/include/llvm/Analysis/ValueTracking.h =================================================================== --- llvm/include/llvm/Analysis/ValueTracking.h +++ llvm/include/llvm/Analysis/ValueTracking.h @@ -239,6 +239,10 @@ std::optional SignBit; + bool isUnknown() const { + return KnownFPClasses == fcAllFlags; + } + /// Return true if it's known this can never be a nan. bool isKnownNeverNaN() const { return (KnownFPClasses & fcNan) == fcNone; Index: llvm/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/lib/Analysis/ValueTracking.cpp +++ llvm/lib/Analysis/ValueTracking.cpp @@ -4090,6 +4090,20 @@ return KnownFromAssume; } +void computeKnownFPClass(const Value *V, const APInt &DemandedElts, + FPClassTest InterestedClasses, KnownFPClass &Known, + unsigned Depth, const Query &Q, + const TargetLibraryInfo *TLI); + +static void computeKnownFPClass(const Value *V, KnownFPClass &Known, + FPClassTest InterestedClasses, unsigned Depth, + const Query &Q, const TargetLibraryInfo *TLI) { + auto *FVTy = dyn_cast(V->getType()); + APInt DemandedElts = + FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1); + computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q, TLI); +} + // TODO: Merge implementations of CannotBeNegativeZero, // cannotBeOrderedLessThanZero into here. void computeKnownFPClass(const Value *V, const APInt &DemandedElts, @@ -4520,6 +4534,45 @@ break; } + case Instruction::InsertElement: { + KnownFPClass Known2; + if (isa(Op->getType())) { + Known.resetAll(); + return; + } + const Value *Vec = Op->getOperand(0); + const Value *Elt = Op->getOperand(1); + auto *CIdx = dyn_cast(Op->getOperand(2)); + // Early out if the index is non-constant or out-of-range. 
+ unsigned NumElts = DemandedElts.getBitWidth(); + if (!CIdx || CIdx->getValue().uge(NumElts)) { + Known.resetAll(); + return; + } + + Known.KnownFPClasses = fcNone; + Known.SignBit = std::nullopt; + + unsigned EltIdx = CIdx->getZExtValue(); + // Do we demand the inserted element? + if (DemandedElts[EltIdx]) { + computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q, TLI); + // If nothing is known about the inserted element, early out. + if (Known.isUnknown()) + break; + } + + // The base vector lane at EltIdx is overwritten; don't demand it. + APInt DemandedVecElts = DemandedElts; + DemandedVecElts.clearBit(EltIdx); + if (!!DemandedVecElts) { + computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2, + Depth + 1, Q, TLI); + Known |= Known2; + } + + break; + } case Instruction::ExtractValue: { computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses, Known, Depth + 1, Q, TLI); Index: llvm/test/Transforms/Attributor/nofpclass.ll =================================================================== --- llvm/test/Transforms/Attributor/nofpclass.ll +++ llvm/test/Transforms/Attributor/nofpclass.ll @@ -796,3 +796,98 @@ %extract = extractvalue [4 x float] %array, 0 ret float %extract } + +define <4 x float> @insertelement_constant_chain() { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan ninf nzero sub) <4 x float> @insertelement_constant_chain +; CHECK-SAME: () #[[ATTR2]] { +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <4 x float> poison, float 1.000000e+00, i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x float> [[INS_0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float -9.000000e+00, i32 2 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement <4 x float> [[INS_2]], float 0x7FF0000000000000, i32 3 +; CHECK-NEXT: ret <4 x float> [[INS_3]] +; + %ins.0 = insertelement <4 x float> poison, float 1.0, i32 0 + %ins.1 = insertelement <4 x float> 
%ins.0, float 0.0, i32 1 + %ins.2 = insertelement <4 x float> %ins.1, float -9.0, i32 2 + %ins.3 = insertelement <4 x float> %ins.2, float 0x7FF0000000000000, i32 3 + ret <4 x float> %ins.3 +} + +define @insertelement_scalable_constant_chain() { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define @insertelement_scalable_constant_chain +; CHECK-SAME: () #[[ATTR2]] { +; CHECK-NEXT: [[INS_0:%.*]] = insertelement poison, float 1.000000e+00, i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement [[INS_0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement [[INS_1]], float -9.000000e+00, i32 2 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement [[INS_2]], float 0x7FF0000000000000, i32 3 +; CHECK-NEXT: ret [[INS_3]] +; + %ins.0 = insertelement poison, float 1.0, i32 0 + %ins.1 = insertelement %ins.0, float 0.0, i32 1 + %ins.2 = insertelement %ins.1, float -9.0, i32 2 + %ins.3 = insertelement %ins.2, float 0x7FF0000000000000, i32 3 + ret %ins.3 +} + +define <4 x float> @insertelement_unknown_base(<4 x float> %arg0) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define <4 x float> @insertelement_unknown_base +; CHECK-SAME: (<4 x float> [[ARG0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x float> [[ARG0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: ret <4 x float> [[INSERT]] +; + %insert = insertelement <4 x float> %arg0, float 0.0, i32 1 + ret <4 x float> %insert +} + +define float @insertelement_extractelement_same(<4 x float> %arg0) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan inf nzero sub norm) float @insertelement_extractelement_same +; CHECK-SAME: (<4 x float> [[ARG0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x float> [[ARG0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[INSERT]], i32 1 
+; CHECK-NEXT: ret float [[EXTRACT]] +; + %insert = insertelement <4 x float> %arg0, float 0.0, i32 1 + %extract = extractelement <4 x float> %insert, i32 1 + ret float %extract +} + +define float @insertelement_extractelement_different(<4 x float> nofpclass(zero) %arg0) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(zero) float @insertelement_extractelement_different +; CHECK-SAME: (<4 x float> nofpclass(zero) [[ARG0:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x float> [[ARG0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[INSERT]], i32 2 +; CHECK-NEXT: ret float [[EXTRACT]] +; + %insert = insertelement <4 x float> %arg0, float 0.0, i32 1 + %extract = extractelement <4 x float> %insert, i32 2 + ret float %extract +} + +define float @insertelement_extractelement_unknown(<4 x float> nofpclass(zero) %arg0, i32 %idx) { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nzero) float @insertelement_extractelement_unknown +; CHECK-SAME: (<4 x float> nofpclass(zero) [[ARG0:%.*]], i32 [[IDX:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x float> [[ARG0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[INSERT]], i32 [[IDX]] +; CHECK-NEXT: ret float [[EXTRACT]] +; + %insert = insertelement <4 x float> %arg0, float 0.0, i32 1 + %extract = extractelement <4 x float> %insert, i32 %idx + ret float %extract +} + +define <4 x float> @insertelement_index_oob_chain() { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define <4 x float> @insertelement_index_oob_chain +; CHECK-SAME: () #[[ATTR2]] { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x float> zeroinitializer, float 0x7FF0000000000000, i32 4 +; CHECK-NEXT: ret <4 x float> [[INSERT]] +; + %insert = insertelement 
<4 x float> zeroinitializer, float 0x7FF0000000000000, i32 4 + ret <4 x float> %insert +}