Add an i32 flag operand to llvm.vector.reduce.fadd and thread an optional
Flag value through IRBuilder CreateFAdd / CreateConstrainedFPBinOp.

Review notes:
 * Line breaks, context-line whitespace and stripped template arguments were
   reconstructed from a collapsed copy of this patch; verify the context
   against the tree before applying.
 * ExpandReductions: the flag is now tested by value. The previous null check
   on the call operand could never succeed, so ordered reductions were never
   emitted.
 * IRBuilder.cpp: operands are gathered in a SmallVector instead of
   re-assigning a std::initializer_list (dangling backing array).
 * The AutoUpgrade.cpp changes were dropped. They used call operand 2 of the
   legacy x86 add intrinsics as the flag, but the scalar add intrinsics take
   only two operands, and on the masked path operand 2 is the value handed to
   EmitX86Select together with the result, not an i32 flag.

diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1420,10 +1420,13 @@
   }
 
+  /// \p Flag is forwarded to CreateConstrainedFPBinOp when the builder is in
+  /// constrained FP mode.
   Value *CreateFAdd(Value *L, Value *R, const Twine &Name = "",
-                    MDNode *FPMD = nullptr) {
+                    MDNode *FPMD = nullptr, Value *Flag = nullptr) {
     if (IsFPConstrained)
       return CreateConstrainedFPBinOp(Intrinsic::experimental_constrained_fadd,
-                                      L, R, nullptr, Name, FPMD);
+                                      L, R, nullptr, Name, FPMD, llvm::None,
+                                      llvm::None, Flag);
 
     if (Value *V = foldConstant(Instruction::FAdd, L, R, Name)) return V;
     Instruction *I = setFPAttrs(BinaryOperator::CreateFAdd(L, R), FPMD, FMF);
@@ -1579,7 +1582,7 @@
       Intrinsic::ID ID, Value *L, Value *R, Instruction *FMFSource = nullptr,
       const Twine &Name = "", MDNode *FPMathTag = nullptr,
       Optional<RoundingMode> Rounding = None,
-      Optional<fp::ExceptionBehavior> Except = None);
+      Optional<fp::ExceptionBehavior> Except = None, Value *Flag = nullptr);
 
   Value *CreateNeg(Value *V, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1709,7 +1709,7 @@
 
   def int_vector_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
                                          [LLVMVectorElementType<0>,
-                                          llvm_anyvector_ty]>;
+                                          llvm_anyvector_ty, llvm_i32_ty]>;
   def int_vector_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
                                          [LLVMVectorElementType<0>,
                                           llvm_anyvector_ty]>;
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -116,7 +116,30 @@
     Builder.setFastMathFlags(FMF);
     switch (ID) {
     default: llvm_unreachable("Unexpected intrinsic!");
-    case Intrinsic::vector_reduce_fadd:
+    case Intrinsic::vector_reduce_fadd: {
+      // FMFs must be attached to the call, otherwise it's an ordered reduction
+      // and it can't be handled by generating a shuffle sequence.
+      Value *Acc = II->getArgOperand(0);
+      Value *Vec = II->getArgOperand(1);
+      // The flag is a call operand, so the pointer is never null; test its
+      // constant value instead. A non-constant flag is treated as unset.
+      // NOTE(review): assumes a non-zero flag permits reassociation - confirm.
+      auto *Flag = dyn_cast<ConstantInt>(II->getArgOperand(2));
+      bool FlagSet = Flag && !Flag->isZero();
+
+      if (!FMF.allowReassoc() && !FlagSet)
+        Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK);
+      else {
+        if (!isPowerOf2_32(
+                cast<FixedVectorType>(Vec->getType())->getNumElements()))
+          continue;
+
+        Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+        Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), Acc,
+                                  Rdx, "bin.rdx");
+      }
+      break;
+    }
     case Intrinsic::vector_reduce_fmul: {
       // FMFs must be attached to the call, otherwise it's an ordered reduction
       // and it can't be handled by generating a shuffle sequence.
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -860,9 +860,8 @@
 
 CallInst *IRBuilderBase::CreateConstrainedFPBinOp(
     Intrinsic::ID ID, Value *L, Value *R, Instruction *FMFSource,
-    const Twine &Name, MDNode *FPMathTag,
-    Optional<RoundingMode> Rounding,
-    Optional<fp::ExceptionBehavior> Except) {
+    const Twine &Name, MDNode *FPMathTag, Optional<RoundingMode> Rounding,
+    Optional<fp::ExceptionBehavior> Except, Value *Flag) {
   Value *RoundingV = getConstrainedFPRounding(Rounding);
   Value *ExceptV = getConstrainedFPExcept(Except);
 
@@ -870,8 +869,17 @@
   if (FMFSource)
     UseFMF = FMFSource->getFastMathFlags();
 
-  CallInst *C = CreateIntrinsic(ID, {L->getType()},
-                                {L, R, RoundingV, ExceptV}, nullptr, Name);
+  // Collect the operands in owning storage: re-assigning a braced list to a
+  // std::initializer_list would leave it referring to a dead temporary array.
+  // NOTE(review): no change to the constrained FP intrinsic signatures is
+  // visible in this patch - confirm the callee accepts Flag before passing it.
+  SmallVector<Value *, 5> Args{L, R};
+  if (Flag)
+    Args.push_back(Flag);
+  Args.push_back(RoundingV);
+  Args.push_back(ExceptV);
+
+  CallInst *C = CreateIntrinsic(ID, {L->getType()}, Args, nullptr, Name);
   setConstrainedFPCallAttr(C);
   setFPAttrs(C, FPMathTag, UseFMF);
   return C;
diff --git a/llvm/test/Verifier/reduction-intrinsics.ll b/llvm/test/Verifier/reduction-intrinsics.ll
--- a/llvm/test/Verifier/reduction-intrinsics.ll
+++ b/llvm/test/Verifier/reduction-intrinsics.ll
@@ -18,7 +18,7 @@
 
 define float @fadd_match_arg_types(<4 x float> %x) {
 ; CHECK: Intrinsic has incorrect argument type!
-  %r = call float @llvm.vector.reduce.fadd.v4f32(double 0.0, <4 x float> %x)
+  %r = call float @llvm.vector.reduce.fadd.v4f32(double 0.0, <4 x float> %x, i32 0)
   ret float %r
 }
 
@@ -47,7 +47,7 @@
 
 define i32 @not_integer_reduce(<4 x i32> %x) {
 ; CHECK: Intrinsic has incorrect argument type!
-  %r = call i32 @llvm.vector.reduce.fadd.v4i32(i32 0, <4 x i32> %x)
+  %r = call i32 @llvm.vector.reduce.fadd.v4i32(i32 0, <4 x i32> %x, i32 0)
   ret i32 %r
 }
 
@@ -59,8 +59,8 @@
 declare float @llvm.vector.reduce.umin.v4f32(<4 x float>)
 declare i32* @llvm.vector.reduce.or.v4p0i32(<4 x i32*>)
 
-declare i32 @llvm.vector.reduce.fadd.v4i32(i32, <4 x i32>)
-declare float @llvm.vector.reduce.fadd.v4f32(double, <4 x float>)
+declare i32 @llvm.vector.reduce.fadd.v4i32(i32, <4 x i32>, i32)
+declare float @llvm.vector.reduce.fadd.v4f32(double, <4 x float>, i32)
 declare i32* @llvm.vector.reduce.fmin.v4p0i32(<4 x i32*>)
 declare float @llvm.vector.reduce.fmax.f32(float)
 declare i32 @llvm.vector.reduce.smax.i32(i32)