diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -737,6 +737,10 @@ /// /// If the multiplication is known not to overflow then NoSignedWrap is set. Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap); + + /// Try to match a complex intrinsic that produces the given real/imaginary + /// pair. Returns whether or not it was successful. + bool createComplexMathInstruction(Value *Real, Value *Imag); }; class Negator final { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1413,6 +1413,33 @@ eraseInstFromFunction(*PrevSI); return nullptr; } + + // Is this potentially a complex instruction? 
+    auto OurGEP = dyn_cast<GetElementPtrInst>(Ptr);
+    auto TheirGEP = dyn_cast<GetElementPtrInst>(PrevSI->getOperand(1));
+    if (PrevSI->isUnordered() && OurGEP && TheirGEP &&
+        OurGEP->getOperand(0) == TheirGEP->getOperand(0) &&
+        OurGEP->getNumIndices() == TheirGEP->getNumIndices() &&
+        OurGEP->getType() == TheirGEP->getType()) {
+      bool AllMatch = true;
+      unsigned LastIndex = OurGEP->getNumIndices();
+      for (unsigned Index = 1; Index < LastIndex; Index++) {
+        if (OurGEP->getOperand(Index) != TheirGEP->getOperand(Index)) {
+          AllMatch = false;
+          break;
+        }
+      }
+      if (!AllMatch)
+        break;
+      if (match(OurGEP->getOperand(LastIndex), m_ConstantInt<1>()) &&
+          match(TheirGEP->getOperand(LastIndex), m_ConstantInt<0>())) {
+        IRBuilderBase::InsertPointGuard Guard(Builder);
+        Builder.SetInsertPoint(PrevSI);
+        if (createComplexMathInstruction(PrevSI->getOperand(0), Val))
+          return &SI;
+      }
+    }
+    break;
   }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1109,6 +1109,21 @@
   if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
     return NewI;
 
+  // Check if this is potentially a complex instruction that has been manually
+  // expanded.
+  ArrayRef<Type *> Fields = I.getType()->subtypes();
+  if (Fields.size() == 2 && Fields[0] == Fields[1] &&
+      Fields[0]->isFloatingPointTy()) {
+    Value *RealV, *ImgV;
+    if (match(&I, m_InsertValue<1>(m_InsertValue<0>(m_Value(), m_Value(RealV)),
+                                   m_Value(ImgV)))) {
+      IRBuilderBase::InsertPointGuard Guard(Builder);
+      Builder.SetInsertPoint(cast<Instruction>(I.getOperand(0)));
+      if (createComplexMathInstruction(RealV, ImgV))
+        return &I;
+    }
+  }
+
   return nullptr;
 }
 
@@ -1589,6 +1604,17 @@
   if (Instruction *Ext = narrowInsElt(IE, Builder))
     return Ext;
 
+  // Check for a potential computation of a complex instruction.
+  ElementCount Count = IE.getType()->getElementCount();
+  Value *RealV, *ImagV;
+  if (!Count.isScalable() && Count.getFixedValue() == 2 &&
+      match(&IE, m_InsertElt(m_InsertElt(m_Value(), m_Value(RealV),
+                                         m_ConstantInt<0>()),
+                             m_Value(ImagV), m_ConstantInt<1>()))) {
+    if (createComplexMathInstruction(RealV, ImagV))
+      return &IE;
+  }
+
   return nullptr;
 }
 
@@ -2793,3 +2819,120 @@
 
   return MadeChange ? &SVI : nullptr;
 }
+
+static cl::opt<bool> InstCombineComplex(
+    "inst-combine-complex",
+    cl::desc("Enable pattern match to llvm.complex.* intrinsics"));
+
+bool InstCombinerImpl::createComplexMathInstruction(Value *Real, Value *Imag) {
+  if (!InstCombineComplex)
+    return false;
+
+  Instruction *RealI = dyn_cast<Instruction>(Real);
+  Instruction *ImagI = dyn_cast<Instruction>(Imag);
+  if (!RealI || !ImagI)
+    return false;
+
+  // Don't try to handle vector instructions for now.
+  if (RealI->getType()->isVectorTy())
+    return false;
+
+  Value *Op0R, *Op0I, *Op1R, *Op1I, *Scale, *Numerator;
+  // Compute the intersection of all the fast math flags of the entire tree up
+  // to the point that the input complex numbers are specified.
+  auto computeFMF = [&]() {
+    SmallVector<Instruction *, 8> Worklist = {RealI, ImagI};
+    FastMathFlags Flags;
+    Flags.set();
+    while (!Worklist.empty()) {
+      Instruction *I = Worklist.back();
+      Worklist.pop_back();
+      Flags &= I->getFastMathFlags();
+      for (Use &U : I->operands()) {
+        Value *V = U.get();
+        if (V == Op0R || V == Op0I || V == Op1R || V == Op1I)
+          continue;
+        Worklist.push_back(cast<Instruction>(V));
+      }
+    }
+    return Flags;
+  };
+
+  Intrinsic::ID NewIntrinsic = Intrinsic::not_intrinsic;
+  // Check for complex multiply:
+  // real = op0.real * op1.real - op0.imag * op1.imag
+  // imag = op0.real * op1.imag + op0.imag * op1.real
+  if (match(Real, m_FSub(m_OneUse(m_FMul(m_Value(Op0R), m_Value(Op1R))),
+                         m_OneUse(m_FMul(m_Value(Op0I), m_Value(Op1I)))))) {
+    if (match(
+            Imag,
+            m_c_FAdd(m_OneUse(m_c_FMul(m_Specific(Op0R), m_Specific(Op1I))),
+                     m_OneUse(m_c_FMul(m_Specific(Op1R), m_Specific(Op0I)))))) {
+      NewIntrinsic = Intrinsic::experimental_complex_fmul;
+    }
+  }
+  // Check for complex div:
+  // real = (op0.real * op1.real + op0.imag * op1.imag) / scale
+  // imag = (op0.imag * op1.real - op0.real * op1.imag) / scale
+  // where scale = op1.real * op1.real + op1.imag * op1.imag
+  else if (match(Imag, m_FDiv(m_Value(Numerator), m_Value(Scale)))) {
+    if (match(Scale,
+              m_FAdd(m_OneUse(m_FMul(m_Value(Op1R), m_Deferred(Op1R))),
+                     m_OneUse(m_FMul(m_Value(Op1I), m_Deferred(Op1I)))))) {
+      // The matching of Op1R and Op1I is temporary; we may need to reverse
+      // the assignments.
+ auto checkNumerator = [&]() { + return match(Numerator, + m_OneUse(m_FSub( + m_OneUse(m_c_FMul(m_Value(Op0I), m_Specific(Op1R))), + m_OneUse(m_c_FMul(m_Value(Op0R), m_Specific(Op1I)))))); + }; + bool ImagMatches = checkNumerator(); + if (!ImagMatches) { + std::swap(Op1R, Op1I); + ImagMatches = checkNumerator(); + } + if (ImagMatches && + match(Real, + m_FDiv(m_OneUse(m_c_FAdd(m_OneUse(m_c_FMul(m_Specific(Op0R), + m_Specific(Op1R))), + m_OneUse(m_c_FMul(m_Specific(Op0I), + m_Specific(Op1I))))), + m_Specific(Scale)))) { + NewIntrinsic = Intrinsic::experimental_complex_fdiv; + } + } + } + + // Make sure we matched an intrinsic. + if (NewIntrinsic == Intrinsic::not_intrinsic) + return false; + + // Use the computation tree to capture all of the fast-math flags. + IRBuilderBase::FastMathFlagGuard FMFGuard(Builder); + Builder.setFastMathFlags(computeFMF()); + + Value *Op0 = Builder.CreateComplexValue(Op0R, Op0I); + Value *Op1 = Builder.CreateComplexValue(Op1R, Op1I); + + // Create new intrinsics. From our pattern matching of only the direct + // arithmetic formulas, we have to create them with the complex-limited-range. 
+ Value *Result; + switch (NewIntrinsic) { + case Intrinsic::experimental_complex_fmul: + Result = Builder.CreateComplexMul(Op0, Op1, true); + break; + case Intrinsic::experimental_complex_fdiv: + Result = Builder.CreateComplexDiv(Op0, Op1, true); + break; + default: + llvm_unreachable("Unexpected complex intrinsic"); + } + + replaceInstUsesWith(*RealI, + Builder.CreateExtractElement(Result, uint64_t(0))); + replaceInstUsesWith(*ImagI, + Builder.CreateExtractElement(Result, uint64_t(1))); + + return true; +} diff --git a/llvm/test/Transforms/InstCombine/complex-math.ll b/llvm/test/Transforms/InstCombine/complex-math.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/complex-math.ll @@ -0,0 +1,279 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals +; RUN: opt < %s -instcombine -S -inst-combine-complex | FileCheck %s + +; Check that we match the simple expansions of complex multiplication and +; division, whether the target complex value is made by returning a struct, +; vector, or by storing into memory. 
+ +%complex.double = type {double, double} + +define %complex.double @struct_mul(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: @struct_mul( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B:%.*]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[D:%.*]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.experimental.complex.fmul.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 1 +; CHECK-NEXT: [[RES:%.*]] = insertvalue [[COMPLEX_DOUBLE:%.*]] zeroinitializer, double [[TMP6]], 0 +; CHECK-NEXT: [[RES_1:%.*]] = insertvalue [[COMPLEX_DOUBLE]] [[RES]], double [[TMP7]], 1 +; CHECK-NEXT: ret [[COMPLEX_DOUBLE]] [[RES_1]] +; + %ac = fmul double %a, %c + %bd = fmul double %b, %d + %ad = fmul double %a, %d + %bc = fmul double %b, %c + %x = fsub double %ac, %bd + %y = fadd double %ad, %bc + %res = insertvalue %complex.double zeroinitializer, double %x, 0 + %res.1 = insertvalue %complex.double %res, double %y, 1 + ret %complex.double %res.1 +} + +define <2 x double> @vector_mul(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: @vector_mul( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B:%.*]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[D:%.*]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.experimental.complex.fmul.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) #[[ATTR1]] +; 
CHECK-NEXT: ret <2 x double> [[TMP5]] +; + %ac = fmul double %a, %c + %bd = fmul double %b, %d + %ad = fmul double %a, %d + %bc = fmul double %b, %c + %x = fsub double %ac, %bd + %y = fadd double %ad, %bc + %res = insertelement <2 x double> zeroinitializer, double %x, i32 0 + %res.1 = insertelement <2 x double> %res, double %y, i32 1 + ret <2 x double> %res.1 +} + +define void @memory_mul(double %a, double %b, double %c, double %d, %complex.double* %dest) { +; CHECK-LABEL: @memory_mul( +; CHECK-NEXT: [[DEST_REAL:%.*]] = getelementptr [[COMPLEX_DOUBLE:%.*]], %complex.double* [[DEST:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[DEST_IMAG:%.*]] = getelementptr [[COMPLEX_DOUBLE]], %complex.double* [[DEST]], i64 0, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B:%.*]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[D:%.*]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.experimental.complex.fmul.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) #[[ATTR1]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 1 +; CHECK-NEXT: store double [[TMP6]], double* [[DEST_REAL]], align 8 +; CHECK-NEXT: store double [[TMP7]], double* [[DEST_IMAG]], align 8 +; CHECK-NEXT: ret void +; + %ac = fmul double %a, %c + %bd = fmul double %b, %d + %ad = fmul double %a, %d + %bc = fmul double %b, %c + %x = fsub double %ac, %bd + %y = fadd double %ad, %bc + %dest.real = getelementptr %complex.double, %complex.double* %dest, i64 0, i32 0 + %dest.imag = getelementptr %complex.double, %complex.double* %dest, i64 0, i32 1 + store double %x, double* %dest.real + store double %y, double* %dest.imag + ret void +} + +define %complex.double @fast_mul(double %a, 
double %b, double %c, double %d) { +; CHECK-LABEL: @fast_mul( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B:%.*]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[D:%.*]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = call fast <2 x double> @llvm.experimental.complex.fmul.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) #[[ATTR1]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 1 +; CHECK-NEXT: [[RES:%.*]] = insertvalue [[COMPLEX_DOUBLE:%.*]] zeroinitializer, double [[TMP6]], 0 +; CHECK-NEXT: [[RES_1:%.*]] = insertvalue [[COMPLEX_DOUBLE]] [[RES]], double [[TMP7]], 1 +; CHECK-NEXT: ret [[COMPLEX_DOUBLE]] [[RES_1]] +; + %ac = fmul fast double %a, %c + %bd = fmul fast double %b, %d + %ad = fmul fast double %a, %d + %bc = fmul fast double %b, %c + %x = fsub fast double %ac, %bd + %y = fadd fast double %ad, %bc + %res = insertvalue %complex.double zeroinitializer, double %x, 0 + %res.1 = insertvalue %complex.double %res, double %y, 1 + ret %complex.double %res.1 +} + +define %complex.double @fastish_mul(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: @fastish_mul( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B:%.*]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[D:%.*]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = call ninf <2 x double> @llvm.experimental.complex.fmul.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) #[[ATTR1]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> 
[[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 1 +; CHECK-NEXT: [[RES:%.*]] = insertvalue [[COMPLEX_DOUBLE:%.*]] zeroinitializer, double [[TMP6]], 0 +; CHECK-NEXT: [[RES_1:%.*]] = insertvalue [[COMPLEX_DOUBLE]] [[RES]], double [[TMP7]], 1 +; CHECK-NEXT: ret [[COMPLEX_DOUBLE]] [[RES_1]] +; + %ac = fmul fast double %a, %c + %bd = fmul nnan ninf nsz double %b, %d + %ad = fmul ninf arcp contract double %a, %d + %bc = fmul reassoc nsz ninf double %b, %c + %x = fsub ninf arcp afn double %ac, %bd + %y = fadd afn nnan ninf double %ad, %bc + %res = insertvalue %complex.double zeroinitializer, double %x, 0 + %res.1 = insertvalue %complex.double %res, double %y, 1 + ret %complex.double %res.1 +} + +define %complex.double @struct_div(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: @struct_div( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B:%.*]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[D:%.*]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.experimental.complex.fdiv.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 1 +; CHECK-NEXT: [[RES:%.*]] = insertvalue [[COMPLEX_DOUBLE:%.*]] zeroinitializer, double [[TMP6]], 0 +; CHECK-NEXT: [[RES_1:%.*]] = insertvalue [[COMPLEX_DOUBLE]] [[RES]], double [[TMP7]], 1 +; CHECK-NEXT: ret [[COMPLEX_DOUBLE]] [[RES_1]] +; + %ac = fmul double %a, %c + %bd = fmul double %b, %d + %ad = fmul double %a, %d + %bc = fmul double %b, %c + %cc = fmul double %c, %c + %dd = fmul double %d, %d + %scale = fadd double %cc, %dd + %x_noscale = fadd double %ac, %bd + %y_noscale = fsub 
double %bc, %ad + %x = fdiv double %x_noscale, %scale + %y = fdiv double %y_noscale, %scale + %res = insertvalue %complex.double zeroinitializer, double %x, 0 + %res.1 = insertvalue %complex.double %res, double %y, 1 + ret %complex.double %res.1 +} + +define <2 x double> @vector_div(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: @vector_div( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B:%.*]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[D:%.*]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.experimental.complex.fdiv.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) #[[ATTR2]] +; CHECK-NEXT: ret <2 x double> [[TMP5]] +; + %ac = fmul double %a, %c + %bd = fmul double %b, %d + %ad = fmul double %a, %d + %bc = fmul double %b, %c + %cc = fmul double %c, %c + %dd = fmul double %d, %d + %scale = fadd double %cc, %dd + %x_noscale = fadd double %ac, %bd + %y_noscale = fsub double %bc, %ad + %x = fdiv double %x_noscale, %scale + %y = fdiv double %y_noscale, %scale + %res = insertelement <2 x double> zeroinitializer, double %x, i32 0 + %res.1 = insertelement <2 x double> %res, double %y, i32 1 + ret <2 x double> %res.1 +} + +define void @memory_div(double %a, double %b, double %c, double %d, %complex.double* %dest) { +; CHECK-LABEL: @memory_div( +; CHECK-NEXT: [[DEST_REAL:%.*]] = getelementptr [[COMPLEX_DOUBLE:%.*]], %complex.double* [[DEST:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[DEST_IMAG:%.*]] = getelementptr [[COMPLEX_DOUBLE]], %complex.double* [[DEST]], i64 0, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B:%.*]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> 
poison, double [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[D:%.*]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.experimental.complex.fdiv.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 1 +; CHECK-NEXT: store double [[TMP6]], double* [[DEST_REAL]], align 8 +; CHECK-NEXT: store double [[TMP7]], double* [[DEST_IMAG]], align 8 +; CHECK-NEXT: ret void +; + %ac = fmul double %a, %c + %bd = fmul double %b, %d + %ad = fmul double %a, %d + %bc = fmul double %b, %c + %cc = fmul double %c, %c + %dd = fmul double %d, %d + %scale = fadd double %cc, %dd + %x_noscale = fadd double %ac, %bd + %y_noscale = fsub double %bc, %ad + %x = fdiv double %x_noscale, %scale + %y = fdiv double %y_noscale, %scale + %dest.real = getelementptr %complex.double, %complex.double* %dest, i64 0, i32 0 + %dest.imag = getelementptr %complex.double, %complex.double* %dest, i64 0, i32 1 + store double %x, double* %dest.real + store double %y, double* %dest.imag + ret void +} + +define %complex.double @fast_div(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: @fast_div( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B:%.*]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[D:%.*]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = call fast <2 x double> @llvm.experimental.complex.fdiv.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 1 +; CHECK-NEXT: [[RES:%.*]] = insertvalue 
[[COMPLEX_DOUBLE:%.*]] zeroinitializer, double [[TMP6]], 0 +; CHECK-NEXT: [[RES_1:%.*]] = insertvalue [[COMPLEX_DOUBLE]] [[RES]], double [[TMP7]], 1 +; CHECK-NEXT: ret [[COMPLEX_DOUBLE]] [[RES_1]] +; + %ac = fmul fast double %a, %c + %bd = fmul fast double %b, %d + %ad = fmul fast double %a, %d + %bc = fmul fast double %b, %c + %cc = fmul fast double %c, %c + %dd = fmul fast double %d, %d + %scale = fadd fast double %cc, %dd + %x_noscale = fadd fast double %ac, %bd + %y_noscale = fsub fast double %bc, %ad + %x = fdiv fast double %x_noscale, %scale + %y = fdiv fast double %y_noscale, %scale + %res = insertvalue %complex.double zeroinitializer, double %x, 0 + %res.1 = insertvalue %complex.double %res, double %y, 1 + ret %complex.double %res.1 +} + +define %complex.double @fastish_div(double %a, double %b, double %c, double %d) { +; CHECK-LABEL: @fastish_div( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[B:%.*]], i64 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[D:%.*]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = call arcp <2 x double> @llvm.experimental.complex.fdiv.v2f64(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 1 +; CHECK-NEXT: [[RES:%.*]] = insertvalue [[COMPLEX_DOUBLE:%.*]] zeroinitializer, double [[TMP6]], 0 +; CHECK-NEXT: [[RES_1:%.*]] = insertvalue [[COMPLEX_DOUBLE]] [[RES]], double [[TMP7]], 1 +; CHECK-NEXT: ret [[COMPLEX_DOUBLE]] [[RES_1]] +; + %ac = fmul arcp contract double %a, %c + %bd = fmul arcp afn ninf reassoc double %b, %d + %ad = fmul arcp afn ninf double %a, %d + %bc = fmul arcp nsz reassoc double %b, %c + %cc = fmul arcp nsz afn double %c, %c + %dd = fmul arcp 
nsz double %d, %d + %scale = fadd arcp nsz contract nnan reassoc double %cc, %dd + %x_noscale = fadd arcp nsz contract ninf nnan double %ac, %bd + %y_noscale = fsub arcp nsz contract reassoc double %bc, %ad + %x = fdiv arcp ninf nnan reassoc double %x_noscale, %scale + %y = fdiv arcp nnan double %y_noscale, %scale + %res = insertvalue %complex.double zeroinitializer, double %x, 0 + %res.1 = insertvalue %complex.double %res, double %y, 1 + ret %complex.double %res.1 +} + +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nofree nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR1]] = { "complex-limited-range" } +; CHECK: attributes #[[ATTR2]] = { "complex-limited-range" "complex-no-scale" } +;.