Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1555,6 +1555,41 @@
       return replaceInstUsesWith(*II, V);
     break;
   }
+  case Intrinsic::fma:
+  case Intrinsic::fmuladd: {
+    // Operands 0 and 1 are the multiplicands and operand 2 is the addend, so a
+    // sign-only change applied to both multiplicands cancels out.
+    Value *LHS = nullptr;
+    Value *RHS = nullptr;
+
+    Value *Src0 = II->getArgOperand(0);
+    Value *Src1 = II->getArgOperand(1);
+
+    // fma fneg(x), fneg(y), z -> fma x, y, z
+    if (match(Src0, m_FNeg(m_Value(LHS))) &&
+        match(Src1, m_FNeg(m_Value(RHS)))) {
+      CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+                                              { LHS, RHS, II->getArgOperand(2) });
+      // Carry the original name and fast-math flags over to the new call.
+      NewCall->takeName(II);
+      NewCall->copyFastMathFlags(II);
+      return replaceInstUsesWith(*II, NewCall);
+    }
+
+    // fma fabs(x), fabs(x), z -> fma x, x, z
+    // Requires both multiplicands to be fabs of the same value (LHS == RHS).
+    if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(LHS))) &&
+        match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Value(RHS))) &&
+        LHS == RHS) {
+      CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+                                              { LHS, LHS, II->getArgOperand(2) });
+      NewCall->takeName(II);
+      NewCall->copyFastMathFlags(II);
+      return replaceInstUsesWith(*II, NewCall);
+    }
+
+    break;
+  }
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
     // Turn PPC lvx -> load if the pointer is known aligned.
# Regression test for the InstCombine folds on llvm.fma / llvm.fmuladd.
# Covered cases, each for both intrinsics:
#   - fneg on both multiplicands is removed, with and without the 'fast' flag;
#   - the same fneg fold when one multiplicand is a constant expression;
#   - fabs(x) * fabs(y) with different values is left unchanged;
#   - fabs(x) * fabs(x) drops both fabs calls, with and without 'fast'.
Index: test/Transforms/InstCombine/fma.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/fma.ll
@@ -0,0 +1,132 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.fma.f32(float, float, float) #1
+declare float @llvm.fmuladd.f32(float, float, float) #1
+declare float @llvm.fabs.f32(float) #1
+
+@external = external global i32
+
+; CHECK-LABEL: @fma_fneg_x_fneg_y(
+; CHECK: %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
+define float @fma_fneg_x_fneg_y(float %x, float %y, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %y.fneg = fsub float -0.0, %y
+  %fma = call float @llvm.fma.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fneg_x_fneg_y_fast(
+; CHECK: %fma = call fast float @llvm.fma.f32(float %x, float %y, float %z)
+define float @fma_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %y.fneg = fsub float -0.0, %y
+  %fma = call fast float @llvm.fma.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fneg_const_fneg_y(
+; CHECK: %fma = call float @llvm.fma.f32(float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %y, float %z)
+define float @fma_fneg_const_fneg_y(float %y, float %z) {
+  %y.fneg = fsub float -0.0, %y
+  %fma = call float @llvm.fma.f32(float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %y.fneg, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fneg_x_fneg_const(
+; CHECK: %fma = call float @llvm.fma.f32(float %x, float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %z)
+define float @fma_fneg_x_fneg_const(float %x, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %fma = call float @llvm.fma.f32(float %x.fneg, float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fabs_x_fabs_y(
+; CHECK: %x.fabs = call float @llvm.fabs.f32(float %x)
+; CHECK: %y.fabs = call float @llvm.fabs.f32(float %y)
+; CHECK: %fma = call float @llvm.fma.f32(float %x.fabs, float %y.fabs, float %z)
+define float @fma_fabs_x_fabs_y(float %x, float %y, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %y.fabs = call float @llvm.fabs.f32(float %y)
+  %fma = call float @llvm.fma.f32(float %x.fabs, float %y.fabs, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fabs_x_fabs_x(
+; CHECK: %fma = call float @llvm.fma.f32(float %x, float %x, float %z)
+define float @fma_fabs_x_fabs_x(float %x, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %fma = call float @llvm.fma.f32(float %x.fabs, float %x.fabs, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_fabs_x_fabs_x_fast(
+; CHECK: %fma = call fast float @llvm.fma.f32(float %x, float %x, float %z)
+define float @fma_fabs_x_fabs_x_fast(float %x, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %fma = call fast float @llvm.fma.f32(float %x.fabs, float %x.fabs, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fmuladd_fneg_x_fneg_y(
+; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float %x, float %y, float %z)
+define float @fmuladd_fneg_x_fneg_y(float %x, float %y, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %y.fneg = fsub float -0.0, %y
+  %fmuladd = call float @llvm.fmuladd.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fneg_x_fneg_y_fast(
+; CHECK: %fmuladd = call fast float @llvm.fmuladd.f32(float %x, float %y, float %z)
+define float @fmuladd_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %y.fneg = fsub float -0.0, %y
+  %fmuladd = call fast float @llvm.fmuladd.f32(float %x.fneg, float %y.fneg, float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fneg_const_fneg_y(
+; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %y, float %z)
+define float @fmuladd_fneg_const_fneg_y(float %y, float %z) {
+  %y.fneg = fsub float -0.0, %y
+  %fmuladd = call float @llvm.fmuladd.f32(float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %y.fneg, float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fneg_x_fneg_const(
+; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float %x, float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %z)
+define float @fmuladd_fneg_x_fneg_const(float %x, float %z) {
+  %x.fneg = fsub float -0.0, %x
+  %fmuladd = call float @llvm.fmuladd.f32(float %x.fneg, float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fabs_x_fabs_y(
+; CHECK: %x.fabs = call float @llvm.fabs.f32(float %x)
+; CHECK: %y.fabs = call float @llvm.fabs.f32(float %y)
+; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float %x.fabs, float %y.fabs, float %z)
+define float @fmuladd_fabs_x_fabs_y(float %x, float %y, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %y.fabs = call float @llvm.fabs.f32(float %y)
+  %fmuladd = call float @llvm.fmuladd.f32(float %x.fabs, float %y.fabs, float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fabs_x_fabs_x(
+; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float %z)
+define float @fmuladd_fabs_x_fabs_x(float %x, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %fmuladd = call float @llvm.fmuladd.f32(float %x.fabs, float %x.fabs, float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fmuladd_fabs_x_fabs_x_fast(
+; CHECK: %fmuladd = call fast float @llvm.fmuladd.f32(float %x, float %x, float %z)
+define float @fmuladd_fabs_x_fabs_x_fast(float %x, float %z) {
+  %x.fabs = call float @llvm.fabs.f32(float %x)
+  %fmuladd = call fast float @llvm.fmuladd.f32(float %x.fabs, float %x.fabs, float %z)
+  ret float %fmuladd
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }