Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -105,9 +105,11 @@
                                          bool AllowMinSizeF32 = false,
                                          bool AllowF64 = false,
                                          bool AllowStrictFP = false);
-  void replaceLibCallWithSimpleIntrinsic(CallInst *CI, Intrinsic::ID IntrID);
+  void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
+                                         Intrinsic::ID IntrID);
 
-  bool tryReplaceLibcallWithSimpleIntrinsic(CallInst *CI, Intrinsic::ID IntrID,
+  bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
+                                            Intrinsic::ID IntrID,
                                             bool AllowMinSizeF32 = false,
                                             bool AllowF64 = false,
                                             bool AllowStrictFP = false);
@@ -592,64 +594,74 @@
   case AMDGPULibFunc::EI_EXP:
     if (FMF.none())
       return false;
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::exp,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp,
                                                 FMF.approxFunc());
   case AMDGPULibFunc::EI_EXP2:
     if (FMF.none())
       return false;
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::exp2,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2,
                                                 FMF.approxFunc());
   case AMDGPULibFunc::EI_LOG:
     if (FMF.none())
       return false;
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::log,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log,
                                                 FMF.approxFunc());
   case AMDGPULibFunc::EI_LOG2:
     if (FMF.none())
       return false;
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::log2,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2,
                                                 FMF.approxFunc());
   case AMDGPULibFunc::EI_LOG10:
     if (FMF.none())
       return false;
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::log10,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10,
                                                 FMF.approxFunc());
   case AMDGPULibFunc::EI_FMIN:
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::minnum, true,
-                                                true);
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum,
+                                                true, true);
   case AMDGPULibFunc::EI_FMAX:
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::maxnum, true,
-                                                true);
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum,
+                                                true, true);
   case AMDGPULibFunc::EI_FMA:
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::fma, true,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true,
                                                 true);
   case AMDGPULibFunc::EI_MAD:
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::fmuladd, true,
-                                                true);
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd,
+                                                true, true);
   case AMDGPULibFunc::EI_FABS:
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::fabs, true,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true,
                                                 true, true);
   case AMDGPULibFunc::EI_COPYSIGN:
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::copysign, true,
-                                                true, true);
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign,
+                                                true, true, true);
   case AMDGPULibFunc::EI_FLOOR:
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::floor, true,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true,
                                                 true);
   case AMDGPULibFunc::EI_CEIL:
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::ceil, true,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true,
                                                 true);
   case AMDGPULibFunc::EI_TRUNC:
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::trunc, true,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true,
                                                 true);
   case AMDGPULibFunc::EI_RINT:
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::rint, true,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true,
                                                 true);
   case AMDGPULibFunc::EI_ROUND:
-    return tryReplaceLibcallWithSimpleIntrinsic(CI, Intrinsic::round, true,
+    return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true,
                                                 true);
   case AMDGPULibFunc::EI_LDEXP: {
     if (!shouldReplaceLibcallWithIntrinsic(CI, true, true))
       return false;
+
+    // A vector ldexp may be called with a scalar exponent; splat it so the
+    // exponent operand is a vector like the result before retargeting the call.
+    Value *Arg1 = CI->getArgOperand(1);
+    if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType());
+        VecTy && !isa<VectorType>(Arg1->getType())) {
+      Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
+      CI->setArgOperand(1, SplatArg1);
+    }
+
     CI->setCalledFunction(Intrinsic::getDeclaration(
         CI->getModule(), Intrinsic::ldexp,
         {CI->getType(), CI->getArgOperand(1)->getType()}));
@@ -1135,21 +1147,36 @@
   return true;
 }
 
-void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(CallInst *CI,
+void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
+                                                       CallInst *CI,
                                                        Intrinsic::ID IntrID) {
+  // Binary libcalls can mix a vector operand with a scalar one; splat the
+  // scalar so both operands match the intrinsic's single overloaded type.
+  if (CI->arg_size() == 2) {
+    Value *Arg0 = CI->getArgOperand(0);
+    Value *Arg1 = CI->getArgOperand(1);
+    VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType());
+    VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType());
+    if (Arg0VecTy && !Arg1VecTy) {
+      Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
+      CI->setArgOperand(1, SplatRHS);
+    } else if (!Arg0VecTy && Arg1VecTy) {
+      Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
+      CI->setArgOperand(0, SplatLHS);
+    }
+  }
+
   CI->setCalledFunction(
       Intrinsic::getDeclaration(CI->getModule(), IntrID, {CI->getType()}));
 }
 
-bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(CallInst *CI,
-                                                          Intrinsic::ID IntrID,
-                                                          bool AllowMinSizeF32,
-                                                          bool AllowF64,
-                                                          bool AllowStrictFP) {
+bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
+    IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32,
+    bool AllowF64, bool AllowStrictFP) {
   if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
                                          AllowStrictFP))
     return false;
-  replaceLibCallWithSimpleIntrinsic(CI, IntrID);
+  replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
   return true;
 }
 
Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax-splat.ll
=================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmax-splat.ll @@ -0,0 +1,425 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare <2 x float> @_Z4fmaxDv2_ff(<2 x float>, float) +declare <2 x float> @_Z4fmaxDv2_fS_(<2 x float>, <2 x float>) +declare <3 x float> @_Z4fmaxDv3_ff(<3 x float>, float) +declare <3 x float> @_Z4fmaxDv3_fS_(<3 x float>, <3 x float>) +declare <4 x float> @_Z4fmaxDv4_ff(<4 x float>, float) +declare <4 x float> @_Z4fmaxDv4_fS_(<4 x float>, <4 x float>) +declare <8 x float> @_Z4fmaxDv8_ff(<8 x float>, float) +declare <8 x float> @_Z4fmaxDv8_fS_(<8 x float>, <8 x float>) +declare <16 x float> @_Z4fmaxDv16_ff(<16 x float>, float) +declare <16 x float> @_Z4fmaxDv16_fS_(<16 x float>, <16 x float>) +declare <2 x double> @_Z4fmaxDv2_dd(<2 x double>, double) +declare <2 x double> @_Z4fmaxDv2_dS_(<2 x double>, <2 x double>) +declare <3 x double> @_Z4fmaxDv3_dd(<3 x double>, double) +declare <3 x double> @_Z4fmaxDv3_dS_(<3 x double>, <3 x double>) +declare <4 x double> @_Z4fmaxDv4_dd(<4 x double>, double) +declare <4 x double> @_Z4fmaxDv4_dS_(<4 x double>, <4 x double>) +declare <8 x double> @_Z4fmaxDv8_dd(<8 x double>, double) +declare <8 x double> @_Z4fmaxDv8_dS_(<8 x double>, <8 x double>) +declare <16 x double> @_Z4fmaxDv16_dd(<16 x double>, double) +declare <16 x double> @_Z4fmaxDv16_dS_(<16 x double>, <16 x double>) +declare <2 x half> @_Z4fmaxDv2_DhDh(<2 x half>, half) +declare <2 x half> @_Z4fmaxDv2_DhS_(<2 x half>, <2 x half>) +declare <3 x half> @_Z4fmaxDv3_DhDh(<3 x half>, half) 
+declare <3 x half> @_Z4fmaxDv3_DhS_(<3 x half>, <3 x half>) +declare <4 x half> @_Z4fmaxDv4_DhDh(<4 x half>, half) +declare <4 x half> @_Z4fmaxDv4_DhS_(<4 x half>, <4 x half>) +declare <8 x half> @_Z4fmaxDv8_DhDh(<8 x half>, half) +declare <8 x half> @_Z4fmaxDv8_DhS_(<8 x half>, <8 x half>) +declare <16 x half> @_Z4fmaxDv16_DhDh(<16 x half>, half) +declare <16 x half> @_Z4fmaxDv16_DhS_(<16 x half>, <16 x half>) + +define <2 x float> @test_fmax_v2f32_f32(<2 x float> %x, float %y) { +; CHECK-LABEL: define <2 x float> @test_fmax_v2f32_f32 +; CHECK-SAME: (<2 x float> [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[X]], <2 x float> [[DOTSPLAT]]) +; CHECK-NEXT: ret <2 x float> [[MIN]] +; + %min = tail call <2 x float> @_Z4fmaxDv2_ff(<2 x float> %x, float %y) + ret <2 x float> %min +} + +define <2 x float> @test_fmax_f32_v2f32(float %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_fmax_f32_v2f32 +; CHECK-SAME: (float [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[INSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[SHUFFLE]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[MIN]] +; + %insert = insertelement <2 x float> poison, float %x, i64 0 + %shuffle = shufflevector <2 x float> %insert, <2 x float> poison, <2 x i32> zeroinitializer + %min = tail call <2 x float> @_Z4fmaxDv2_fS_(<2 x float> %shuffle, <2 x float> %y) + ret <2 x float> %min +} + +define <3 x float> @test_fmax_v3f32_f32(<3 x float> %x, float %y) { +; CHECK-LABEL: define <3 x float> 
@test_fmax_v3f32_f32 +; CHECK-SAME: (<3 x float> [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <3 x float> poison, float [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <3 x float> [[DOTSPLATINSERT]], <3 x float> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <3 x float> @llvm.maxnum.v3f32(<3 x float> [[X]], <3 x float> [[DOTSPLAT]]) +; CHECK-NEXT: ret <3 x float> [[MIN]] +; + %min = tail call <3 x float> @_Z4fmaxDv3_ff(<3 x float> %x, float %y) + ret <3 x float> %min +} + +define <3 x float> @test_fmax_f32_v3f32(float %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_fmax_f32_v3f32 +; CHECK-SAME: (float [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <3 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x float> [[INSERT]], <3 x float> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <3 x float> @llvm.maxnum.v3f32(<3 x float> [[SHUFFLE]], <3 x float> [[Y]]) +; CHECK-NEXT: ret <3 x float> [[MIN]] +; + %insert = insertelement <3 x float> poison, float %x, i64 0 + %shuffle = shufflevector <3 x float> %insert, <3 x float> poison, <3 x i32> zeroinitializer + %min = tail call <3 x float> @_Z4fmaxDv3_fS_(<3 x float> %shuffle, <3 x float> %y) + ret <3 x float> %min +} + +define <4 x float> @test_fmax_v4f32_f32(<4 x float> %x, float %y) { +; CHECK-LABEL: define <4 x float> @test_fmax_v4f32_f32 +; CHECK-SAME: (<4 x float> [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[X]], <4 x float> [[DOTSPLAT]]) +; CHECK-NEXT: ret <4 x float> [[MIN]] +; + %min = tail call <4 x float> @_Z4fmaxDv4_ff(<4 x float> %x, float %y) + 
ret <4 x float> %min +} + +define <4 x float> @test_fmax_f32_v4f32(float %x, <4 x float> %y) { +; CHECK-LABEL: define <4 x float> @test_fmax_f32_v4f32 +; CHECK-SAME: (float [[X:%.*]], <4 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[INSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[SHUFFLE]], <4 x float> [[Y]]) +; CHECK-NEXT: ret <4 x float> [[MIN]] +; + %insert = insertelement <4 x float> poison, float %x, i64 0 + %shuffle = shufflevector <4 x float> %insert, <4 x float> poison, <4 x i32> zeroinitializer + %min = tail call <4 x float> @_Z4fmaxDv4_fS_(<4 x float> %shuffle, <4 x float> %y) + ret <4 x float> %min +} + +define <8 x float> @test_fmax_v8f32_f32(<8 x float> %x, float %y) { +; CHECK-LABEL: define <8 x float> @test_fmax_v8f32_f32 +; CHECK-SAME: (<8 x float> [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x float> [[DOTSPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x float> @llvm.maxnum.v8f32(<8 x float> [[X]], <8 x float> [[DOTSPLAT]]) +; CHECK-NEXT: ret <8 x float> [[MIN]] +; + %min = tail call <8 x float> @_Z4fmaxDv8_ff(<8 x float> %x, float %y) + ret <8 x float> %min +} + +define <8 x float> @test_fmax_f32_v8f32(float %x, <8 x float> %y) { +; CHECK-LABEL: define <8 x float> @test_fmax_f32_v8f32 +; CHECK-SAME: (float [[X:%.*]], <8 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <8 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[INSERT]], <8 x float> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x float> @llvm.maxnum.v8f32(<8 x float> [[SHUFFLE]], <8 x float> [[Y]]) +; 
CHECK-NEXT: ret <8 x float> [[MIN]] +; + %insert = insertelement <8 x float> poison, float %x, i64 0 + %shuffle = shufflevector <8 x float> %insert, <8 x float> poison, <8 x i32> zeroinitializer + %min = tail call <8 x float> @_Z4fmaxDv8_fS_(<8 x float> %shuffle, <8 x float> %y) + ret <8 x float> %min +} + +define <16 x float> @test_fmax_v16f32_f32(<16 x float> %x, float %y) { +; CHECK-LABEL: define <16 x float> @test_fmax_v16f32_f32 +; CHECK-SAME: (<16 x float> [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x float> [[DOTSPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x float> @llvm.maxnum.v16f32(<16 x float> [[X]], <16 x float> [[DOTSPLAT]]) +; CHECK-NEXT: ret <16 x float> [[MIN]] +; + %min = tail call <16 x float> @_Z4fmaxDv16_ff(<16 x float> %x, float %y) + ret <16 x float> %min +} + +define <16 x float> @test_fmax_f32_v16f32(float %x, <16 x float> %y) { +; CHECK-LABEL: define <16 x float> @test_fmax_f32_v16f32 +; CHECK-SAME: (float [[X:%.*]], <16 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <16 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x float> [[INSERT]], <16 x float> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x float> @llvm.maxnum.v16f32(<16 x float> [[SHUFFLE]], <16 x float> [[Y]]) +; CHECK-NEXT: ret <16 x float> [[MIN]] +; + %insert = insertelement <16 x float> poison, float %x, i64 0 + %shuffle = shufflevector <16 x float> %insert, <16 x float> poison, <16 x i32> zeroinitializer + %min = tail call <16 x float> @_Z4fmaxDv16_fS_(<16 x float> %shuffle, <16 x float> %y) + ret <16 x float> %min +} + +define <2 x double> @test_fmax_v2f64_f64(<2 x double> %x, double %y) { +; CHECK-LABEL: define <2 x double> @test_fmax_v2f64_f64 +; CHECK-SAME: (<2 x double> [[X:%.*]], double 
[[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x double> [[DOTSPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[X]], <2 x double> [[DOTSPLAT]]) +; CHECK-NEXT: ret <2 x double> [[MIN]] +; + %min = tail call <2 x double> @_Z4fmaxDv2_dd(<2 x double> %x, double %y) + ret <2 x double> %min +} + +define <2 x double> @test_fmax_f64_v2f64(double %x, <2 x double> %y) { +; CHECK-LABEL: define <2 x double> @test_fmax_f64_v2f64 +; CHECK-SAME: (double [[X:%.*]], <2 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x double> poison, double [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[INSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[SHUFFLE]], <2 x double> [[Y]]) +; CHECK-NEXT: ret <2 x double> [[MIN]] +; + %insert = insertelement <2 x double> poison, double %x, i64 0 + %shuffle = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer + %min = tail call <2 x double> @_Z4fmaxDv2_dS_(<2 x double> %shuffle, <2 x double> %y) + ret <2 x double> %min +} + +define <3 x double> @test_fmax_v3f64_f64(<3 x double> %x, double %y) { +; CHECK-LABEL: define <3 x double> @test_fmax_v3f64_f64 +; CHECK-SAME: (<3 x double> [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <3 x double> poison, double [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <3 x double> [[DOTSPLATINSERT]], <3 x double> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <3 x double> @llvm.maxnum.v3f64(<3 x double> [[X]], <3 x double> [[DOTSPLAT]]) +; CHECK-NEXT: ret <3 x double> [[MIN]] +; + %min = tail call <3 x double> @_Z4fmaxDv3_dd(<3 x double> %x, double %y) + ret <3 x double> 
%min +} + +define <3 x double> @test_fmax_f64_v3f64(double %x, <3 x double> %y) { +; CHECK-LABEL: define <3 x double> @test_fmax_f64_v3f64 +; CHECK-SAME: (double [[X:%.*]], <3 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <3 x double> poison, double [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x double> [[INSERT]], <3 x double> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <3 x double> @llvm.maxnum.v3f64(<3 x double> [[SHUFFLE]], <3 x double> [[Y]]) +; CHECK-NEXT: ret <3 x double> [[MIN]] +; + %insert = insertelement <3 x double> poison, double %x, i64 0 + %shuffle = shufflevector <3 x double> %insert, <3 x double> poison, <3 x i32> zeroinitializer + %min = tail call <3 x double> @_Z4fmaxDv3_dS_(<3 x double> %shuffle, <3 x double> %y) + ret <3 x double> %min +} + +define <4 x double> @test_fmax_v4f64_f64(<4 x double> %x, double %y) { +; CHECK-LABEL: define <4 x double> @test_fmax_v4f64_f64 +; CHECK-SAME: (<4 x double> [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x double> [[DOTSPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <4 x double> @llvm.maxnum.v4f64(<4 x double> [[X]], <4 x double> [[DOTSPLAT]]) +; CHECK-NEXT: ret <4 x double> [[MIN]] +; + %min = tail call <4 x double> @_Z4fmaxDv4_dd(<4 x double> %x, double %y) + ret <4 x double> %min +} + +define <4 x double> @test_fmax_f64_v4f64(double %x, <4 x double> %y) { +; CHECK-LABEL: define <4 x double> @test_fmax_f64_v4f64 +; CHECK-SAME: (double [[X:%.*]], <4 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x double> poison, double [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[INSERT]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <4 x double> @llvm.maxnum.v4f64(<4 x double> 
[[SHUFFLE]], <4 x double> [[Y]]) +; CHECK-NEXT: ret <4 x double> [[MIN]] +; + %insert = insertelement <4 x double> poison, double %x, i64 0 + %shuffle = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer + %min = tail call <4 x double> @_Z4fmaxDv4_dS_(<4 x double> %shuffle, <4 x double> %y) + ret <4 x double> %min +} + +define <8 x double> @test_fmax_v8f64_f64(<8 x double> %x, double %y) { +; CHECK-LABEL: define <8 x double> @test_fmax_v8f64_f64 +; CHECK-SAME: (<8 x double> [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x double> [[DOTSPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x double> @llvm.maxnum.v8f64(<8 x double> [[X]], <8 x double> [[DOTSPLAT]]) +; CHECK-NEXT: ret <8 x double> [[MIN]] +; + %min = tail call <8 x double> @_Z4fmaxDv8_dd(<8 x double> %x, double %y) + ret <8 x double> %min +} + +define <8 x double> @test_fmax_f64_v8f64(double %x, <8 x double> %y) { +; CHECK-LABEL: define <8 x double> @test_fmax_f64_v8f64 +; CHECK-SAME: (double [[X:%.*]], <8 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <8 x double> poison, double [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[INSERT]], <8 x double> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x double> @llvm.maxnum.v8f64(<8 x double> [[SHUFFLE]], <8 x double> [[Y]]) +; CHECK-NEXT: ret <8 x double> [[MIN]] +; + %insert = insertelement <8 x double> poison, double %x, i64 0 + %shuffle = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer + %min = tail call <8 x double> @_Z4fmaxDv8_dS_(<8 x double> %shuffle, <8 x double> %y) + ret <8 x double> %min +} + +define <16 x double> @test_fmax_v16f64_f64(<16 x double> %x, double %y) { +; CHECK-LABEL: define <16 x double> @test_fmax_v16f64_f64 +; 
CHECK-SAME: (<16 x double> [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x double> poison, double [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x double> [[DOTSPLATINSERT]], <16 x double> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x double> @llvm.maxnum.v16f64(<16 x double> [[X]], <16 x double> [[DOTSPLAT]]) +; CHECK-NEXT: ret <16 x double> [[MIN]] +; + %min = tail call <16 x double> @_Z4fmaxDv16_dd(<16 x double> %x, double %y) + ret <16 x double> %min +} + +define <16 x double> @test_fmax_f64_v16f64(double %x, <16 x double> %y) { +; CHECK-LABEL: define <16 x double> @test_fmax_f64_v16f64 +; CHECK-SAME: (double [[X:%.*]], <16 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <16 x double> poison, double [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x double> [[INSERT]], <16 x double> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x double> @llvm.maxnum.v16f64(<16 x double> [[SHUFFLE]], <16 x double> [[Y]]) +; CHECK-NEXT: ret <16 x double> [[MIN]] +; + %insert = insertelement <16 x double> poison, double %x, i64 0 + %shuffle = shufflevector <16 x double> %insert, <16 x double> poison, <16 x i32> zeroinitializer + %min = tail call <16 x double> @_Z4fmaxDv16_dS_(<16 x double> %shuffle, <16 x double> %y) + ret <16 x double> %min +} + +define <2 x half> @test_fmax_v2f16_f16(<2 x half> %x, half %y) { +; CHECK-LABEL: define <2 x half> @test_fmax_v2f16_f16 +; CHECK-SAME: (<2 x half> [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x half> poison, half [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x half> [[DOTSPLATINSERT]], <2 x half> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> [[X]], <2 x half> [[DOTSPLAT]]) +; CHECK-NEXT: ret <2 x half> [[MIN]] +; + %min = tail call <2 x half> 
@_Z4fmaxDv2_DhDh(<2 x half> %x, half %y) + ret <2 x half> %min +} + +define <2 x half> @test_fmax_f16_v2f16(half %x, <2 x half> %y) { +; CHECK-LABEL: define <2 x half> @test_fmax_f16_v2f16 +; CHECK-SAME: (half [[X:%.*]], <2 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x half> poison, half [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x half> [[INSERT]], <2 x half> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> [[SHUFFLE]], <2 x half> [[Y]]) +; CHECK-NEXT: ret <2 x half> [[MIN]] +; + %insert = insertelement <2 x half> poison, half %x, i64 0 + %shuffle = shufflevector <2 x half> %insert, <2 x half> poison, <2 x i32> zeroinitializer + %min = tail call <2 x half> @_Z4fmaxDv2_DhS_(<2 x half> %shuffle, <2 x half> %y) + ret <2 x half> %min +} + +define <3 x half> @test_fmax_v3f16_f16(<3 x half> %x, half %y) { +; CHECK-LABEL: define <3 x half> @test_fmax_v3f16_f16 +; CHECK-SAME: (<3 x half> [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <3 x half> poison, half [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <3 x half> [[DOTSPLATINSERT]], <3 x half> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <3 x half> @llvm.maxnum.v3f16(<3 x half> [[X]], <3 x half> [[DOTSPLAT]]) +; CHECK-NEXT: ret <3 x half> [[MIN]] +; + %min = tail call <3 x half> @_Z4fmaxDv3_DhDh(<3 x half> %x, half %y) + ret <3 x half> %min +} + +define <3 x half> @test_fmax_f16_v3f16(half %x, <3 x half> %y) { +; CHECK-LABEL: define <3 x half> @test_fmax_f16_v3f16 +; CHECK-SAME: (half [[X:%.*]], <3 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <3 x half> poison, half [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x half> [[INSERT]], <3 x half> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <3 x half> @llvm.maxnum.v3f16(<3 x half> [[SHUFFLE]], <3 x half> [[Y]]) +; 
CHECK-NEXT: ret <3 x half> [[MIN]] +; + %insert = insertelement <3 x half> poison, half %x, i64 0 + %shuffle = shufflevector <3 x half> %insert, <3 x half> poison, <3 x i32> zeroinitializer + %min = tail call <3 x half> @_Z4fmaxDv3_DhS_(<3 x half> %shuffle, <3 x half> %y) + ret <3 x half> %min +} + +define <4 x half> @test_fmax_v4f16_f16(<4 x half> %x, half %y) { +; CHECK-LABEL: define <4 x half> @test_fmax_v4f16_f16 +; CHECK-SAME: (<4 x half> [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x half> [[DOTSPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[X]], <4 x half> [[DOTSPLAT]]) +; CHECK-NEXT: ret <4 x half> [[MIN]] +; + %min = tail call <4 x half> @_Z4fmaxDv4_DhDh(<4 x half> %x, half %y) + ret <4 x half> %min +} + +define <4 x half> @test_fmax_f16_v4f16(half %x, <4 x half> %y) { +; CHECK-LABEL: define <4 x half> @test_fmax_f16_v4f16 +; CHECK-SAME: (half [[X:%.*]], <4 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> poison, half [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x half> [[INSERT]], <4 x half> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[SHUFFLE]], <4 x half> [[Y]]) +; CHECK-NEXT: ret <4 x half> [[MIN]] +; + %insert = insertelement <4 x half> poison, half %x, i64 0 + %shuffle = shufflevector <4 x half> %insert, <4 x half> poison, <4 x i32> zeroinitializer + %min = tail call <4 x half> @_Z4fmaxDv4_DhS_(<4 x half> %shuffle, <4 x half> %y) + ret <4 x half> %min +} + +define <8 x half> @test_fmax_v8f16_f16(<8 x half> %x, half %y) { +; CHECK-LABEL: define <8 x half> @test_fmax_v8f16_f16 +; CHECK-SAME: (<8 x half> [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y]], i64 0 
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[X]], <8 x half> [[DOTSPLAT]]) +; CHECK-NEXT: ret <8 x half> [[MIN]] +; + %min = tail call <8 x half> @_Z4fmaxDv8_DhDh(<8 x half> %x, half %y) + ret <8 x half> %min +} + +define <8 x half> @test_fmax_f16_v8f16(half %x, <8 x half> %y) { +; CHECK-LABEL: define <8 x half> @test_fmax_f16_v8f16 +; CHECK-SAME: (half [[X:%.*]], <8 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <8 x half> poison, half [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x half> [[INSERT]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[SHUFFLE]], <8 x half> [[Y]]) +; CHECK-NEXT: ret <8 x half> [[MIN]] +; + %insert = insertelement <8 x half> poison, half %x, i64 0 + %shuffle = shufflevector <8 x half> %insert, <8 x half> poison, <8 x i32> zeroinitializer + %min = tail call <8 x half> @_Z4fmaxDv8_DhS_(<8 x half> %shuffle, <8 x half> %y) + ret <8 x half> %min +} + +define <16 x half> @test_fmax_v16f16_f16(<16 x half> %x, half %y) { +; CHECK-LABEL: define <16 x half> @test_fmax_v16f16_f16 +; CHECK-SAME: (<16 x half> [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x half> poison, half [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x half> [[DOTSPLATINSERT]], <16 x half> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x half> @llvm.maxnum.v16f16(<16 x half> [[X]], <16 x half> [[DOTSPLAT]]) +; CHECK-NEXT: ret <16 x half> [[MIN]] +; + %min = tail call <16 x half> @_Z4fmaxDv16_DhDh(<16 x half> %x, half %y) + ret <16 x half> %min +} + +define <16 x half> @test_fmax_f16_v16f16(half %x, <16 x half> %y) { +; CHECK-LABEL: define <16 x half> @test_fmax_f16_v16f16 +; CHECK-SAME: (half [[X:%.*]], <16 x half> 
[[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <16 x half> poison, half [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x half> [[INSERT]], <16 x half> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x half> @llvm.maxnum.v16f16(<16 x half> [[SHUFFLE]], <16 x half> [[Y]]) +; CHECK-NEXT: ret <16 x half> [[MIN]] +; + %insert = insertelement <16 x half> poison, half %x, i64 0 + %shuffle = shufflevector <16 x half> %insert, <16 x half> poison, <16 x i32> zeroinitializer + %min = tail call <16 x half> @_Z4fmaxDv16_DhS_(<16 x half> %shuffle, <16 x half> %y) + ret <16 x half> %min +} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin-splat.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-fmin-splat.ll @@ -0,0 +1,425 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare <2 x float> @_Z4fminDv2_ff(<2 x float>, float) +declare <2 x float> @_Z4fminDv2_fS_(<2 x float>, <2 x float>) +declare <3 x float> @_Z4fminDv3_ff(<3 x float>, float) +declare <3 x float> @_Z4fminDv3_fS_(<3 x float>, <3 x float>) +declare <4 x float> @_Z4fminDv4_ff(<4 x float>, float) +declare <4 x float> @_Z4fminDv4_fS_(<4 x float>, <4 x float>) +declare <8 x float> @_Z4fminDv8_ff(<8 x float>, float) +declare <8 x float> @_Z4fminDv8_fS_(<8 x float>, <8 x float>) +declare <16 x float> @_Z4fminDv16_ff(<16 x float>, float) +declare <16 x float> @_Z4fminDv16_fS_(<16 x float>, <16 x float>) +declare <2 x double> @_Z4fminDv2_dd(<2 x double>, double) +declare <2 x double> @_Z4fminDv2_dS_(<2 
x double>, <2 x double>) +declare <3 x double> @_Z4fminDv3_dd(<3 x double>, double) +declare <3 x double> @_Z4fminDv3_dS_(<3 x double>, <3 x double>) +declare <4 x double> @_Z4fminDv4_dd(<4 x double>, double) +declare <4 x double> @_Z4fminDv4_dS_(<4 x double>, <4 x double>) +declare <8 x double> @_Z4fminDv8_dd(<8 x double>, double) +declare <8 x double> @_Z4fminDv8_dS_(<8 x double>, <8 x double>) +declare <16 x double> @_Z4fminDv16_dd(<16 x double>, double) +declare <16 x double> @_Z4fminDv16_dS_(<16 x double>, <16 x double>) +declare <2 x half> @_Z4fminDv2_DhDh(<2 x half>, half) +declare <2 x half> @_Z4fminDv2_DhS_(<2 x half>, <2 x half>) +declare <3 x half> @_Z4fminDv3_DhDh(<3 x half>, half) +declare <3 x half> @_Z4fminDv3_DhS_(<3 x half>, <3 x half>) +declare <4 x half> @_Z4fminDv4_DhDh(<4 x half>, half) +declare <4 x half> @_Z4fminDv4_DhS_(<4 x half>, <4 x half>) +declare <8 x half> @_Z4fminDv8_DhDh(<8 x half>, half) +declare <8 x half> @_Z4fminDv8_DhS_(<8 x half>, <8 x half>) +declare <16 x half> @_Z4fminDv16_DhDh(<16 x half>, half) +declare <16 x half> @_Z4fminDv16_DhS_(<16 x half>, <16 x half>) + +define <2 x float> @test_fmin_v2f32_f32(<2 x float> %x, float %y) { +; CHECK-LABEL: define <2 x float> @test_fmin_v2f32_f32 +; CHECK-SAME: (<2 x float> [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[X]], <2 x float> [[DOTSPLAT]]) +; CHECK-NEXT: ret <2 x float> [[MIN]] +; + %min = tail call <2 x float> @_Z4fminDv2_ff(<2 x float> %x, float %y) + ret <2 x float> %min +} + +define <2 x float> @test_fmin_f32_v2f32(float %x, <2 x float> %y) { +; CHECK-LABEL: define <2 x float> @test_fmin_f32_v2f32 +; CHECK-SAME: (float [[X:%.*]], <2 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = 
insertelement <2 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x float> [[INSERT]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SHUFFLE]], <2 x float> [[Y]]) +; CHECK-NEXT: ret <2 x float> [[MIN]] +; + %insert = insertelement <2 x float> poison, float %x, i64 0 + %shuffle = shufflevector <2 x float> %insert, <2 x float> poison, <2 x i32> zeroinitializer + %min = tail call <2 x float> @_Z4fminDv2_fS_(<2 x float> %shuffle, <2 x float> %y) + ret <2 x float> %min +} + +define <3 x float> @test_fmin_v3f32_f32(<3 x float> %x, float %y) { +; CHECK-LABEL: define <3 x float> @test_fmin_v3f32_f32 +; CHECK-SAME: (<3 x float> [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <3 x float> poison, float [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <3 x float> [[DOTSPLATINSERT]], <3 x float> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <3 x float> @llvm.minnum.v3f32(<3 x float> [[X]], <3 x float> [[DOTSPLAT]]) +; CHECK-NEXT: ret <3 x float> [[MIN]] +; + %min = tail call <3 x float> @_Z4fminDv3_ff(<3 x float> %x, float %y) + ret <3 x float> %min +} + +define <3 x float> @test_fmin_f32_v3f32(float %x, <3 x float> %y) { +; CHECK-LABEL: define <3 x float> @test_fmin_f32_v3f32 +; CHECK-SAME: (float [[X:%.*]], <3 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <3 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x float> [[INSERT]], <3 x float> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <3 x float> @llvm.minnum.v3f32(<3 x float> [[SHUFFLE]], <3 x float> [[Y]]) +; CHECK-NEXT: ret <3 x float> [[MIN]] +; + %insert = insertelement <3 x float> poison, float %x, i64 0 + %shuffle = shufflevector <3 x float> %insert, <3 x float> poison, <3 x i32> zeroinitializer + %min = tail call <3 x float> 
@_Z4fminDv3_fS_(<3 x float> %shuffle, <3 x float> %y) + ret <3 x float> %min +} + +define <4 x float> @test_fmin_v4f32_f32(<4 x float> %x, float %y) { +; CHECK-LABEL: define <4 x float> @test_fmin_v4f32_f32 +; CHECK-SAME: (<4 x float> [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[X]], <4 x float> [[DOTSPLAT]]) +; CHECK-NEXT: ret <4 x float> [[MIN]] +; + %min = tail call <4 x float> @_Z4fminDv4_ff(<4 x float> %x, float %y) + ret <4 x float> %min +} + +define <4 x float> @test_fmin_f32_v4f32(float %x, <4 x float> %y) { +; CHECK-LABEL: define <4 x float> @test_fmin_f32_v4f32 +; CHECK-SAME: (float [[X:%.*]], <4 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[INSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[SHUFFLE]], <4 x float> [[Y]]) +; CHECK-NEXT: ret <4 x float> [[MIN]] +; + %insert = insertelement <4 x float> poison, float %x, i64 0 + %shuffle = shufflevector <4 x float> %insert, <4 x float> poison, <4 x i32> zeroinitializer + %min = tail call <4 x float> @_Z4fminDv4_fS_(<4 x float> %shuffle, <4 x float> %y) + ret <4 x float> %min +} + +define <8 x float> @test_fmin_v8f32_f32(<8 x float> %x, float %y) { +; CHECK-LABEL: define <8 x float> @test_fmin_v8f32_f32 +; CHECK-SAME: (<8 x float> [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x float> [[DOTSPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x 
float> @llvm.minnum.v8f32(<8 x float> [[X]], <8 x float> [[DOTSPLAT]]) +; CHECK-NEXT: ret <8 x float> [[MIN]] +; + %min = tail call <8 x float> @_Z4fminDv8_ff(<8 x float> %x, float %y) + ret <8 x float> %min +} + +define <8 x float> @test_fmin_f32_v8f32(float %x, <8 x float> %y) { +; CHECK-LABEL: define <8 x float> @test_fmin_f32_v8f32 +; CHECK-SAME: (float [[X:%.*]], <8 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <8 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[INSERT]], <8 x float> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x float> @llvm.minnum.v8f32(<8 x float> [[SHUFFLE]], <8 x float> [[Y]]) +; CHECK-NEXT: ret <8 x float> [[MIN]] +; + %insert = insertelement <8 x float> poison, float %x, i64 0 + %shuffle = shufflevector <8 x float> %insert, <8 x float> poison, <8 x i32> zeroinitializer + %min = tail call <8 x float> @_Z4fminDv8_fS_(<8 x float> %shuffle, <8 x float> %y) + ret <8 x float> %min +} + +define <16 x float> @test_fmin_v16f32_f32(<16 x float> %x, float %y) { +; CHECK-LABEL: define <16 x float> @test_fmin_v16f32_f32 +; CHECK-SAME: (<16 x float> [[X:%.*]], float [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x float> poison, float [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x float> [[DOTSPLATINSERT]], <16 x float> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x float> @llvm.minnum.v16f32(<16 x float> [[X]], <16 x float> [[DOTSPLAT]]) +; CHECK-NEXT: ret <16 x float> [[MIN]] +; + %min = tail call <16 x float> @_Z4fminDv16_ff(<16 x float> %x, float %y) + ret <16 x float> %min +} + +define <16 x float> @test_fmin_f32_v16f32(float %x, <16 x float> %y) { +; CHECK-LABEL: define <16 x float> @test_fmin_f32_v16f32 +; CHECK-SAME: (float [[X:%.*]], <16 x float> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <16 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: 
[[SHUFFLE:%.*]] = shufflevector <16 x float> [[INSERT]], <16 x float> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x float> @llvm.minnum.v16f32(<16 x float> [[SHUFFLE]], <16 x float> [[Y]]) +; CHECK-NEXT: ret <16 x float> [[MIN]] +; + %insert = insertelement <16 x float> poison, float %x, i64 0 + %shuffle = shufflevector <16 x float> %insert, <16 x float> poison, <16 x i32> zeroinitializer + %min = tail call <16 x float> @_Z4fminDv16_fS_(<16 x float> %shuffle, <16 x float> %y) + ret <16 x float> %min +} + +define <2 x double> @test_fmin_v2f64_f64(<2 x double> %x, double %y) { +; CHECK-LABEL: define <2 x double> @test_fmin_v2f64_f64 +; CHECK-SAME: (<2 x double> [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x double> [[DOTSPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> [[X]], <2 x double> [[DOTSPLAT]]) +; CHECK-NEXT: ret <2 x double> [[MIN]] +; + %min = tail call <2 x double> @_Z4fminDv2_dd(<2 x double> %x, double %y) + ret <2 x double> %min +} + +define <2 x double> @test_fmin_f64_v2f64(double %x, <2 x double> %y) { +; CHECK-LABEL: define <2 x double> @test_fmin_f64_v2f64 +; CHECK-SAME: (double [[X:%.*]], <2 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x double> poison, double [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[INSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> [[SHUFFLE]], <2 x double> [[Y]]) +; CHECK-NEXT: ret <2 x double> [[MIN]] +; + %insert = insertelement <2 x double> poison, double %x, i64 0 + %shuffle = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer + %min = tail call <2 x double> @_Z4fminDv2_dS_(<2 x double> 
%shuffle, <2 x double> %y) + ret <2 x double> %min +} + +define <3 x double> @test_fmin_v3f64_f64(<3 x double> %x, double %y) { +; CHECK-LABEL: define <3 x double> @test_fmin_v3f64_f64 +; CHECK-SAME: (<3 x double> [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <3 x double> poison, double [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <3 x double> [[DOTSPLATINSERT]], <3 x double> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <3 x double> @llvm.minnum.v3f64(<3 x double> [[X]], <3 x double> [[DOTSPLAT]]) +; CHECK-NEXT: ret <3 x double> [[MIN]] +; + %min = tail call <3 x double> @_Z4fminDv3_dd(<3 x double> %x, double %y) + ret <3 x double> %min +} + +define <3 x double> @test_fmin_f64_v3f64(double %x, <3 x double> %y) { +; CHECK-LABEL: define <3 x double> @test_fmin_f64_v3f64 +; CHECK-SAME: (double [[X:%.*]], <3 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <3 x double> poison, double [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x double> [[INSERT]], <3 x double> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <3 x double> @llvm.minnum.v3f64(<3 x double> [[SHUFFLE]], <3 x double> [[Y]]) +; CHECK-NEXT: ret <3 x double> [[MIN]] +; + %insert = insertelement <3 x double> poison, double %x, i64 0 + %shuffle = shufflevector <3 x double> %insert, <3 x double> poison, <3 x i32> zeroinitializer + %min = tail call <3 x double> @_Z4fminDv3_dS_(<3 x double> %shuffle, <3 x double> %y) + ret <3 x double> %min +} + +define <4 x double> @test_fmin_v4f64_f64(<4 x double> %x, double %y) { +; CHECK-LABEL: define <4 x double> @test_fmin_v4f64_f64 +; CHECK-SAME: (<4 x double> [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x double> [[DOTSPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: 
[[MIN:%.*]] = tail call <4 x double> @llvm.minnum.v4f64(<4 x double> [[X]], <4 x double> [[DOTSPLAT]]) +; CHECK-NEXT: ret <4 x double> [[MIN]] +; + %min = tail call <4 x double> @_Z4fminDv4_dd(<4 x double> %x, double %y) + ret <4 x double> %min +} + +define <4 x double> @test_fmin_f64_v4f64(double %x, <4 x double> %y) { +; CHECK-LABEL: define <4 x double> @test_fmin_f64_v4f64 +; CHECK-SAME: (double [[X:%.*]], <4 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x double> poison, double [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[INSERT]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <4 x double> @llvm.minnum.v4f64(<4 x double> [[SHUFFLE]], <4 x double> [[Y]]) +; CHECK-NEXT: ret <4 x double> [[MIN]] +; + %insert = insertelement <4 x double> poison, double %x, i64 0 + %shuffle = shufflevector <4 x double> %insert, <4 x double> poison, <4 x i32> zeroinitializer + %min = tail call <4 x double> @_Z4fminDv4_dS_(<4 x double> %shuffle, <4 x double> %y) + ret <4 x double> %min +} + +define <8 x double> @test_fmin_v8f64_f64(<8 x double> %x, double %y) { +; CHECK-LABEL: define <8 x double> @test_fmin_v8f64_f64 +; CHECK-SAME: (<8 x double> [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x double> poison, double [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x double> [[DOTSPLATINSERT]], <8 x double> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x double> @llvm.minnum.v8f64(<8 x double> [[X]], <8 x double> [[DOTSPLAT]]) +; CHECK-NEXT: ret <8 x double> [[MIN]] +; + %min = tail call <8 x double> @_Z4fminDv8_dd(<8 x double> %x, double %y) + ret <8 x double> %min +} + +define <8 x double> @test_fmin_f64_v8f64(double %x, <8 x double> %y) { +; CHECK-LABEL: define <8 x double> @test_fmin_f64_v8f64 +; CHECK-SAME: (double [[X:%.*]], <8 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <8 x 
double> poison, double [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x double> [[INSERT]], <8 x double> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x double> @llvm.minnum.v8f64(<8 x double> [[SHUFFLE]], <8 x double> [[Y]]) +; CHECK-NEXT: ret <8 x double> [[MIN]] +; + %insert = insertelement <8 x double> poison, double %x, i64 0 + %shuffle = shufflevector <8 x double> %insert, <8 x double> poison, <8 x i32> zeroinitializer + %min = tail call <8 x double> @_Z4fminDv8_dS_(<8 x double> %shuffle, <8 x double> %y) + ret <8 x double> %min +} + +define <16 x double> @test_fmin_v16f64_f64(<16 x double> %x, double %y) { +; CHECK-LABEL: define <16 x double> @test_fmin_v16f64_f64 +; CHECK-SAME: (<16 x double> [[X:%.*]], double [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x double> poison, double [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x double> [[DOTSPLATINSERT]], <16 x double> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x double> @llvm.minnum.v16f64(<16 x double> [[X]], <16 x double> [[DOTSPLAT]]) +; CHECK-NEXT: ret <16 x double> [[MIN]] +; + %min = tail call <16 x double> @_Z4fminDv16_dd(<16 x double> %x, double %y) + ret <16 x double> %min +} + +define <16 x double> @test_fmin_f64_v16f64(double %x, <16 x double> %y) { +; CHECK-LABEL: define <16 x double> @test_fmin_f64_v16f64 +; CHECK-SAME: (double [[X:%.*]], <16 x double> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <16 x double> poison, double [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x double> [[INSERT]], <16 x double> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x double> @llvm.minnum.v16f64(<16 x double> [[SHUFFLE]], <16 x double> [[Y]]) +; CHECK-NEXT: ret <16 x double> [[MIN]] +; + %insert = insertelement <16 x double> poison, double %x, i64 0 + %shuffle = shufflevector <16 x double> %insert, <16 x double> poison, <16 
x i32> zeroinitializer + %min = tail call <16 x double> @_Z4fminDv16_dS_(<16 x double> %shuffle, <16 x double> %y) + ret <16 x double> %min +} + +define <2 x half> @test_fmin_v2f16_f16(<2 x half> %x, half %y) { +; CHECK-LABEL: define <2 x half> @test_fmin_v2f16_f16 +; CHECK-SAME: (<2 x half> [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x half> poison, half [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x half> [[DOTSPLATINSERT]], <2 x half> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[X]], <2 x half> [[DOTSPLAT]]) +; CHECK-NEXT: ret <2 x half> [[MIN]] +; + %min = tail call <2 x half> @_Z4fminDv2_DhDh(<2 x half> %x, half %y) + ret <2 x half> %min +} + +define <2 x half> @test_fmin_f16_v2f16(half %x, <2 x half> %y) { +; CHECK-LABEL: define <2 x half> @test_fmin_f16_v2f16 +; CHECK-SAME: (half [[X:%.*]], <2 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <2 x half> poison, half [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x half> [[INSERT]], <2 x half> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SHUFFLE]], <2 x half> [[Y]]) +; CHECK-NEXT: ret <2 x half> [[MIN]] +; + %insert = insertelement <2 x half> poison, half %x, i64 0 + %shuffle = shufflevector <2 x half> %insert, <2 x half> poison, <2 x i32> zeroinitializer + %min = tail call <2 x half> @_Z4fminDv2_DhS_(<2 x half> %shuffle, <2 x half> %y) + ret <2 x half> %min +} + +define <3 x half> @test_fmin_v3f16_f16(<3 x half> %x, half %y) { +; CHECK-LABEL: define <3 x half> @test_fmin_v3f16_f16 +; CHECK-SAME: (<3 x half> [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <3 x half> poison, half [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <3 x half> [[DOTSPLATINSERT]], <3 x half> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = 
tail call <3 x half> @llvm.minnum.v3f16(<3 x half> [[X]], <3 x half> [[DOTSPLAT]]) +; CHECK-NEXT: ret <3 x half> [[MIN]] +; + %min = tail call <3 x half> @_Z4fminDv3_DhDh(<3 x half> %x, half %y) + ret <3 x half> %min +} + +define <3 x half> @test_fmin_f16_v3f16(half %x, <3 x half> %y) { +; CHECK-LABEL: define <3 x half> @test_fmin_f16_v3f16 +; CHECK-SAME: (half [[X:%.*]], <3 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <3 x half> poison, half [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x half> [[INSERT]], <3 x half> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <3 x half> @llvm.minnum.v3f16(<3 x half> [[SHUFFLE]], <3 x half> [[Y]]) +; CHECK-NEXT: ret <3 x half> [[MIN]] +; + %insert = insertelement <3 x half> poison, half %x, i64 0 + %shuffle = shufflevector <3 x half> %insert, <3 x half> poison, <3 x i32> zeroinitializer + %min = tail call <3 x half> @_Z4fminDv3_DhS_(<3 x half> %shuffle, <3 x half> %y) + ret <3 x half> %min +} + +define <4 x half> @test_fmin_v4f16_f16(<4 x half> %x, half %y) { +; CHECK-LABEL: define <4 x half> @test_fmin_v4f16_f16 +; CHECK-SAME: (<4 x half> [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x half> [[DOTSPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <4 x half> @llvm.minnum.v4f16(<4 x half> [[X]], <4 x half> [[DOTSPLAT]]) +; CHECK-NEXT: ret <4 x half> [[MIN]] +; + %min = tail call <4 x half> @_Z4fminDv4_DhDh(<4 x half> %x, half %y) + ret <4 x half> %min +} + +define <4 x half> @test_fmin_f16_v4f16(half %x, <4 x half> %y) { +; CHECK-LABEL: define <4 x half> @test_fmin_f16_v4f16 +; CHECK-SAME: (half [[X:%.*]], <4 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x half> poison, half [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x half> [[INSERT]], <4 x half> 
poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <4 x half> @llvm.minnum.v4f16(<4 x half> [[SHUFFLE]], <4 x half> [[Y]]) +; CHECK-NEXT: ret <4 x half> [[MIN]] +; + %insert = insertelement <4 x half> poison, half %x, i64 0 + %shuffle = shufflevector <4 x half> %insert, <4 x half> poison, <4 x i32> zeroinitializer + %min = tail call <4 x half> @_Z4fminDv4_DhS_(<4 x half> %shuffle, <4 x half> %y) + ret <4 x half> %min +} + +define <8 x half> @test_fmin_v8f16_f16(<8 x half> %x, half %y) { +; CHECK-LABEL: define <8 x half> @test_fmin_v8f16_f16 +; CHECK-SAME: (<8 x half> [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[X]], <8 x half> [[DOTSPLAT]]) +; CHECK-NEXT: ret <8 x half> [[MIN]] +; + %min = tail call <8 x half> @_Z4fminDv8_DhDh(<8 x half> %x, half %y) + ret <8 x half> %min +} + +define <8 x half> @test_fmin_f16_v8f16(half %x, <8 x half> %y) { +; CHECK-LABEL: define <8 x half> @test_fmin_f16_v8f16 +; CHECK-SAME: (half [[X:%.*]], <8 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <8 x half> poison, half [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x half> [[INSERT]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[SHUFFLE]], <8 x half> [[Y]]) +; CHECK-NEXT: ret <8 x half> [[MIN]] +; + %insert = insertelement <8 x half> poison, half %x, i64 0 + %shuffle = shufflevector <8 x half> %insert, <8 x half> poison, <8 x i32> zeroinitializer + %min = tail call <8 x half> @_Z4fminDv8_DhS_(<8 x half> %shuffle, <8 x half> %y) + ret <8 x half> %min +} + +define <16 x half> @test_fmin_v16f16_f16(<16 x half> %x, half %y) { +; CHECK-LABEL: define <16 x half> 
@test_fmin_v16f16_f16 +; CHECK-SAME: (<16 x half> [[X:%.*]], half [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x half> poison, half [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x half> [[DOTSPLATINSERT]], <16 x half> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x half> @llvm.minnum.v16f16(<16 x half> [[X]], <16 x half> [[DOTSPLAT]]) +; CHECK-NEXT: ret <16 x half> [[MIN]] +; + %min = tail call <16 x half> @_Z4fminDv16_DhDh(<16 x half> %x, half %y) + ret <16 x half> %min +} + +define <16 x half> @test_fmin_f16_v16f16(half %x, <16 x half> %y) { +; CHECK-LABEL: define <16 x half> @test_fmin_f16_v16f16 +; CHECK-SAME: (half [[X:%.*]], <16 x half> [[Y:%.*]]) { +; CHECK-NEXT: [[INSERT:%.*]] = insertelement <16 x half> poison, half [[X]], i64 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x half> [[INSERT]], <16 x half> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[MIN:%.*]] = tail call <16 x half> @llvm.minnum.v16f16(<16 x half> [[SHUFFLE]], <16 x half> [[Y]]) +; CHECK-NEXT: ret <16 x half> [[MIN]] +; + %insert = insertelement <16 x half> poison, half %x, i64 0 + %shuffle = shufflevector <16 x half> %insert, <16 x half> poison, <16 x i32> zeroinitializer + %min = tail call <16 x half> @_Z4fminDv16_DhS_(<16 x half> %shuffle, <16 x half> %y) + ret <16 x half> %min +} Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp-splat.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ldexp-splat.ll @@ -0,0 +1,200 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s + +target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8" + +declare <2 x float> @_Z5ldexpDv2_fi(<2 x float>, i32) +declare <3 x float> @_Z5ldexpDv3_fi(<3 x float>, i32) +declare <4 x float> @_Z5ldexpDv4_fi(<4 x float>, i32) +declare <8 x float> @_Z5ldexpDv8_fi(<8 x float>, i32) +declare <16 x float> @_Z5ldexpDv16_fi(<16 x float>, i32) +declare <2 x double> @_Z5ldexpDv2_di(<2 x double>, i32) +declare <3 x double> @_Z5ldexpDv3_di(<3 x double>, i32) +declare <4 x double> @_Z5ldexpDv4_di(<4 x double>, i32) +declare <8 x double> @_Z5ldexpDv8_di(<8 x double>, i32) +declare <16 x double> @_Z5ldexpDv16_di(<16 x double>, i32) +declare <2 x half> @_Z5ldexpDv2_Dhi(<2 x half>, i32) +declare <3 x half> @_Z5ldexpDv3_Dhi(<3 x half>, i32) +declare <4 x half> @_Z5ldexpDv4_Dhi(<4 x half>, i32) +declare <8 x half> @_Z5ldexpDv8_Dhi(<8 x half>, i32) +declare <16 x half> @_Z5ldexpDv16_Dhi(<16 x half>, i32) + +define <2 x float> @test_ldexp_v2f32_i32(<2 x float> %x, i32 %y) { +; CHECK-LABEL: define <2 x float> @test_ldexp_v2f32_i32 +; CHECK-SAME: (<2 x float> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <2 x float> [[LDEXP]] +; + %ldexp = tail call <2 x float> @_Z5ldexpDv2_fi(<2 x float> %x, i32 %y) + ret <2 x float> %ldexp +} + +define <3 x float> @test_ldexp_v3f32_i32(<3 x float> %x, i32 %y) { +; CHECK-LABEL: define <3 x float> @test_ldexp_v3f32_i32 +; CHECK-SAME: (<3 x float> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <3 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: 
[[DOTSPLAT:%.*]] = shufflevector <3 x i32> [[DOTSPLATINSERT]], <3 x i32> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <3 x float> @llvm.ldexp.v3f32.v3i32(<3 x float> [[X]], <3 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <3 x float> [[LDEXP]] +; + %ldexp = tail call <3 x float> @_Z5ldexpDv3_fi(<3 x float> %x, i32 %y) + ret <3 x float> %ldexp +} + +define <4 x float> @test_ldexp_v4f32_i32(<4 x float> %x, i32 %y) { +; CHECK-LABEL: define <4 x float> @test_ldexp_v4f32_i32 +; CHECK-SAME: (<4 x float> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> [[X]], <4 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <4 x float> [[LDEXP]] +; + %ldexp = tail call <4 x float> @_Z5ldexpDv4_fi(<4 x float> %x, i32 %y) + ret <4 x float> %ldexp +} + +define <8 x float> @test_ldexp_v8f32_i32(<8 x float> %x, i32 %y) { +; CHECK-LABEL: define <8 x float> @test_ldexp_v8f32_i32 +; CHECK-SAME: (<8 x float> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float> [[X]], <8 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <8 x float> [[LDEXP]] +; + %ldexp = tail call <8 x float> @_Z5ldexpDv8_fi(<8 x float> %x, i32 %y) + ret <8 x float> %ldexp +} + +define <16 x float> @test_ldexp_v16f32_i32(<16 x float> %x, i32 %y) { +; CHECK-LABEL: define <16 x float> @test_ldexp_v16f32_i32 +; CHECK-SAME: (<16 x float> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: 
[[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <16 x float> @llvm.ldexp.v16f32.v16i32(<16 x float> [[X]], <16 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <16 x float> [[LDEXP]] +; + %ldexp = tail call <16 x float> @_Z5ldexpDv16_fi(<16 x float> %x, i32 %y) + ret <16 x float> %ldexp +} + +define <2 x double> @test_ldexp_v2f64_i32(<2 x double> %x, i32 %y) { +; CHECK-LABEL: define <2 x double> @test_ldexp_v2f64_i32 +; CHECK-SAME: (<2 x double> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> [[X]], <2 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <2 x double> [[LDEXP]] +; + %ldexp = tail call <2 x double> @_Z5ldexpDv2_di(<2 x double> %x, i32 %y) + ret <2 x double> %ldexp +} + +define <3 x double> @test_ldexp_v3f64_i32(<3 x double> %x, i32 %y) { +; CHECK-LABEL: define <3 x double> @test_ldexp_v3f64_i32 +; CHECK-SAME: (<3 x double> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <3 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <3 x i32> [[DOTSPLATINSERT]], <3 x i32> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <3 x double> @llvm.ldexp.v3f64.v3i32(<3 x double> [[X]], <3 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <3 x double> [[LDEXP]] +; + %ldexp = tail call <3 x double> @_Z5ldexpDv3_di(<3 x double> %x, i32 %y) + ret <3 x double> %ldexp +} + +define <4 x double> @test_ldexp_v4f64_i32(<4 x double> %x, i32 %y) { +; CHECK-LABEL: define <4 x double> @test_ldexp_v4f64_i32 +; CHECK-SAME: (<4 x double> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> [[X]], <4 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <4 x double> [[LDEXP]] +; + %ldexp = tail call <4 x double> @_Z5ldexpDv4_di(<4 x double> %x, i32 %y) + ret <4 x double> %ldexp +} + +define <8 x double> @test_ldexp_v8f64_i32(<8 x double> %x, i32 %y) { +; CHECK-LABEL: define <8 x double> @test_ldexp_v8f64_i32 +; CHECK-SAME: (<8 x double> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double> [[X]], <8 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <8 x double> [[LDEXP]] +; + %ldexp = tail call <8 x double> @_Z5ldexpDv8_di(<8 x double> %x, i32 %y) + ret <8 x double> %ldexp +} + +define <16 x double> @test_ldexp_v16f64_i32(<16 x double> %x, i32 %y) { +; CHECK-LABEL: define <16 x double> @test_ldexp_v16f64_i32 +; CHECK-SAME: (<16 x double> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <16 x double> @llvm.ldexp.v16f64.v16i32(<16 x double> [[X]], <16 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <16 x double> [[LDEXP]] +; + %ldexp = tail call <16 x double> @_Z5ldexpDv16_di(<16 x double> %x, i32 %y) + ret <16 x double> %ldexp +} + +define <2 x half> @test_ldexp_v2f16_i32(<2 x half> %x, i32 %y) { +; CHECK-LABEL: define <2 x half> @test_ldexp_v2f16_i32 +; CHECK-SAME: (<2 x half> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> 
poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> [[X]], <2 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <2 x half> [[LDEXP]] +; + %ldexp = tail call <2 x half> @_Z5ldexpDv2_Dhi(<2 x half> %x, i32 %y) + ret <2 x half> %ldexp +} + +define <3 x half> @test_ldexp_v3f16_i32(<3 x half> %x, i32 %y) { +; CHECK-LABEL: define <3 x half> @test_ldexp_v3f16_i32 +; CHECK-SAME: (<3 x half> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <3 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <3 x i32> [[DOTSPLATINSERT]], <3 x i32> poison, <3 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> [[X]], <3 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <3 x half> [[LDEXP]] +; + %ldexp = tail call <3 x half> @_Z5ldexpDv3_Dhi(<3 x half> %x, i32 %y) + ret <3 x half> %ldexp +} + +define <4 x half> @test_ldexp_v4f16_i32(<4 x half> %x, i32 %y) { +; CHECK-LABEL: define <4 x half> @test_ldexp_v4f16_i32 +; CHECK-SAME: (<4 x half> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> [[X]], <4 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <4 x half> [[LDEXP]] +; + %ldexp = tail call <4 x half> @_Z5ldexpDv4_Dhi(<4 x half> %x, i32 %y) + ret <4 x half> %ldexp +} + +define <8 x half> @test_ldexp_v8f16_i32(<8 x half> %x, i32 %y) { +; CHECK-LABEL: define <8 x half> @test_ldexp_v8f16_i32 +; CHECK-SAME: (<8 x half> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: 
[[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <8 x half> @llvm.ldexp.v8f16.v8i32(<8 x half> [[X]], <8 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <8 x half> [[LDEXP]] +; + %ldexp = tail call <8 x half> @_Z5ldexpDv8_Dhi(<8 x half> %x, i32 %y) + ret <8 x half> %ldexp +} + +define <16 x half> @test_ldexp_v16f16_i32(<16 x half> %x, i32 %y) { +; CHECK-LABEL: define <16 x half> @test_ldexp_v16f16_i32 +; CHECK-SAME: (<16 x half> [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[LDEXP:%.*]] = tail call <16 x half> @llvm.ldexp.v16f16.v16i32(<16 x half> [[X]], <16 x i32> [[DOTSPLAT]]) +; CHECK-NEXT: ret <16 x half> [[LDEXP]] +; + %ldexp = tail call <16 x half> @_Z5ldexpDv16_Dhi(<16 x half> %x, i32 %y) + ret <16 x half> %ldexp +}