Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -213,6 +213,7 @@ VGETMANT, // FP Scale. SCALEF, + SCALEFS, // Integer add/sub with unsigned saturation. ADDUS, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -21844,6 +21844,7 @@ case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; case X86ISD::SCALEF: return "X86ISD::SCALEF"; + case X86ISD::SCALEFS: return "X86ISD::SCALEFS"; case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; case X86ISD::AVG: return "X86ISD::AVG"; Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -3792,18 +3792,18 @@ }//let mayLoad = 1 } -multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> { +multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> { defm PSZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm SSZ128 : avx512_fp_scalef_scalar, - avx512_fp_scalar_round, + defm SSZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, EVEX_4V,EVEX_CD8<32, CD8VT1>; - defm SDZ128 : avx512_fp_scalef_scalar, - avx512_fp_scalar_round, + defm SDZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; // Define only if AVX512VL feature is present. @@ -3818,7 +3818,7 @@ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD; +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef,X86scalefs>, T8PD; //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -457,6 +457,7 @@ def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>; +def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOpRound>; def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; def X86fsqrtRnds : SDNode<"X86ISD::FSQRT_RND", STDFp2SrcRm>; Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -1653,9 +1653,9 @@ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::SCALEF, 0), X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::SCALEF, 0), + X86ISD::SCALEFS, 0), X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::SCALEF, 0), + X86ISD::SCALEFS, 0), X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4, INTR_TYPE_3OP_IMM8_MASK, X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4_256, INTR_TYPE_3OP_IMM8_MASK, Index: test/CodeGen/X86/scalefIntrin.ll =================================================================== --- test/CodeGen/X86/scalefIntrin.ll +++ test/CodeGen/X86/scalefIntrin.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s | FileCheck %s + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) #1 + +; Function Attrs: nounwind readnone +declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) #1 + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) #1 + +; Function Attrs: nounwind readnone +declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) #1 + + +define i32 @scalefIntrin() #0 { +entry: + ; CHECK: vscalefss %xmm0, %xmm0, %xmm0 + ; CHECK: vscalefsd %xmm0, %xmm0, %xmm1 + ; CHECK: vscalefps %xmm0, %xmm0, %xmm2 + ; CHECK: vscalefpd %xmm0, %xmm0, %xmm3 + %0 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> undef, <4 x float> undef, <4 x float> zeroinitializer, i8 -1, i32 4) #2 + %1 = bitcast <4 x float> %0 to <2 x double> + %2 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> undef, <2 x double> undef, <2 x double> zeroinitializer, i8 -1, i32 4) #2 + %3 = bitcast <2 x double> %2 to <4 x float> + %4 = tail call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> undef, <4 x float> undef, <4 x float> zeroinitializer, i8 -1) #2 + %5 = bitcast <4 x float> %4 to <2 x double> + %6 = tail call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> undef, <2 x double> undef, <2 x double> zeroinitializer, i8 -1) #2 + %7 = bitcast <2 x double> %6 to <4 x float> + %vecext = extractelement <2 x double> %1, i32 0 + %vecext4 = extractelement <4 x float> %3, i32 0 + %conv = fpext float %vecext4 to double + %mul = fmul double %vecext, %conv + %vecext5 = extractelement <2 x double> %5, i32 0 + %mul6 = fmul double %vecext5, %mul + %vecext7 = extractelement <4 x float> %7, i32 0 + %conv8 = fpext float %vecext7 to double + %mul9 = fmul double %mul6, %conv8 + %conv10 = fptosi double %mul9 to i32 + ret i32 %conv10 +} + +attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+mpx,+pclmul,+pcommit,+pku,+popcnt,+rdrnd,+rdseed,+rtm,+sgx,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.9.0 (cfe/trunk 268481)"}