Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -213,6 +213,7 @@ VGETMANT, // FP Scale. SCALEF, + SCALEFS, // Integer add/sub with unsigned saturation. ADDUS, Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -21923,6 +21923,7 @@ case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; case X86ISD::SCALEF: return "X86ISD::SCALEF"; + case X86ISD::SCALEFS: return "X86ISD::SCALEFS"; case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; case X86ISD::AVG: return "X86ISD::AVG"; Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -3827,18 +3827,18 @@ }//let mayLoad = 1 } -multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> { +multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> { defm PSZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm SSZ128 : avx512_fp_scalef_scalar, - avx512_fp_scalar_round, + defm SSZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, EVEX_4V,EVEX_CD8<32, CD8VT1>; - defm SDZ128 : avx512_fp_scalef_scalar, - avx512_fp_scalar_round, + defm SDZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; // Define only if AVX512VL feature is present. @@ -3853,7 +3853,7 @@ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD; +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD; //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions Index: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -460,6 +460,7 @@ def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>; +def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOpRound>; def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; def X86fsqrtRnds : SDNode<"X86ISD::FSQRT_RND", SDTFPBinOpRound>; Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -1659,9 +1659,9 @@ X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::SCALEF, 0), X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::SCALEF, 0), + X86ISD::SCALEFS, 0), X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::SCALEF, 0), + X86ISD::SCALEFS, 0), X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4, INTR_TYPE_3OP_IMM8_MASK, X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4_256, INTR_TYPE_3OP_IMM8_MASK, Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -5616,39 +5616,6 @@ ret <16 x i32> %res2 } - -declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32) -define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { -; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss: -; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vscalefss %xmm1, %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 -; CHECK-NEXT: retq - %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4) - %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8) - %res2 = fadd <4 x float> %res, %res1 - ret <4 x float> %res2 -} - -declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32) -define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { -; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd: -; CHECK: ## BB#0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vscalefsd %xmm1, %xmm0, %xmm2 {%k1} -; CHECK-NEXT: vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 -; CHECK-NEXT: retq - %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4) - %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8) - %res2 = fadd <2 x double> %res, %res1 - ret <2 x double> %res2 -} - declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { Index: llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-scalarIntrinsics.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32) +define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { + ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss: + ; CHECK: vscalefss %xmm1, %xmm0, %xmm2 {%k1} + ; CHECK: vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0 + %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4) + %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32) +define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { + ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd: + ; CHECK: vscalefsd %xmm1, %xmm0, %xmm2 {%k1} + ; CHECK: vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0 + %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -4243,7 +4243,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vscalefpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1] -; CHECK-NEXT: vscalefsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x2d,0xc1] +; CHECK-NEXT: vscalefpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x2c,0xc1] ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) @@ -4275,7 +4275,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vscalefps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1] -; CHECK-NEXT: vscalefss %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2d,0xc1] +; CHECK-NEXT: vscalefps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2c,0xc1] ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)