Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -664,18 +664,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_ssse3_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_ssse3_pabs_b_128 : GCCBuiltin<"__builtin_ia32_pabsb128">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_x86_ssse3_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_ssse3_pabs_w_128 : GCCBuiltin<"__builtin_ia32_pabsw128">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; def int_x86_ssse3_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_ssse3_pabs_d_128 : GCCBuiltin<"__builtin_ia32_pabsd128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -1841,88 +1835,6 @@ llvm_v8i32_ty], [IntrNoMem]>; } -// Absolute value ops -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx2_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb256">, - Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>; - def int_x86_avx2_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw256">, - Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; - def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_b_128 : - GCCBuiltin<"__builtin_ia32_pabsb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_b_256 : - GCCBuiltin<"__builtin_ia32_pabsb256_mask">, - Intrinsic<[llvm_v32i8_ty], - [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_b_512 : - GCCBuiltin<"__builtin_ia32_pabsb512_mask">, - Intrinsic<[llvm_v64i8_ty], - [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_d_128 : - GCCBuiltin<"__builtin_ia32_pabsd128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_d_256 : - GCCBuiltin<"__builtin_ia32_pabsd256_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_d_512 : - GCCBuiltin<"__builtin_ia32_pabsd512_mask">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_q_128 : - GCCBuiltin<"__builtin_ia32_pabsq128_mask">, - Intrinsic<[llvm_v2i64_ty], - [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_q_256 : - GCCBuiltin<"__builtin_ia32_pabsq256_mask">, - Intrinsic<[llvm_v4i64_ty], - [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_q_512 : - GCCBuiltin<"__builtin_ia32_pabsq512_mask">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_w_128 : - GCCBuiltin<"__builtin_ia32_pabsw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - - def int_x86_avx512_mask_pabs_w_256 : - GCCBuiltin<"__builtin_ia32_pabsw256_mask">, - 
Intrinsic<[llvm_v16i16_ty],
-                    [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
-                    [IntrNoMem]>;
-
-  def int_x86_avx512_mask_pabs_w_512 :
-          GCCBuiltin<"__builtin_ia32_pabsw512_mask">,
-          Intrinsic<[llvm_v32i16_ty],
-                    [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
-                    [IntrNoMem]>;
-}
-
 // Horizontal arithmetic ops
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">,
Index: lib/IR/AutoUpgrade.cpp
===================================================================
--- lib/IR/AutoUpgrade.cpp
+++ lib/IR/AutoUpgrade.cpp
@@ -72,7 +72,12 @@
   // like to use this information to remove upgrade code for some older
   // intrinsics. It is currently undecided how we will determine that future
   // point.
-  if (Name.startswith("sse2.pcmpeq.") ||      // Added in 3.1
+  if (Name == "ssse3.pabs.b.128" ||           // Added in 6.0
+      Name == "ssse3.pabs.w.128" ||           // Added in 6.0
+      Name == "ssse3.pabs.d.128" ||           // Added in 6.0
+      Name.startswith("avx2.pabs.") ||        // Added in 6.0
+      Name.startswith("avx512.mask.pabs.") || // Added in 6.0
+      Name.startswith("sse2.pcmpeq.") ||      // Added in 3.1
       Name.startswith("sse2.pcmpgt.") ||      // Added in 3.1
       Name.startswith("avx2.pcmpeq.") ||      // Added in 3.1
       Name.startswith("avx2.pcmpgt.") ||      // Added in 3.1
@@ -790,6 +795,20 @@
   return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
 }
 
+static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
+  Value *Op0 = CI.getArgOperand(0);
+  llvm::Type *Ty = Op0->getType();
+  Value *Zero = llvm::Constant::getNullValue(Ty);
+  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Op0, Zero);
+  Value *Neg = Builder.CreateNeg(Op0);
+  Value *Res = Builder.CreateSelect(Cmp, Op0, Neg);
+
+  if (CI.getNumArgOperands() == 3)
+    Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
+
+  return Res;
+}
+
 static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
                                ICmpInst::Predicate Pred) {
   Value *Op0 = CI.getArgOperand(0);
@@ -1053,6 +1072,12 @@
   } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) {
     unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
     Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
+  } else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
+                       Name == "ssse3.pabs.w.128" ||
+                       Name == "ssse3.pabs.d.128" ||
+                       Name.startswith("avx2.pabs") ||
+                       Name.startswith("avx512.mask.pabs"))) {
+    Rep = upgradeAbs(Builder, *CI);
   } else if (IsX86 && (Name == "sse41.pmaxsb" ||
                        Name == "sse2.pmaxs.w" ||
                        Name == "sse41.pmaxsd" ||
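For reference, this is the rewrite upgradeAbs performs, shown as IR for the unmasked
128-bit byte case. It is an illustrative sketch, not part of the patch, and the
function name @abs_example is made up; the compare/negate/select pattern matches
what the updated fast-isel tests below expect.

  ; before the upgrade (bitcode produced by an older compiler):
  ;   %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a)
  ; after the upgrade, the same value is computed in generic IR:
  define <16 x i8> @abs_example(<16 x i8> %a) {
    %cmp = icmp sgt <16 x i8> %a, zeroinitializer
    %neg = sub <16 x i8> zeroinitializer, %a
    %res = select <16 x i1> %cmp, <16 x i8> %a, <16 x i8> %neg
    ret <16 x i8> %res
  }
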
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h
+++ lib/Target/X86/X86IntrinsicsInfo.h
@@ -372,9 +372,6 @@
   X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
   X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
   X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
-  X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, ISD::ABS, 0),
   X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
@@ -794,18 +791,6 @@
                      X86ISD::FMULS_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::FMULS_RND, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
   X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
   X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
   X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
@@ -1639,9 +1624,6 @@
   X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
   X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
   X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
-  X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, ISD::ABS, 0),
-  X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, ISD::ABS, 0),
   X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
   X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
   X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
Index: test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
===================================================================
--- test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
+++ test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
@@ -15,8 +15,10 @@
 ; X64-NEXT:    vpabsb %ymm0, %ymm0
 ; X64-NEXT:    retq
   %arg = bitcast <4 x i64> %a0 to <32 x i8>
-  %call = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %arg)
-  %res = bitcast <32 x i8> %call to <4 x i64>
+  %sub = sub <32 x i8> zeroinitializer, %arg
+  %cmp = icmp sgt <32 x i8> %arg, zeroinitializer
+  %sel = select <32 x i1> %cmp, <32 x i8> %arg, <32 x i8> %sub
+  %res = bitcast <32 x i8> %sel to <4 x i64>
   ret <4 x i64> %res
 }
 declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
@@ -32,8 +34,10 @@
 ; X64-NEXT:    vpabsw %ymm0, %ymm0
 ; X64-NEXT:    retq
   %arg = bitcast <4 x i64> %a0 to <16 x i16>
-  %call = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %arg)
-  %res = bitcast <16 x i16> %call to <4 x i64>
+  %sub = sub <16 x i16> zeroinitializer, %arg
+  %cmp = icmp sgt <16 x i16> %arg, zeroinitializer
+  %sel = select <16 x i1> %cmp, <16 x i16> %arg, <16 x i16> %sub
+  %res = bitcast <16 x i16> %sel to <4 x i64>
   ret <4 x i64> %res
 }
 declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
@@ -49,8 +53,10 @@
 ; X64-NEXT:    vpabsd %ymm0, %ymm0
 ; X64-NEXT:    retq
   %arg = bitcast <4 x i64> %a0 to <8 x i32>
-  %call = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %arg)
-  %res = bitcast <8 x i32> %call to <4 x i64>
+  %sub = sub <8 x i32> zeroinitializer, %arg
+  %cmp = icmp sgt <8 x i32> %arg, zeroinitializer
+  %sel = select <8 x i1> %cmp, <8 x i32> %arg, <8 x i32> %sub
+  %res = bitcast <8 x i32> %sel to <4 x i64>
   ret <2 x i64> %res
 }
 declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
Index:
test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll =================================================================== --- test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll +++ test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll @@ -514,3 +514,34 @@ } declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone +define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) { +; AVX2-LABEL: test_x86_avx2_pabs_b: +; AVX2: ## BB#0: +; AVX2-NEXT: vpabsb %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1c,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] + %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone + +define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) { +; AVX2-LABEL: test_x86_avx2_pabs_d: +; AVX2: ## BB#0: +; AVX2-NEXT: vpabsd %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1e,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) { +; AVX2-LABEL: test_x86_avx2_pabs_w: +; AVX2: ## BB#0: +; AVX2-NEXT: vpabsw %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1d,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] + %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone + Index: test/CodeGen/X86/avx2-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/avx2-intrinsics-x86.ll +++ test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -621,55 +621,6 @@ } declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone - -define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) { -; AVX2-LABEL: test_x86_avx2_pabs_b: -; AVX2: ## BB#0: -; AVX2-NEXT: vpabsb %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1c,0xc0] -; AVX2-NEXT: retl ## encoding: [0xc3] -; -; AVX512VL-LABEL: test_x86_avx2_pabs_b: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpabsb %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0] -; AVX512VL-NEXT: retl ## encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1] - ret <32 x i8> %res -} -declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone - - -define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) { -; AVX2-LABEL: test_x86_avx2_pabs_d: -; AVX2: ## BB#0: -; AVX2-NEXT: vpabsd %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1e,0xc0] -; AVX2-NEXT: retl ## encoding: [0xc3] -; -; AVX512VL-LABEL: test_x86_avx2_pabs_d: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpabsd %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0] -; AVX512VL-NEXT: retl ## encoding: [0xc3] - %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1] - ret <8 x i32> %res -} -declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone - - -define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) { -; AVX2-LABEL: test_x86_avx2_pabs_w: -; AVX2: ## BB#0: -; AVX2-NEXT: vpabsw %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x1d,0xc0] -; AVX2-NEXT: retl ## encoding: [0xc3] -; -; AVX512VL-LABEL: test_x86_avx2_pabs_w: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpabsw %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0] -; AVX512VL-NEXT: retl ## encoding: [0xc3] - %res = call 
<16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1] - ret <16 x i16> %res -} -declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone - - define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: test_x86_avx2_phadd_d: ; CHECK: ## BB#0: @@ -1340,18 +1291,18 @@ ; AVX2: ## BB#0: ; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23] ; AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI90_0, kind: FK_Data_4 -; AVX2-NEXT: vpsravd LCPI90_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI90_1, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI87_0, kind: FK_Data_4 +; AVX2-NEXT: vpsravd LCPI87_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI87_1, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d_const: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmovdqa LCPI90_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23] +; AVX512VL-NEXT: vmovdqa LCPI87_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23] ; AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI90_0, kind: FK_Data_4 -; AVX512VL-NEXT: vpsravd LCPI90_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI90_1, kind: FK_Data_4 +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI87_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpsravd LCPI87_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI87_1, kind: FK_Data_4 ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> , <4 x i32> ) ret <4 x i32> %res @@ -1377,18 +1328,18 @@ ; AVX2: ## BB#0: ; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI92_0, kind: FK_Data_4 -; AVX2-NEXT: vpsravd LCPI92_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] -; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI92_1, kind: FK_Data_4 +; AVX2-NEXT: ## fixup A - offset: 4, value: LCPI89_0, kind: FK_Data_4 +; AVX2-NEXT: vpsravd LCPI89_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; AVX2-NEXT: ## fixup A - offset: 5, value: LCPI89_1, kind: FK_Data_4 ; AVX2-NEXT: retl ## encoding: [0xc3] ; ; AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vmovdqa LCPI92_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] +; AVX512VL-NEXT: vmovdqa LCPI89_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI92_0, kind: FK_Data_4 -; AVX512VL-NEXT: vpsravd LCPI92_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] -; AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI92_1, kind: FK_Data_4 +; AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI89_0, kind: FK_Data_4 +; AVX512VL-NEXT: vpsravd LCPI89_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; AVX512VL-NEXT: ## fixup A - offset: 5, value: 
LCPI89_1, kind: FK_Data_4 ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> , <8 x i32> ) ret <8 x i32> %res Index: test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -3581,3 +3581,36 @@ %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) ret <8 x i64> %res } + +declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vpabsd %zmm0, %zmm2 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpabsd %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vpabsq %zmm0, %zmm2 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpabsq %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} + Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -766,38 +766,6 @@ declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) - declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) - -define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpabsd %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vpabsd %zmm0, %zmm0 -; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) - %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1) - %res2 = add <16 x i32> %res, %res1 - ret <16 x i32> %res2 -} - -declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) - -define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpabsq %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vpabsq %zmm0, %zmm0 -; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) - %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1) - %res2 = add <8 x i64> %res, %res1 - ret <8 x i64> 
%res2 -} - define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) { ; CHECK-LABEL: test_vptestmq: ; CHECK: ## BB#0: Index: test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -3610,3 +3610,52 @@ } declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone + +declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32) + +define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { +; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_w_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpabsw %zmm0, %zmm2 +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vpabsw %zmm0, %zmm1 {%k1} +; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpabsw %zmm0, %zmm2 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpabsw %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl + %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1) + %res2 = add <32 x i16> %res, %res1 + ret <32 x i16> %res2 +} + +declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64) + +define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { +; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_b_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: vpabsb %zmm0, %zmm2 +; AVX512BW-NEXT: kmovq %rdi, %k1 +; AVX512BW-NEXT: vpabsb %zmm0, %zmm1 {%k1} +; AVX512BW-NEXT: vpaddb %zmm2, %zmm1, %zmm0 +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vpabsb %zmm0, %zmm2 +; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vpabsb %zmm0, %zmm1 {%k1} +; AVX512F-32-NEXT: vpaddb %zmm2, %zmm1, %zmm0 +; AVX512F-32-NEXT: retl + %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1) + %res2 = add <64 x i8> %res, %res1 + ret <64 x i8> %res2 +} + Index: test/CodeGen/X86/avx512bw-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bw-intrinsics.ll +++ test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1177,56 +1177,6 @@ ret <64 x i8> %res2 } -declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32) - -define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { -; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_w_512: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovd %edi, %k1 -; AVX512BW-NEXT: vpabsw %zmm0, %zmm1 {%k1} -; AVX512BW-NEXT: vpabsw %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_w_512: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpabsw %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vpabsw %zmm0, %zmm0 -; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: retl - %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) - %res1 = call <32 
x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1) - %res2 = add <32 x i16> %res, %res1 - ret <32 x i16> %res2 -} - -declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64) - -define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { -; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_b_512: -; AVX512BW: ## BB#0: -; AVX512BW-NEXT: kmovq %rdi, %k1 -; AVX512BW-NEXT: vpabsb %zmm0, %zmm1 {%k1} -; AVX512BW-NEXT: vpabsb %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512BW-NEXT: retq -; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_b_512: -; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 -; AVX512F-32-NEXT: vpabsb %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vpabsb %zmm0, %zmm0 -; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: retl - %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) - %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1) - %res2 = add <64 x i8> %res, %res1 - ret <64 x i8> %res2 -} - declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { Index: test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -3445,3 +3445,68 @@ } declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone + +declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pabs_b_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpabsb %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xd0] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpabsb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8] +; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) + %res2 = add <16 x i8> %res, %res1 + ret <16 x i8> %res2 +} + +declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32) + +define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pabs_b_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpabsb %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xd0] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpabsb %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8] +; CHECK-NEXT: vpaddb %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) + %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1) + %res2 = add <32 x i8> %res, %res1 + ret <32 
x i8> %res2 +} + +declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pabs_w_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpabsw %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xd0] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpabsw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8] +; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) + %res2 = add <8 x i16> %res, %res1 + ret <8 x i16> %res2 +} + +declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16) + +define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pabs_w_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpabsw %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xd0] +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpabsw %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8] +; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1) + %res2 = add <16 x i16> %res, %res1 + ret <16 x i16> %res2 +} + Index: test/CodeGen/X86/avx512bwvl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -1964,70 +1964,6 @@ ret <16 x i16> %res2 } -declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16) - -define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pabs_b_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vpabsb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8] -; CHECK-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] -; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) - %res2 = add <16 x i8> %res, %res1 - ret <16 x i8> %res2 -} - -declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32) - -define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pabs_b_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vpabsb %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8] -; CHECK-NEXT: vpabsb %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xc0] -; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xf5,0xfc,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) - %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1) - %res2 = add <32 x i8> %res, %res1 - ret <32 x i8> %res2 -} - -declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8) - -define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pabs_w_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vpabsw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8] -; CHECK-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) - %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) - %res2 = add <8 x i16> %res, %res1 - ret <8 x i16> %res2 -} - -declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16) - -define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pabs_w_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vpabsw %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8] -; CHECK-NEXT: vpabsw %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xc0] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) - %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1) - %res2 = add <16 x i16> %res, %res1 - ret <16 x i16> %res2 -} - declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { Index: test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -5767,3 +5767,68 @@ %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) ret <8 x i32> %res } + +declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8) + +define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpabsq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpabsq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8] +; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) + %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) + %res2 = add <2 x i64> %res, %res1 + ret <2 x i64> 
%res2 +} + +declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8) + +define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpabsq %ymm0, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpabsq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8] +; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) + %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) + %res2 = add <4 x i64> %res, %res1 + ret <4 x i64> %res2 +} + +declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpabsd %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpabsd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8] +; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) + %res2 = add <4 x i32> %res, %res1 + ret <4 x i32> %res2 +} + +declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpabsd %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xd0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpabsd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8] +; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} + Index: test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics.ll +++ test/CodeGen/X86/avx512vl-intrinsics.ll @@ -1097,70 +1097,6 @@ ret <8 x float> %res2 } -declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8) - -define <2 x i64>@test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpabsq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8] -; CHECK-NEXT: vpabsq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <2 x 
i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) - %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) - %res2 = add <2 x i64> %res, %res1 - ret <2 x i64> %res2 -} - -declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8) - -define <4 x i64>@test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pabs_q_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpabsq %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8] -; CHECK-NEXT: vpabsq %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) - %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) - %res2 = add <4 x i64> %res, %res1 - ret <4 x i64> %res2 -} - -declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8) - -define <4 x i32>@test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpabsd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8] -; CHECK-NEXT: vpabsd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0] -; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) - %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) - %res2 = add <4 x i32> %res, %res1 - ret <4 x i32> %res2 -} - -declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8) - -define <8 x i32>@test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_pabs_d_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpabsd %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8] -; CHECK-NEXT: vpabsd %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0] -; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) - %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) - %res2 = add <8 x i32> %res, %res1 - ret <8 x i32> %res2 -} - declare <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { Index: test/CodeGen/X86/combine-abs.ll =================================================================== --- test/CodeGen/X86/combine-abs.ll +++ test/CodeGen/X86/combine-abs.ll @@ -81,11 +81,20 @@ ; fold (abs x) -> x iff not-negative define <16 x i8> @combine_v16i8_abs_constant(<16 x i8> %a) { -; CHECK-LABEL: combine_v16i8_abs_constant: -; CHECK: # BB#0: -; CHECK-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; CHECK-NEXT: vpabsb %xmm0, 
%xmm0 -; CHECK-NEXT: retq +; AVX2-LABEL: combine_v16i8_abs_constant: +; AVX2: # BB#0: +; AVX2-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: combine_v16i8_abs_constant: +; AVX512F: # BB#0: +; AVX512F-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: combine_v16i8_abs_constant: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: retq %1 = insertelement <16 x i8> undef, i8 15, i32 0 %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer %3 = and <16 x i8> %a, %2 Index: test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll =================================================================== --- test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll +++ test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll @@ -15,8 +15,10 @@ ; X64-NEXT: pabsb %xmm0, %xmm0 ; X64-NEXT: retq %arg = bitcast <2 x i64> %a0 to <16 x i8> - %call = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %arg) - %res = bitcast <16 x i8> %call to <2 x i64> + %sub = sub <16 x i8> zeroinitializer, %arg + %cmp = icmp sgt <16 x i8> %arg, zeroinitializer + %sel = select <16 x i1> %cmp, <16 x i8> %arg, <16 x i8> %sub + %res = bitcast <16 x i8> %sel to <2 x i64> ret <2 x i64> %res } declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone @@ -32,8 +34,10 @@ ; X64-NEXT: pabsw %xmm0, %xmm0 ; X64-NEXT: retq %arg = bitcast <2 x i64> %a0 to <8 x i16> - %call = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %arg) - %res = bitcast <8 x i16> %call to <2 x i64> + %sub = sub <8 x i16> zeroinitializer, %arg + %cmp = icmp sgt <8 x i16> %arg, zeroinitializer + %sel = select <8 x i1> %cmp, <8 x i16> %arg, <8 x i16> %sub + %res = bitcast <8 x i16> %sel to <2 x i64> ret <2 x i64> %res } declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone @@ -49,8 +53,10 @@ ; X64-NEXT: pabsd %xmm0, %xmm0 ; X64-NEXT: retq %arg = bitcast <2 x i64> %a0 to <4 x i32> - %call = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %arg) - %res = bitcast <4 x i32> %call to <2 x i64> + %sub = sub <4 x i32> zeroinitializer, %arg + %cmp = icmp sgt <4 x i32> %arg, zeroinitializer + %sel = select <4 x i1> %cmp, <4 x i32> %arg, <4 x i32> %sub + %res = bitcast <4 x i32> %sel to <2 x i64> ret <2 x i64> %res } declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
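
For completeness, an illustrative sketch (not part of the patch) of how the masked
AVX-512 forms upgrade: upgradeAbs emits the same compare/negate/select, and
EmitX86Select then applies the mask by bitcasting the scalar mask to a vector of i1
and selecting against the passthru operand. The function name @masked_abs_example
is made up for this example.

  define <16 x i8> @masked_abs_example(<16 x i8> %x0, <16 x i8> %passthru, i16 %mask) {
    %cmp = icmp sgt <16 x i8> %x0, zeroinitializer
    %neg = sub <16 x i8> zeroinitializer, %x0
    %abs = select <16 x i1> %cmp, <16 x i8> %x0, <16 x i8> %neg
    %m = bitcast i16 %mask to <16 x i1>
    %res = select <16 x i1> %m, <16 x i8> %abs, <16 x i8> %passthru
    ret <16 x i8> %res
  }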