Index: lib/IR/AutoUpgrade.cpp =================================================================== --- lib/IR/AutoUpgrade.cpp +++ lib/IR/AutoUpgrade.cpp @@ -225,6 +225,7 @@ Name.startswith("x86.avx512.mask.pshuf.d.") || Name.startswith("x86.avx512.mask.pshufl.w.") || Name.startswith("x86.avx512.mask.pshufh.w.") || + Name.startswith("x86.avx512.mask.vpermil.p") || Name.startswith("x86.avx512.mask.punpckl") || Name.startswith("x86.avx512.mask.punpckh") || Name.startswith("x86.avx512.mask.unpckl.") || @@ -1007,12 +1008,13 @@ Rep = nullptr; } else if (Name.startswith("llvm.x86.avx.vpermil.") || Name == "llvm.x86.sse2.pshuf.d" || + Name.startswith("llvm.x86.avx512.mask.vpermil.p") || Name.startswith("llvm.x86.avx512.mask.pshuf.d.")) { Value *Op0 = CI->getArgOperand(0); unsigned Imm = cast(CI->getArgOperand(1))->getZExtValue(); VectorType *VecTy = cast(CI->getType()); unsigned NumElts = VecTy->getNumElements(); - // Calcuate the size of each index in the immediate. + // Calculate the size of each index in the immediate. unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); unsigned IdxMask = ((1 << IdxSize) - 1); Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1107,12 +1107,6 @@ return Res; } - AMDGPUOperand &Op = static_cast(*Operands.back()); - if (Op.isImm() && Op.Imm.IsFPImm) { - Error(Parser.getTok().getLoc(), "floating point operands not allowed with sext() modifier"); - return MatchOperand_ParseFail; - } - AMDGPUOperand::Modifiers Mods = {false, false, false}; if (Sext) { if (getLexer().isNot(AsmToken::RParen)) { @@ -1124,6 +1118,7 @@ } if (Mods.hasIntModifiers()) { + AMDGPUOperand &Op = static_cast(*Operands.back()); Op.setModifiers(Mods); } return MatchOperand_Success; Index: test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -317,6 +317,46 @@ ret <8 x i64> %res4 } +declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 = zmm0[0,1,3,2,5,4,6,6] +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6] +; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6] +; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vaddpd %zmm2, %zmm0, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3) + %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3) + %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1) + %res3 = fadd <8 x double> %res, %res1 + %res4 = fadd <8 x double> %res3, %res2 + ret <8 x double> %res4 +} + +declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: vpermilps {{.*#+}} zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] +; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] +; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vaddps %zmm2, %zmm0, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3) + %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3) + %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res3, %res2 + ret <16 x float> %res4 +} + declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i32, <16 x i32>, i16) define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -5438,46 +5438,6 @@ ret <16 x float> %res2 } -declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8) - -define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6] -; CHECK-NEXT: vpermilpd {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6] -; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,3,2,5,4,6,6] -; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm1 -; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3) - %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3) - %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1) - %res3 = fadd <8 x double> %res, %res1 - %res4 = fadd <8 x double> %res3, %res2 - ret <8 x double> %res4 -} - -declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16) - -define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] -; CHECK-NEXT: vpermilps {{.*#+}} zmm2 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] -; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12] -; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1 -; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3) - %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3) - %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1) - %res3 = fadd <16 x float> %res, %res1 - %res4 = fadd <16 x float> %res3, %res2 - ret <16 x float> %res4 -} - declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8) define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { Index: test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -138,6 +138,98 @@ ret <4 x double> %res4 } +declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xd0,0x06] +; CHECK-NEXT: ## ymm2 = ymm0[0,1,3,2] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06] +; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,3,2] +; CHECK-NEXT: vpermilpd $6, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06] +; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,1,3,2] +; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0] +; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3) + %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1) + %res3 = fadd <4 x double> %res, %res1 + %res4 = fadd <4 x double> %res2, %res3 + ret <4 x double> %res4 +} + +declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xd0,0x01] +; CHECK-NEXT: ## xmm2 = xmm0[1,0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01] +; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,0] +; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01] +; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[1,0] +; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] +; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3) + %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1) + %res3 = fadd <2 x double> %res, %res1 + %res4 = fadd <2 x double> %res3, %res2 + ret <2 x double> %res4 +} + +declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xd0,0x16] +; CHECK-NEXT: ## ymm2 = ymm0[2,1,1,0,6,5,5,4] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16] +; CHECK-NEXT: ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4] +; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16] +; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4] +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] +; CHECK-NEXT: vaddps %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc2] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3) + %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1) + %res3 = fadd <8 x float> %res, %res1 + %res4 = fadd <8 x float> %res3, %res2 + ret <8 x float> %res4 +} + +declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xd0,0x16] +; CHECK-NEXT: ## xmm2 = xmm0[2,1,1,0] +; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16] +; CHECK-NEXT: ## xmm1 {%k1} = xmm0[2,1,1,0] +; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16] +; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[2,1,1,0] +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0] +; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3) + %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res2, %res3 + ret <4 x float> %res4 +} + declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8) define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) { Index: test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics.ll +++ test/CodeGen/X86/avx512vl-intrinsics.ll @@ -5722,98 +5722,6 @@ ret <4 x i64> %res2 } -declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8) - -define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x16] -; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,3,2] -; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xd0,0x16] -; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,3,2] -; CHECK-NEXT: vpermilpd $22, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xc0,0x16] -; CHECK-NEXT: ## ymm0 = ymm0[0,1,3,2] -; CHECK-NEXT: vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca] -; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3) - %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3) - %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1) - %res3 = fadd <4 x double> %res, %res1 - %res4 = fadd <4 x double> %res2, %res3 - ret <4 x double> %res4 -} - -declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8) - -define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01] -; CHECK-NEXT: ## xmm1 {%k1} = xmm0[1,0] -; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xd0,0x01] -; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[1,0] -; CHECK-NEXT: vpermilpd $1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xc0,0x01] -; CHECK-NEXT: ## xmm0 = xmm0[1,0] -; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca] -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3) - %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3) - %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1) - %res3 = fadd <2 x double> %res, %res1 - %res4 = fadd <2 x double> %res3, %res2 - ret <2 x double> %res4 -} - -declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8) - -define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16] -; CHECK-NEXT: ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4] -; CHECK-NEXT: vpermilps $22, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xd0,0x16] -; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4] -; CHECK-NEXT: vpermilps $22, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xc0,0x16] -; CHECK-NEXT: ## ymm0 = ymm0[2,1,1,0,6,5,5,4] -; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca] -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3) - %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3) - %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1) - %res3 = fadd <8 x float> %res, %res1 - %res4 = fadd <8 x float> %res3, %res2 - ret <8 x float> %res4 -} - -declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8) - -define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) { -; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT: vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16] -; CHECK-NEXT: ## xmm1 {%k1} = xmm0[2,1,1,0] -; CHECK-NEXT: vpermilps $22, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xd0,0x16] -; CHECK-NEXT: ## xmm2 {%k1} {z} = xmm0[2,1,1,0] -; CHECK-NEXT: vpermilps $22, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xc0,0x16] -; CHECK-NEXT: ## xmm0 = xmm0[2,1,1,0] -; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca] -; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1] -; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3) - %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3) - %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1) - %res3 = fadd <4 x float> %res, %res1 - %res4 = fadd <4 x float> %res2, %res3 - ret <4 x float> %res4 -} - declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8) define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) { @@ -7218,9 +7126,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A] -; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI472_0-4, kind: reloc_riprel_4byte +; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI468_0-4, kind: reloc_riprel_4byte ; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A] -; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI472_1-4, kind: reloc_riprel_4byte +; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI468_1-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> , <8 x i32> , <8 x i32> zeroinitializer, i8 -1) ret <8 x i32> %res @@ -7251,9 +7159,9 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607] ; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A] -; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI474_0-4, kind: reloc_riprel_4byte +; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI470_0-4, kind: reloc_riprel_4byte ; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A] -; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI474_1-4, kind: reloc_riprel_4byte +; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI470_1-4, kind: reloc_riprel_4byte ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> , <2 x i64> , <2 x i64> zeroinitializer, i8 -1) ret <2 x i64> %res Index: test/MC/AMDGPU/vop1.s =================================================================== --- test/MC/AMDGPU/vop1.s +++ test/MC/AMDGPU/vop1.s @@ -374,3 +374,13 @@ // NOSICI: v_cos_f16 v1, v2 // VI: v_cos_f16_e32 v1, v2 ; encoding: [0x02,0x95,0x02,0x7e] v_cos_f16 v1, v2 + +//===----------------------------------------------------------------------===// +// Floating point literals +//===----------------------------------------------------------------------===// + +// GCN: v_mov_b32_e32 v0, 0.5 ; encoding: [0xf0,0x02,0x00,0x7e] +v_mov_b32 v0, 0.5 + +// GCN: v_mov_b32_e32 v0, 0x40480000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x00,0x48,0x40] +v_mov_b32 v0, 3.125 \ No newline at end of file Index: test/MC/AMDGPU/vop2.s =================================================================== --- test/MC/AMDGPU/vop2.s +++ test/MC/AMDGPU/vop2.s @@ -94,6 +94,14 @@ // SICI: v_mul_i32_i24_e64 v1, 3, s3 ; encoding: [0x01,0x00,0x12,0xd2,0x83,0x06,0x00,0x00] v_mul_i32_i24 v1, 3, s3 +// SICI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x4a] +// VI: v_add_i32_e32 v0, vcc, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x32] +v_add_i32 v0, vcc, 0.5, v0 + +// SICI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x4a,0x00,0x00,0x48,0x40] +// VI: v_add_i32_e32 v0, vcc, 0x40480000, v0 ; encoding: [0xff,0x00,0x00,0x32,0x00,0x00,0x48,0x40] +v_add_i32 v0, vcc, 3.125, v0 + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===//