Skip to content

Commit 4339abe

Browse files
committed Mar 12, 2015
[X86, AVX2] Replace inserti128 and extracti128 intrinsics with generic shuffles
This should complete the job started in r231794 and continued in r232045: We want to replace as much custom x86 shuffling via intrinsics as possible because pushing the code down the generic shuffle optimization path allows for better codegen and less complexity in LLVM.

AVX2 introduced proper integer variants of the hacked integer insert/extract C intrinsics that were created for this same functionality with AVX1.

This should complete the removal of insert/extract128 intrinsics.

The Clang precursor patch for this change was checked in at r232109.

llvm-svn: 232120
1 parent 7fde301 commit 4339abe

File tree

6 files changed

+30
-52
lines changed

6 files changed

+30
-52
lines changed
 

‎llvm/include/llvm/IR/IntrinsicsX86.td

-7
Original file line number | Diff line number | Diff line change
@@ -1759,13 +1759,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
17591759

17601760
// Vector extract and insert
17611761
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1762-
def int_x86_avx2_vextracti128 : GCCBuiltin<"__builtin_ia32_extract128i256">,
1763-
Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty,
1764-
llvm_i8_ty], [IntrNoMem]>;
1765-
def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
1766-
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
1767-
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
1768-
17691762
def int_x86_avx512_mask_vextractf32x4_512 :
17701763
GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
17711764
Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty,

‎llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

-22
Original file line number | Diff line number | Diff line change
@@ -4966,28 +4966,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
49664966
setValue(&I, Res);
49674967
return nullptr;
49684968
}
4969-
case Intrinsic::x86_avx2_vinserti128: {
4970-
EVT DestVT = TLI.getValueType(I.getType());
4971-
EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
4972-
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
4973-
ElVT.getVectorNumElements();
4974-
Res =
4975-
DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
4976-
getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
4977-
DAG.getConstant(Idx, TLI.getVectorIdxTy()));
4978-
setValue(&I, Res);
4979-
return nullptr;
4980-
}
4981-
case Intrinsic::x86_avx2_vextracti128: {
4982-
EVT DestVT = TLI.getValueType(I.getType());
4983-
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
4984-
DestVT.getVectorNumElements();
4985-
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT,
4986-
getValue(I.getArgOperand(0)),
4987-
DAG.getConstant(Idx, TLI.getVectorIdxTy()));
4988-
setValue(&I, Res);
4989-
return nullptr;
4990-
}
49914969
case Intrinsic::convertff:
49924970
case Intrinsic::convertfsi:
49934971
case Intrinsic::convertfui:

‎llvm/lib/IR/AutoUpgrade.cpp

+6 -2
Original file line number | Diff line number | Diff line change
@@ -161,9 +161,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
161161
Name == "x86.avx.vinsertf128.pd.256" ||
162162
Name == "x86.avx.vinsertf128.ps.256" ||
163163
Name == "x86.avx.vinsertf128.si.256" ||
164+
Name == "x86.avx2.vinserti128" ||
164165
Name == "x86.avx.vextractf128.pd.256" ||
165166
Name == "x86.avx.vextractf128.ps.256" ||
166167
Name == "x86.avx.vextractf128.si.256" ||
168+
Name == "x86.avx2.vextracti128" ||
167169
Name == "x86.avx.movnt.dq.256" ||
168170
Name == "x86.avx.movnt.pd.256" ||
169171
Name == "x86.avx.movnt.ps.256" ||
@@ -634,7 +636,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
634636
Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
635637
} else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
636638
Name == "llvm.x86.avx.vinsertf128.ps.256" ||
637-
Name == "llvm.x86.avx.vinsertf128.si.256") {
639+
Name == "llvm.x86.avx.vinsertf128.si.256" ||
640+
Name == "llvm.x86.avx2.vinserti128") {
638641
Value *Op0 = CI->getArgOperand(0);
639642
Value *Op1 = CI->getArgOperand(1);
640643
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
@@ -679,7 +682,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
679682
Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
680683
} else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
681684
Name == "llvm.x86.avx.vextractf128.ps.256" ||
682-
Name == "llvm.x86.avx.vextractf128.si.256") {
685+
Name == "llvm.x86.avx.vextractf128.si.256" ||
686+
Name == "llvm.x86.avx2.vextracti128") {
683687
Value *Op0 = CI->getArgOperand(0);
684688
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
685689
VectorType *VecTy = cast<VectorType>(CI->getType());

‎llvm/lib/Target/X86/X86InstrSSE.td

+1 -3
Original file line number | Diff line number | Diff line change
@@ -8595,9 +8595,7 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
85958595
//
85968596
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
85978597
(ins VR256:$src1, u8imm:$src2),
8598-
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8599-
[(set VR128:$dst,
8600-
(int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
8598+
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
86018599
Sched<[WriteShuffle256]>, VEX, VEX_L;
86028600
let hasSideEffects = 0, mayStore = 1 in
86038601
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),

‎llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll

+22 -1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
1+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s
22

33
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
44
; CHECK: vpblendw
@@ -62,3 +62,24 @@ define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
6262
ret <4 x i64> %res
6363
}
6464
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
65+
66+
67+
define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
68+
; CHECK-LABEL: test_x86_avx2_vextracti128:
69+
; CHECK: vextracti128
70+
71+
%res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
72+
ret <2 x i64> %res
73+
}
74+
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
75+
76+
77+
define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
78+
; CHECK-LABEL: test_x86_avx2_vinserti128:
79+
; CHECK: vinserti128
80+
81+
%res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
82+
ret <4 x i64> %res
83+
}
84+
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
85+

‎llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll

+1 -17
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
1+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s
22

33
define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
44
; CHECK: vpackssdw
@@ -775,22 +775,6 @@ define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
775775
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
776776

777777

778-
define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
779-
; CHECK: vextracti128
780-
%res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
781-
ret <2 x i64> %res
782-
}
783-
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
784-
785-
786-
define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
787-
; CHECK: vinserti128
788-
%res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1]
789-
ret <4 x i64> %res
790-
}
791-
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
792-
793-
794778
define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
795779
; CHECK: vpmaskmovq
796780
%res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]

0 commit comments

Comments
 (0)