diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -122,6 +122,9 @@
 /// specified node are ISD::UNDEF.
 bool allOperandsUndef(const SDNode *N);
 
+/// Return true if the specified node is FREEZE(UNDEF).
+bool isFreezeUndef(const SDNode *N);
+
 } // end namespace ISD
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -175,6 +175,9 @@
   SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>, SDTCisVec<1>,
   SDTCisOpSmallerThanOp<1, 0>
 ]>;
+def SDTFreeze : SDTypeProfile<1, 1, [
+  SDTCisSameAs<0, 1>
+]>;
 
 def SDTSetCC : SDTypeProfile<1, 3, [   // setcc
   SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
@@ -453,6 +456,7 @@
 def trunc      : SDNode<"ISD::TRUNCATE"  , SDTIntTruncOp>;
 def bitconvert : SDNode<"ISD::BITCAST"   , SDTUnaryOp>;
 def addrspacecast : SDNode<"ISD::ADDRSPACECAST", SDTUnaryOp>;
+def freeze     : SDNode<"ISD::FREEZE"    , SDTFreeze>;
 def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>;
 def insertelt  : SDNode<"ISD::INSERT_VECTOR_ELT",  SDTVecInsert>;
@@ -1300,6 +1304,9 @@
   let ScalarMemoryVT = i16;
 }
 
+// A helper for matching undef or freeze undef
+def undef_or_freeze_undef : PatFrags<(ops), [(undef), (freeze undef)]>;
+
 // TODO: Split these into volatile and unordered flavors to enable
 // selectively legal optimizations for each. (See D66309)
 def simple_load : PatFrag<(ops node:$ptr),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -300,6 +300,10 @@
   return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); });
 }
 
+bool ISD::isFreezeUndef(const SDNode *N) {
+  return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef();
+}
+
 bool ISD::matchUnaryPredicate(SDValue Op,
                               std::function<bool(ConstantSDNode *)> Match,
                               bool AllowUndefs) {
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11461,6 +11461,7 @@
           ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
 
   unsigned NumOperands = Op.getNumOperands();
+  unsigned NumFreezeUndef = 0;
   unsigned NumZero = 0;
   unsigned NumNonZero = 0;
   unsigned NonZeros = 0;
@@ -11468,7 +11469,9 @@
     SDValue SubVec = Op.getOperand(i);
     if (SubVec.isUndef())
       continue;
-    if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
+    if (ISD::isFreezeUndef(SubVec.getNode()) && SubVec.hasOneUse())
+      ++NumFreezeUndef;
+    else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
       ++NumZero;
     else {
       assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
@@ -11490,7 +11493,8 @@
   // Otherwise, build it up through insert_subvectors.
   SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl)
-                        : DAG.getUNDEF(ResVT);
+                        : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
+                                          : DAG.getUNDEF(ResVT));
 
   MVT SubVT = Op.getOperand(0).getSimpleValueType();
   unsigned NumSubElems = SubVT.getVectorNumElements();
diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td
--- a/llvm/lib/Target/X86/X86InstrVecCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -68,7 +68,7 @@
   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
             (subVT (EXTRACT_SUBREG RC:$src, subIdx))>;
 
-  def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
+  def : Pat<(VT (insert_subvector undef_or_freeze_undef, subRC:$src, (iPTR 0))),
             (VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>;
 }
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
--- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -258,7 +258,6 @@
 ; CHECK-LABEL: test_mm256_castpd128_pd256_freeze:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %a1 = freeze <2 x double> poison
   %res = shufflevector <2 x double> %a0, <2 x double> %a1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -304,7 +303,6 @@
 ; CHECK-LABEL: test_mm256_castps128_ps256_freeze:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %a1 = freeze <4 x float> poison
   %res = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -334,7 +332,6 @@
 ; CHECK-LABEL: test_mm256_castsi128_si256_freeze:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %a1 = freeze <2 x i64> poison
   %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -1033,17 +1033,10 @@
 define <4 x double> @test_mm256_castpd128_pd256_freeze(<2 x double> %a0) nounwind {
-; AVX-LABEL: test_mm256_castpd128_pd256_freeze:
-; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
-; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-;
-; AVX512VL-LABEL: test_mm256_castpd128_pd256_freeze:
-; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512VL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
-; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+; CHECK-LABEL: test_mm256_castpd128_pd256_freeze:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   %a1 = freeze <2 x double> poison
   %res = shufflevector <2 x double> %a0, <2 x double> %a1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x double> %res
@@ -1051,17 +1044,10 @@
 define <8 x float> @test_mm256_castps128_ps256_freeze(<4 x float> %a0) nounwind {
-; AVX-LABEL: test_mm256_castps128_ps256_freeze:
-; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
-; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-;
-; AVX512VL-LABEL: test_mm256_castps128_ps256_freeze:
-; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512VL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
-; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+; CHECK-LABEL: test_mm256_castps128_ps256_freeze:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   %a1 = freeze <4 x float> poison
   %res = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x float> %res
@@ -1069,17 +1055,10 @@
 define <4 x i64> @test_mm256_castsi128_si256_freeze(<2 x i64> %a0) nounwind {
-; AVX-LABEL: test_mm256_castsi128_si256_freeze:
-; AVX:       # %bb.0:
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
-; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-;
-; AVX512VL-LABEL: test_mm256_castsi128_si256_freeze:
-; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512VL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
-; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+; CHECK-LABEL: test_mm256_castsi128_si256_freeze:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   %a1 = freeze <2 x i64> poison
   %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i64> %res
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -7510,7 +7510,6 @@
 ; CHECK-LABEL: test_mm256_castpd256_pd256_freeze:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %a1 = freeze <4 x double> poison
   %res = shufflevector <4 x double> %a0, <4 x double> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -7536,7 +7535,6 @@
 ; CHECK-LABEL: test_mm256_castps256_ps512_freeze:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %a1 = freeze <8 x float> poison
   %res = shufflevector <8 x float> %a0, <8 x float> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -7562,7 +7560,6 @@
 ; CHECK-LABEL: test_mm512_castsi256_si512_pd256_freeze:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
 ; CHECK-NEXT:    ret{{[l|q]}}
   %a1 = freeze <4 x i64> poison
   %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
--- a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
@@ -1221,7 +1221,6 @@
 ; CHECK-LABEL: test_mm256_castph128_ph256_freeze:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; CHECK-NEXT:    retq
   %a1 = freeze <8 x half> poison
   %res = shufflevector <8 x half> %a0, <8 x half> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1247,7 +1246,6 @@
 ; CHECK-LABEL: test_mm512_castph256_ph512_freeze:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
   %a1 = freeze <16 x half> poison
   %res = shufflevector <16 x half> %a0, <16 x half> %a1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
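
Note (illustrative, not part of the patch): the shape this change targets is a subvector widened with frozen-poison upper lanes, exactly what the updated tests above exercise. A minimal standalone sketch follows; the function name is hypothetical, and with this patch an AVX build of llc should select the widening as a plain subregister insert rather than emitting a vinsertf128 for the frozen-undef upper half.

; Minimal reproducer sketch: concatenate a 128-bit vector with frozen poison
; to form a 256-bit vector. The frozen upper half carries no defined lanes,
; so it can be matched by the new undef_or_freeze_undef pattern.
define <4 x i64> @widen_with_frozen_poison(<2 x i64> %lo) {
  %hi = freeze <2 x i64> poison
  %wide = shufflevector <2 x i64> %lo, <2 x i64> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %wide
}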