diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4370,11 +4370,16 @@
   for (SDValue Op : Elts)
     SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
 
-  if (SVT.bitsGT(VT.getScalarType()))
-    for (SDValue &Op : Elts)
-      Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
-               ? DAG.getZExtOrTrunc(Op, DL, SVT)
-               : DAG.getSExtOrTrunc(Op, DL, SVT);
+  if (SVT.bitsGT(VT.getScalarType())) {
+    for (SDValue &Op : Elts) {
+      if (Op.isUndef())
+        Op = DAG.getUNDEF(SVT);
+      else
+        Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT)
+                 ? DAG.getZExtOrTrunc(Op, DL, SVT)
+                 : DAG.getSExtOrTrunc(Op, DL, SVT);
+    }
+  }
 
   SDValue V = DAG.getBuildVector(VT, DL, Elts);
   NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG);
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-undef-not-zero.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-undef-not-zero.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-undef-not-zero.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s
+
+; Check that we don't generate lots of vinserts (of 0 that should be undef).
+; CHECK: vinsert
+; CHECK: vinsert
+; CHECK-NOT: vinsert
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+define dllexport void @f0(i8* noalias align 128 %a0) #0 {
+b0:
+  %v0 = bitcast i8* %a0 to i32*
+  %v1 = getelementptr inbounds i32, i32* %v0, i32 undef
+  %v2 = bitcast i32* %v1 to <7 x i32>*
+  br label %b1
+
+b1:                                               ; preds = %b0
+  %v3 = load i8, i8* undef, align 1
+  %v4 = insertelement <7 x i8> undef, i8 %v3, i32 0
+  %v5 = shufflevector <7 x i8> %v4, <7 x i8> undef, <7 x i32> zeroinitializer
+  %v6 = zext <7 x i8> %v5 to <7 x i32>
+  %v7 = load <7 x i8>, <7 x i8>* undef, align 1
+  %v8 = zext <7 x i8> %v7 to <7 x i32>
+  %v9 = mul nsw <7 x i32> %v6, %v8
+  %v10 = add nsw <7 x i32> %v9, zeroinitializer
+  store <7 x i32> %v10, <7 x i32>* %v2, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv66" "target-features"="+hvx,+hvx-length128b" }
+
diff --git a/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll b/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll
--- a/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-cmp-sub128.ll
@@ -101,7 +101,6 @@
 ; AVX512-32-NEXT:    kmovw %eax, %k0
 ; AVX512-32-NEXT:    vcomiss 8(%ebp), %xmm2
 ; AVX512-32-NEXT:    seta %al
-; AVX512-32-NEXT:    andl $1, %eax
 ; AVX512-32-NEXT:    kmovw %eax, %k1
 ; AVX512-32-NEXT:    kandw %k0, %k1, %k0
 ; AVX512-32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
@@ -122,7 +121,6 @@
 ; AVX512-64-NEXT:    kmovw %eax, %k0
 ; AVX512-64-NEXT:    vcomiss %xmm3, %xmm2
 ; AVX512-64-NEXT:    seta %al
-; AVX512-64-NEXT:    andl $1, %eax
 ; AVX512-64-NEXT:    kmovw %eax, %k1
 ; AVX512-64-NEXT:    kandw %k0, %k1, %k0
 ; AVX512-64-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
@@ -148,7 +146,6 @@
 ; AVX512F-32-NEXT:    kmovw %eax, %k0
 ; AVX512F-32-NEXT:    vcomiss 8(%ebp), %xmm2
 ; AVX512F-32-NEXT:    seta %al
-; AVX512F-32-NEXT:    andl $1, %eax
 ; AVX512F-32-NEXT:    kmovw %eax, %k1
 ; AVX512F-32-NEXT:    kandw %k0, %k1, %k0
 ; AVX512F-32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
@@ -173,7 +170,6 @@
 ; AVX512F-64-NEXT:    kmovw %eax, %k0
 ; AVX512F-64-NEXT:    vcomiss %xmm3, %xmm2
 ; AVX512F-64-NEXT:    seta %al
-; AVX512F-64-NEXT:    andl $1, %eax
 ; AVX512F-64-NEXT:    kmovw %eax, %k1
 ; AVX512F-64-NEXT:    kandw %k0, %k1, %k0
 ; AVX512F-64-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
@@ -299,7 +295,6 @@
 ; AVX512-32-NEXT:    sete %cl
 ; AVX512-32-NEXT:    testb %al, %cl
 ; AVX512-32-NEXT:    setne %al
-; AVX512-32-NEXT:    andl $1, %eax
 ; AVX512-32-NEXT:    kmovw %eax, %k1
 ; AVX512-32-NEXT:    kandw %k0, %k1, %k0
 ; AVX512-32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
@@ -319,27 +314,26 @@
 ;
 ; AVX512-64-LABEL: test_v2f32_oeq_q:
 ; AVX512-64:       # %bb.0:
-; AVX512-64-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
-; AVX512-64-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
-; AVX512-64-NEXT:    vucomiss %xmm4, %xmm5
+; AVX512-64-NEXT:    vucomiss %xmm3, %xmm2
 ; AVX512-64-NEXT:    setnp %al
 ; AVX512-64-NEXT:    sete %cl
 ; AVX512-64-NEXT:    testb %al, %cl
 ; AVX512-64-NEXT:    setne %al
 ; AVX512-64-NEXT:    kmovw %eax, %k0
-; AVX512-64-NEXT:    kshiftlw $15, %k0, %k0
-; AVX512-64-NEXT:    kshiftrw $14, %k0, %k0
+; AVX512-64-NEXT:    movw $-3, %ax
+; AVX512-64-NEXT:    kmovw %eax, %k1
+; AVX512-64-NEXT:    kandw %k1, %k0, %k0
+; AVX512-64-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX512-64-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX512-64-NEXT:    vucomiss %xmm3, %xmm2
 ; AVX512-64-NEXT:    setnp %al
 ; AVX512-64-NEXT:    sete %cl
 ; AVX512-64-NEXT:    testb %al, %cl
 ; AVX512-64-NEXT:    setne %al
-; AVX512-64-NEXT:    andl $1, %eax
 ; AVX512-64-NEXT:    kmovw %eax, %k1
-; AVX512-64-NEXT:    movw $-3, %ax
-; AVX512-64-NEXT:    kmovw %eax, %k2
-; AVX512-64-NEXT:    kandw %k2, %k1, %k1
-; AVX512-64-NEXT:    korw %k0, %k1, %k1
+; AVX512-64-NEXT:    kshiftlw $15, %k1, %k1
+; AVX512-64-NEXT:    kshiftrw $14, %k1, %k1
+; AVX512-64-NEXT:    korw %k1, %k0, %k1
 ; AVX512-64-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; AVX512-64-NEXT:    retq
 ;
@@ -358,7 +352,6 @@
 ; AVX512F-32-NEXT:    sete %cl
 ; AVX512F-32-NEXT:    testb %al, %cl
 ; AVX512F-32-NEXT:    setne %al
-; AVX512F-32-NEXT:    andl $1, %eax
 ; AVX512F-32-NEXT:    kmovw %eax, %k1
 ; AVX512F-32-NEXT:    kandw %k0, %k1, %k0
 ; AVX512F-32-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
@@ -382,27 +375,26 @@
 ; AVX512F-64:       # %bb.0:
 ; AVX512F-64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-64-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
-; AVX512F-64-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
-; AVX512F-64-NEXT:    vucomiss %xmm4, %xmm5
+; AVX512F-64-NEXT:    vucomiss %xmm3, %xmm2
 ; AVX512F-64-NEXT:    setnp %al
 ; AVX512F-64-NEXT:    sete %cl
 ; AVX512F-64-NEXT:    testb %al, %cl
 ; AVX512F-64-NEXT:    setne %al
 ; AVX512F-64-NEXT:    kmovw %eax, %k0
-; AVX512F-64-NEXT:    kshiftlw $15, %k0, %k0
-; AVX512F-64-NEXT:    kshiftrw $14, %k0, %k0
+; AVX512F-64-NEXT:    movw $-3, %ax
+; AVX512F-64-NEXT:    kmovw %eax, %k1
+; AVX512F-64-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-64-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; AVX512F-64-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX512F-64-NEXT:    vucomiss %xmm3, %xmm2
 ; AVX512F-64-NEXT:    setnp %al
 ; AVX512F-64-NEXT:    sete %cl
 ; AVX512F-64-NEXT:    testb %al, %cl
 ; AVX512F-64-NEXT:    setne %al
-; AVX512F-64-NEXT:    andl $1, %eax
 ; AVX512F-64-NEXT:    kmovw %eax, %k1
-; AVX512F-64-NEXT:    movw $-3, %ax
-; AVX512F-64-NEXT:    kmovw %eax, %k2
-; AVX512F-64-NEXT:    kandw %k2, %k1, %k1
-; AVX512F-64-NEXT:    korw %k0, %k1, %k1
+; AVX512F-64-NEXT:    kshiftlw $15, %k1, %k1
+; AVX512F-64-NEXT:    kshiftrw $14, %k1, %k1
+; AVX512F-64-NEXT:    korw %k1, %k0, %k1
 ; AVX512F-64-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512F-64-NEXT:    vzeroupper