Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8064,10 +8064,15 @@ unsigned NumElts = VT.getVectorNumElements(); SDLoc DL(N); - for (unsigned i=0; i != NumElts; ++i) { - SDValue Op = N0->getOperand(i); - if (Op->isUndef()) { - Elts.push_back(DAG.getUNDEF(SVT)); + // For zero-extensions, UNDEF elements still guarantee to have the upper + // bits set to zero. + bool IsZext = + Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG; + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue Op = N0.getOperand(i); + if (Op.isUndef()) { + Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT)); continue; } Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1848,6 +1848,13 @@ return true; KnownZero = SrcZero.zextOrTrunc(NumElts); KnownUndef = SrcUndef.zextOrTrunc(NumElts); + + if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) { + // zext(undef) upper bits are guaranteed to be zero. + if (DemandedElts.isSubsetOf(KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); + KnownUndef.clearAllBits(); + } break; } case ISD::OR: @@ -1892,6 +1899,13 @@ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, KnownZero, TLO, Depth + 1)) return true; + + if (Op.getOpcode() == ISD::ZERO_EXTEND) { + // zext(undef) upper bits are guaranteed to be zero. + if (DemandedElts.isSubsetOf(KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); + KnownUndef.clearAllBits(); + } break; default: { if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { Index: llvm/trunk/test/CodeGen/AArch64/pr40091.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/pr40091.ll +++ llvm/trunk/test/CodeGen/AArch64/pr40091.ll @@ -4,7 +4,7 @@ define i64 @test(i64 %aa) { ; CHECK-LABEL: test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff +; CHECK-NEXT: movi v0.8b, #137 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret entry: Index: llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll +++ llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll @@ -261,12 +261,12 @@ define <4 x i16> @test_zext_4i8_4i16_undef() { ; X32-LABEL: test_zext_4i8_4i16_undef: ; X32: # %bb.0: -; X32-NEXT: vmovaps {{.*#+}} xmm0 = +; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253] ; X32-NEXT: retl ; ; X64-LABEL: test_zext_4i8_4i16_undef: ; X64: # %bb.0: -; X64-NEXT: vmovaps {{.*#+}} xmm0 = +; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253] ; X64-NEXT: retq %1 = insertelement <4 x i8> undef, i8 undef, i32 0 %2 = insertelement <4 x i8> %1, i8 -1, i32 1 @@ -279,12 +279,12 @@ define <4 x i32> @test_zext_4i8_4i32_undef() { ; X32-LABEL: test_zext_4i8_4i32_undef: ; X32: # %bb.0: -; X32-NEXT: vmovaps {{.*#+}} xmm0 = <0,u,2,u> +; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,2,0] ; X32-NEXT: retl ; ; X64-LABEL: test_zext_4i8_4i32_undef: ; X64: # %bb.0: -; X64-NEXT: vmovaps {{.*#+}} xmm0 = <0,u,2,u> +; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,2,0] ; X64-NEXT: retq %1 = insertelement <4 x i8> undef, i8 0, i32 0 %2 = insertelement <4 x i8> %1, i8 undef, i32 1 @@ -297,12 +297,12 @@ define <4 x i64> @test_zext_4i8_4i64_undef() { ; X32-LABEL: test_zext_4i8_4i64_undef: ; X32: # %bb.0: -; X32-NEXT: vmovaps {{.*#+}} ymm0 = +; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,255,0,2,0,0,0] ; X32-NEXT: retl ; ; X64-LABEL: test_zext_4i8_4i64_undef: ; X64: # %bb.0: -; X64-NEXT: vmovaps {{.*#+}} ymm0 = +; X64-NEXT: vmovaps {{.*#+}} ymm0 = [0,255,2,0] ; X64-NEXT: retq %1 = insertelement <4 x i8> undef, i8 undef, i32 0 %2 = insertelement <4 x i8> %1, i8 -1, i32 1 @@ -359,12 +359,12 @@ define <8 x i16> @test_zext_8i8_8i16_undef() { ; X32-LABEL: test_zext_8i8_8i16_undef: ; X32: # %bb.0: -; X32-NEXT: vmovaps {{.*#+}} xmm0 = +; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249] ; X32-NEXT: retl ; ; X64-LABEL: test_zext_8i8_8i16_undef: ; X64: # %bb.0: -; X64-NEXT: vmovaps {{.*#+}} xmm0 = +; X64-NEXT: vmovaps {{.*#+}} xmm0 = [0,255,0,253,0,251,0,249] ; X64-NEXT: retq %1 = insertelement <8 x i8> undef, i8 undef, i32 0 %2 = insertelement <8 x i8> %1, i8 -1, i32 1 @@ -381,12 +381,12 @@ define <8 x i32> @test_zext_8i8_8i32_undef() { ; X32-LABEL: test_zext_8i8_8i32_undef: ; X32: # %bb.0: -; X32-NEXT: vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u> +; X32-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0] ; X32-NEXT: retl ; ; X64-LABEL: test_zext_8i8_8i32_undef: ; X64: # %bb.0: -; X64-NEXT: vmovaps {{.*#+}} ymm0 = <0,u,2,253,4,u,6,u> +; X64-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,2,253,4,0,6,0] ; X64-NEXT: retq %1 = insertelement <8 x i8> undef, i8 0, i32 0 %2 = insertelement <8 x i8> %1, i8 undef, i32 1