diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -5253,21 +5253,26 @@ assert(NOutVT.isVector() && "This type must be promoted to a vector type"); unsigned NumElems = N->getNumOperands(); EVT NOutVTElem = NOutVT.getVectorElementType(); - + TargetLoweringBase::BooleanContent NOutBoolType = TLI.getBooleanContents(NOutVT); + unsigned NOutExtOpc = TargetLowering::getExtendForContent(NOutBoolType); SDLoc dl(N); SmallVector Ops; Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { - SDValue Op; + SDValue Op = N->getOperand(i); + EVT OpVT = Op.getValueType(); // BUILD_VECTOR integer operand types are allowed to be larger than the // result's element type. This may still be true after the promotion. For // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>. - if (N->getOperand(i).getValueType().bitsLT(NOutVTElem)) - Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); - else - Op = N->getOperand(i); + if (OpVT.bitsLT(NOutVTElem)) { + unsigned ExtOpc = ISD::ANY_EXTEND; + // Attempt to extend constant bool vectors to match target's BooleanContent. 
+ if (OpVT == MVT::i1 && Op.getOpcode() == ISD::Constant) + ExtOpc = NOutExtOpc; + Op = DAG.getNode(ExtOpc, dl, NOutVTElem, Op); + } Ops.push_back(Op); } diff --git a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll --- a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll @@ -15,19 +15,11 @@ ret <8 x i1> %Shuff } -; CHECK-LABEL: lCPI1_0: -; CHECK: .byte 0 ; 0x0 -; CHECK: .space 1 -; CHECK: .byte 0 ; 0x0 -; CHECK: .space 1 -; CHECK: .byte 1 ; 0x1 -; CHECK: .byte 0 ; 0x0 -; CHECK: .byte 0 ; 0x0 -; CHECK: .byte 0 ; 0x0 define <8 x i1>@test2() { -; CHECK-LABEL: test2 -; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_0@PAGE -; CHECK: ldr d[[REG1:[0-9]+]], [x[[REG2]], lCPI1_0@PAGEOFF] +; CHECK-LABEL: test2: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: movi d0, #0x0000ff00000000 +; CHECK-NEXT: ret bb: %Shuff = shufflevector <8 x i1> zeroinitializer, <8 x i1> , @@ -39,7 +31,7 @@ define <16 x i1> @test3(i1* %ptr, i32 %v) { ; CHECK-LABEL: test3: ; CHECK: ; %bb.0: ; %bb -; CHECK-NEXT: movi.4s v0, #1 +; CHECK-NEXT: movi.2d v0, #0x0000ff000000ff ; CHECK-NEXT: ret bb: %Shuff = shufflevector <16 x i1> , <16 x i1> undef, @@ -52,7 +44,7 @@ ; CHECK: .byte 0 ; 0x0 ; CHECK: .byte 0 ; 0x0 ; CHECK: .byte 0 ; 0x0 -; CHECK: .byte 1 ; 0x1 +; CHECK: .byte 255 ; 0xff ; CHECK: .byte 0 ; 0x0 ; CHECK: .byte 0 ; 0x0 ; CHECK: .byte 0 ; 0x0 diff --git a/llvm/test/CodeGen/ARM/select_xform.ll b/llvm/test/CodeGen/ARM/select_xform.ll --- a/llvm/test/CodeGen/ARM/select_xform.ll +++ b/llvm/test/CodeGen/ARM/select_xform.ll @@ -529,8 +529,7 @@ ; CHECK-NEXT: vmov d16, r2, r3 ; CHECK-NEXT: vmov d17, r0, r1 ; CHECK-NEXT: vceq.i32 d16, d17, d16 -; CHECK-NEXT: vmov.i32 d17, #0x1 -; CHECK-NEXT: veor d16, d16, d17 +; CHECK-NEXT: vmvn d16, d16 ; CHECK-NEXT: vshl.i32 d16, d16, #31 ; CHECK-NEXT: vshr.s32 d16, d16, #31 ; CHECK-NEXT: vmov r0, r1, d16 diff --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll --- 
a/llvm/test/CodeGen/PowerPC/pr25080.ll +++ b/llvm/test/CodeGen/PowerPC/pr25080.ll @@ -44,13 +44,10 @@ ; LE-NEXT: lxvd2x 2, 0, 3 ; LE-NEXT: vmrghh 5, 0, 5 ; LE-NEXT: xxmrglw 0, 36, 34 -; LE-NEXT: vspltish 4, 15 ; LE-NEXT: xxmrglw 1, 37, 35 ; LE-NEXT: xxswapd 35, 2 ; LE-NEXT: xxmrgld 34, 1, 0 ; LE-NEXT: xxlor 34, 34, 35 -; LE-NEXT: vslh 2, 2, 4 -; LE-NEXT: vsrah 2, 2, 4 ; LE-NEXT: blr ; ; BE-LABEL: pr25080: @@ -96,12 +93,9 @@ ; BE-NEXT: vperm 3, 0, 3, 1 ; BE-NEXT: xxmrghw 0, 36, 34 ; BE-NEXT: xxmrghw 1, 35, 37 -; BE-NEXT: vspltish 3, 15 ; BE-NEXT: xxmrghd 34, 1, 0 ; BE-NEXT: lxvw4x 0, 0, 3 ; BE-NEXT: xxlor 34, 34, 0 -; BE-NEXT: vslh 2, 2, 3 -; BE-NEXT: vsrah 2, 2, 3 ; BE-NEXT: blr entry: %0 = trunc <8 x i32> %a to <8 x i23> diff --git a/llvm/test/CodeGen/PowerPC/vec-select.ll b/llvm/test/CodeGen/PowerPC/vec-select.ll --- a/llvm/test/CodeGen/PowerPC/vec-select.ll +++ b/llvm/test/CodeGen/PowerPC/vec-select.ll @@ -53,15 +53,11 @@ ret <2 x i64> %or.i } -; Not valid to emit XXSEL for this illegal type. +; vXi1 constants are sign-extended to preserve XXSEL pattern. 
define dso_local <4 x i1> @test5(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vspltisw v5, 1 -; CHECK-NEXT: xxland vs0, vs36, vs35 -; CHECK-NEXT: xxlxor vs1, vs36, vs37 -; CHECK-NEXT: xxland vs1, vs34, vs1 -; CHECK-NEXT: xxlor vs34, vs1, vs0 +; CHECK-NEXT: xxsel vs34, vs34, vs35, vs36 ; CHECK-NEXT: blr entry: %neg.i = xor <4 x i1> %c, diff --git a/llvm/test/CodeGen/X86/bitcast-setcc-128.ll b/llvm/test/CodeGen/X86/bitcast-setcc-128.ll --- a/llvm/test/CodeGen/X86/bitcast-setcc-128.ll +++ b/llvm/test/CodeGen/X86/bitcast-setcc-128.ll @@ -515,8 +515,8 @@ ; AVX2-LABEL: v16i8_widened_with_ones: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vinserti128 $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx ; AVX2-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 ; AVX2-NEXT: orq %rcx, %rax diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll --- a/llvm/test/CodeGen/X86/promote-cmp.ll +++ b/llvm/test/CodeGen/X86/promote-cmp.ll @@ -7,42 +7,40 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) { ; SSE2-LABEL: PR45808: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] +; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648] ; SSE2-NEXT: movdqa %xmm3, %xmm9 -; SSE2-NEXT: pxor %xmm4, %xmm9 +; SSE2-NEXT: pxor %xmm5, %xmm9 ; SSE2-NEXT: movdqa %xmm1, %xmm6 -; SSE2-NEXT: pxor %xmm4, %xmm6 +; SSE2-NEXT: pxor %xmm5, %xmm6 ; SSE2-NEXT: movdqa %xmm6, %xmm8 ; SSE2-NEXT: pcmpgtd %xmm9, %xmm8 ; SSE2-NEXT: movdqa %xmm2, %xmm7 -; SSE2-NEXT: pxor %xmm4, %xmm7 -; SSE2-NEXT: pxor %xmm0, %xmm4 -; SSE2-NEXT: movdqa %xmm4, %xmm5 -; SSE2-NEXT: pcmpgtd %xmm7, %xmm5 -; SSE2-NEXT: movdqa %xmm5, %xmm10 +; SSE2-NEXT: pxor %xmm5, %xmm7 +; SSE2-NEXT: pxor %xmm0, %xmm5 +; 
SSE2-NEXT: movdqa %xmm5, %xmm4 +; SSE2-NEXT: pcmpgtd %xmm7, %xmm4 +; SSE2-NEXT: movdqa %xmm4, %xmm10 ; SSE2-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm8[0,2] ; SSE2-NEXT: pcmpeqd %xmm9, %xmm6 -; SSE2-NEXT: pcmpeqd %xmm7, %xmm4 -; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm6[1,3] -; SSE2-NEXT: andps %xmm10, %xmm4 -; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,3],xmm8[1,3] -; SSE2-NEXT: orps %xmm4, %xmm5 -; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[2,1,3,3] -; SSE2-NEXT: psllq $63, %xmm6 -; SSE2-NEXT: psrad $31, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3] -; SSE2-NEXT: pand %xmm6, %xmm1 -; SSE2-NEXT: pandn %xmm3, %xmm6 -; SSE2-NEXT: por %xmm6, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,1,1,3] -; SSE2-NEXT: pxor %xmm4, %xmm3 -; SSE2-NEXT: psllq $63, %xmm3 -; SSE2-NEXT: psrad $31, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3] -; SSE2-NEXT: pand %xmm3, %xmm0 -; SSE2-NEXT: pandn %xmm2, %xmm3 -; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm7, %xmm5 +; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,3],xmm6[1,3] +; SSE2-NEXT: andps %xmm10, %xmm5 +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm8[1,3] +; SSE2-NEXT: orps %xmm5, %xmm4 +; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,1,3,3] +; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; SSE2-NEXT: pxor %xmm6, %xmm6 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 +; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1] +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pandn %xmm2, %xmm4 +; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: psllq $63, %xmm5 +; SSE2-NEXT: psrad $31, %xmm5 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pandn %xmm3, %xmm2 +; SSE2-NEXT: por %xmm2, %xmm1 ; SSE2-NEXT: retq ; ; SSE4-LABEL: PR45808: @@ -57,8 +55,7 @@ ; SSE4-NEXT: pxor %xmm5, %xmm6 ; SSE4-NEXT: psllq $63, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm3 -; SSE4-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm6[0],zero,xmm6[1],zero -; 
SSE4-NEXT: psllq $63, %xmm0 +; SSE4-NEXT: pmovsxdq %xmm6, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm4, %xmm2 ; SSE4-NEXT: movapd %xmm2, %xmm0 ; SSE4-NEXT: movapd %xmm3, %xmm1 diff --git a/llvm/test/CodeGen/X86/vselect-constants.ll b/llvm/test/CodeGen/X86/vselect-constants.ll --- a/llvm/test/CodeGen/X86/vselect-constants.ll +++ b/llvm/test/CodeGen/X86/vselect-constants.ll @@ -280,26 +280,30 @@ ; SSE-LABEL: wrong_min_signbits: ; SSE: # %bb.0: ; SSE-NEXT: pxor %xmm1, %xmm1 -; SSE-NEXT: pcmpeqw %xmm0, %xmm1 -; SSE-NEXT: movdqa {{.*#+}} xmm0 = [1,0,0,0] -; SSE-NEXT: pandn %xmm0, %xmm1 -; SSE-NEXT: psllw $15, %xmm1 -; SSE-NEXT: psraw $15, %xmm1 -; SSE-NEXT: movdqa %xmm1, %xmm2 -; SSE-NEXT: pandn %xmm0, %xmm2 -; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE-NEXT: por %xmm2, %xmm1 -; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: pcmpeqw %xmm1, %xmm0 +; SSE-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE-NEXT: pxor %xmm0, %xmm2 +; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE-NEXT: psllw $15, %xmm2 +; SSE-NEXT: psraw $15, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE-NEXT: por %xmm0, %xmm2 +; SSE-NEXT: movd %xmm2, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: wrong_min_signbits: ; AVX: # %bb.0: ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,0,0,0] -; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; AVX-NEXT: vpsllw $15, %xmm0, %xmm0 ; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 +; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,0,0,0] ; AVX-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; 
AVX-NEXT: retq