diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5464,6 +5464,23 @@
   return llvm::None;
 }
 
+// Handle constant folding with UNDEF.
+// TODO: Handle more cases.
+static llvm::Optional<APInt> FoldValueWithUndef(unsigned Opcode,
+                                                const APInt &C1, bool IsUndef1,
+                                                const APInt &C2,
+                                                bool IsUndef2) {
+  if (!(IsUndef1 || IsUndef2))
+    return FoldValue(Opcode, C1, C2);
+
+  // Fold and(x, undef) -> 0
+  // Fold mul(x, undef) -> 0
+  if (Opcode == ISD::AND || Opcode == ISD::MUL)
+    return APInt::getZero(C1.getBitWidth());
+
+  return llvm::None;
+}
+
 SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
                                        const GlobalAddressSDNode *GA,
                                        const SDNode *N2) {
@@ -5564,7 +5581,6 @@
   ElementCount NumElts = VT.getVectorElementCount();
 
   // See if we can fold through bitcasted integer ops.
-  // TODO: Can we handle undef elements?
   if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
       Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
       Ops[0].getOpcode() == ISD::BITCAST &&
@@ -5580,11 +5596,11 @@
     SmallVector<APInt> RawBits1, RawBits2;
     BitVector UndefElts1, UndefElts2;
     if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
-        BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) &&
-        UndefElts1.none() && UndefElts2.none()) {
+        BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2)) {
       SmallVector<APInt> RawBits;
       for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
-        Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);
+        Optional<APInt> Fold = FoldValueWithUndef(
+            Opcode, RawBits1[I], UndefElts1[I], RawBits2[I], UndefElts2[I]);
         if (!Fold)
           break;
         RawBits.push_back(*Fold);
diff --git a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
--- a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll
@@ -431,7 +431,7 @@
 ; ARM7-NEXT:  .LCPI3_2:
 ; ARM7-NEXT:    .long 3 @ 0x3
 ; ARM7-NEXT:    .long 0 @ 0x0
-; ARM7-NEXT:    .zero 4
+; ARM7-NEXT:    .long 0 @ 0x0
 ; ARM7-NEXT:    .long 0 @ 0x0
 ;
 ; ARM8-LABEL: test_srem_vec:
@@ -507,7 +507,7 @@
 ; ARM8-NEXT:  .LCPI3_2:
 ; ARM8-NEXT:    .long 3 @ 0x3
 ; ARM8-NEXT:    .long 0 @ 0x0
-; ARM8-NEXT:    .zero 4
+; ARM8-NEXT:    .long 0 @ 0x0
 ; ARM8-NEXT:    .long 0 @ 0x0
 ;
 ; NEON7-LABEL: test_srem_vec:
@@ -583,7 +583,7 @@
 ; NEON7-NEXT:  .LCPI3_2:
 ; NEON7-NEXT:    .long 3 @ 0x3
 ; NEON7-NEXT:    .long 0 @ 0x0
-; NEON7-NEXT:    .zero 4
+; NEON7-NEXT:    .long 0 @ 0x0
 ; NEON7-NEXT:    .long 0 @ 0x0
 ;
 ; NEON8-LABEL: test_srem_vec:
@@ -659,7 +659,7 @@
 ; NEON8-NEXT:  .LCPI3_2:
 ; NEON8-NEXT:    .long 3 @ 0x3
 ; NEON8-NEXT:    .long 0 @ 0x0
-; NEON8-NEXT:    .zero 4
+; NEON8-NEXT:    .long 0 @ 0x0
 ; NEON8-NEXT:    .long 0 @ 0x0
   %srem = srem <3 x i33> %X, <i33 9, i33 9, i33 -9>
   %cmp = icmp ne <3 x i33> %srem, <i33 3, i33 -5, i33 7>
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -651,16 +651,22 @@
 ; RV32MV-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
 ; RV32MV-NEXT:    vmv.s.x v0, a0
 ; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; RV32MV-NEXT:    vmv.v.i v8, 1
 ; RV32MV-NEXT:    mv a0, sp
-; RV32MV-NEXT:    vle32.v v10, (a0)
-; RV32MV-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32MV-NEXT:    addi a0, a0, %lo(.LCPI3_0)
-; RV32MV-NEXT:    vle32.v v12, (a0)
-; RV32MV-NEXT:    vmerge.vim v8, v8, -1, v0
-; RV32MV-NEXT:    vand.vv v8, v10, v8
+; RV32MV-NEXT:    vle32.v v8, (a0)
+; RV32MV-NEXT:    vmv.v.i v10, 1
+; RV32MV-NEXT:    vmerge.vim v10, v10, -1, v0
+; RV32MV-NEXT:    vand.vv v8, v8, v10
+; RV32MV-NEXT:    li a0, 2
+; RV32MV-NEXT:    vmv.s.x v10, a0
+; RV32MV-NEXT:    li a0, 1
+; RV32MV-NEXT:    vmv.s.x v12, a0
+; RV32MV-NEXT:    vmv.v.i v14, 0
+; RV32MV-NEXT:    vsetivli zero, 3, e32, m2, tu, mu
+; RV32MV-NEXT:    vslideup.vi v14, v12, 2
+; RV32MV-NEXT:    vsetivli zero, 5, e32, m2, tu, mu
+; RV32MV-NEXT:    vslideup.vi v14, v10, 4
 ; RV32MV-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; RV32MV-NEXT:    vmsne.vv v0, v8, v12
+; RV32MV-NEXT:    vmsne.vv v0, v8, v14
 ; RV32MV-NEXT:    vmv.v.i v8, 0
 ; RV32MV-NEXT:    vmerge.vim v8, v8, -1, v0
 ; RV32MV-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
diff --git a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
--- a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll
@@ -136,7 +136,7 @@
 ; CHECK-NEXT:  .LCPI3_2:
 ; CHECK-NEXT:    .long 3 @ 0x3
 ; CHECK-NEXT:    .long 0 @ 0x0
-; CHECK-NEXT:    .zero 4
+; CHECK-NEXT:    .long 0 @ 0x0
 ; CHECK-NEXT:    .long 0 @ 0x0
   %srem = srem <3 x i33> %X, <i33 9, i33 9, i33 -9>
   %cmp = icmp ne <3 x i33> %srem, <i33 3, i33 -5, i33 7>
diff --git a/llvm/test/CodeGen/X86/fshl-splat-undef.ll b/llvm/test/CodeGen/X86/fshl-splat-undef.ll
--- a/llvm/test/CodeGen/X86/fshl-splat-undef.ll
+++ b/llvm/test/CodeGen/X86/fshl-splat-undef.ll
@@ -21,9 +21,7 @@
 ; CHECK-LABEL: test_fshl:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12]
-; CHECK-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}, %zmm2, %zmm2
-; CHECK-NEXT:    vpsllvq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT:    vpsllq $12, %zmm1, %zmm1
 ; CHECK-NEXT:    vpsrlq $52, %zmm0, %zmm0
 ; CHECK-NEXT:    vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
 ; CHECK-NEXT:    vmovdqa64 %zmm0, (%eax)
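
Note on the fold itself (a reviewer sketch, not part of the patch): an undef operand is free to take any value, so for opcodes where 0 is an absorbing element (AND, MUL) the per-element result may be folded to 0 by choosing undef = 0, while opcodes without an absorbing element (e.g. ADD) must be left unfolded. A minimal standalone C++ illustration of that rule follows; BinOp and foldWithUndef are hypothetical names, not LLVM APIs, and plain uint64_t stands in for APInt.

// Standalone sketch of the per-element rule FoldValueWithUndef implements.
// Requires C++17 for std::optional.
#include <cassert>
#include <cstdint>
#include <optional>

enum class BinOp { And, Mul, Add };

// With no undef operands, fold normally. With an undef operand, AND and
// MUL may fold to 0, since undef can be chosen as 0 and 0 absorbs both
// operations; ADD has no absorbing element, so it stays unfolded.
std::optional<uint64_t> foldWithUndef(BinOp Op, uint64_t C1, bool IsUndef1,
                                      uint64_t C2, bool IsUndef2) {
  if (!IsUndef1 && !IsUndef2) {
    switch (Op) {
    case BinOp::And: return C1 & C2;
    case BinOp::Mul: return C1 * C2;
    case BinOp::Add: return C1 + C2;
    }
    return std::nullopt;
  }
  if (Op == BinOp::And || Op == BinOp::Mul)
    return 0; // and(x, undef) -> 0, mul(x, undef) -> 0
  return std::nullopt; // e.g. add(x, undef) is not folded
}

int main() {
  assert(foldWithUndef(BinOp::And, 0xff, false, 0, true) == 0);
  assert(foldWithUndef(BinOp::Mul, 7, false, 0, true) == 0);
  assert(!foldWithUndef(BinOp::Add, 7, false, 0, true).has_value());
  return 0;
}

This mirrors why the patch whitelists only ISD::AND and ISD::MUL in FoldValueWithUndef and returns llvm::None for everything else, and why the X86 fshl test improves: the shift-amount vector with undef lanes now constant-folds through the bitcasted AND instead of being rematerialized at run time.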