Index: llvm/include/llvm/ADT/APInt.h
===================================================================
--- llvm/include/llvm/ADT/APInt.h
+++ llvm/include/llvm/ADT/APInt.h
@@ -2239,12 +2239,18 @@
 /// Splat/Merge neighboring bits to widen/narrow the bitmask represented
 /// by \param A to \param NewBitWidth bits.
 ///
+/// Default (Down = false): when narrowing, a new bit is set if ANY of the
+/// old bits it covers is set.
 /// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
 /// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111
+///
+/// Round down (Down = true): when narrowing, a new bit is set only if ALL of
+/// the old bits it covers are set. Widening is unaffected.
+/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
+/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0001
+///
 /// A.getBitwidth() or NewBitWidth must be a whole multiples of the other.
-///
-/// TODO: Do we need a mode where all bits must be set when merging down?
-APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth);
+APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool Down = false);
 } // namespace APIntOps
 
 // See friend declaration above. This additional declaration is required in
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2712,7 +2712,15 @@
         SubDemandedElts &= ScaledDemandedElts;
         if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1))
           return false;
-        UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts);
+
+        // We can't merge the undef bits with an "OR" operation here,
+        // because some operations only use part of the source value.
+        // Take llvm.fshl.* for example:
+        // t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32
+        // t2: v2i64 = bitcast t1
+        // t5: v2i64 = fshl t3, t4, t2
+        // We can not convert t2 to {i64 undef, i64 undef}
+        UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts, true);
       }
       return true;
     }
Index: llvm/lib/Support/APInt.cpp
===================================================================
--- llvm/lib/Support/APInt.cpp
+++ llvm/lib/Support/APInt.cpp
@@ -2968,7 +2968,8 @@
   return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
 }
 
-APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth) {
+APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth,
+                                   bool Down) {
   unsigned OldBitWidth = A.getBitWidth();
   assert((((OldBitWidth % NewBitWidth) == 0) ||
           ((NewBitWidth % OldBitWidth) == 0)) &&
@@ -2994,9 +2995,12 @@
   } else {
-    // Merge bits - if any old bit is set, then set scale equivalent new bit.
+    // Merge bits - set the scale equivalent new bit if any old bit is set, or,
+    // when rounding down (Down), only if all old bits are set.
     unsigned Scale = OldBitWidth / NewBitWidth;
-    for (unsigned i = 0; i != NewBitWidth; ++i)
-      if (!A.extractBits(Scale, i * Scale).isZero())
-        NewA.setBit(i);
+    for (unsigned i = 0; i != NewBitWidth; ++i) {
+      APInt Bits = A.extractBits(Scale, i * Scale);
+      if (Down ? Bits.isAllOnes() : !Bits.isZero())
+        NewA.setBit(i);
+    }
   }
 
   return NewA;