Index: include/llvm/ADT/APInt.h =================================================================== --- include/llvm/ADT/APInt.h +++ include/llvm/ADT/APInt.h @@ -1243,6 +1243,9 @@ /// as "bitPosition". void flipBit(unsigned bitPosition); + /// Insert the bits from a smaller APInt starting at bitPosition. + void insertBits(const APInt &SubBits, unsigned bitPosition); + /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits). APInt extractBits(unsigned numBits, unsigned bitPosition) const; Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7517,11 +7517,11 @@ if (OpVal.isUndef()) SplatUndef.setBits(BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) - SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize). - zextOrTrunc(sz) << BitPos; + SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltBitSize), + BitPos); else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) - SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; - else + SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos); + else return false; } Index: lib/Support/APInt.cpp =================================================================== --- lib/Support/APInt.cpp +++ lib/Support/APInt.cpp @@ -588,6 +588,62 @@ else setBit(bitPosition); } +void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { + unsigned subBitWidth = subBits.getBitWidth(); + assert((subBitWidth + bitPosition) <= BitWidth && "Illegal bit insertion"); + + // Insertion is a direct copy. + if (subBitWidth == BitWidth) { + *this = subBits; + return; + } + + // Single word result can be done as a direct bitmask. 
+ if (isSingleWord()) { + uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); + VAL &= ~(mask << bitPosition); + VAL |= (subBits.VAL << bitPosition); + return; + } + + unsigned loBit = whichBit(bitPosition); + unsigned loWord = whichWord(bitPosition); + unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); + + // Insertion within a single word can be done as a direct bitmask. + if (loWord == hi1Word) { + uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); + pVal[loWord] &= ~(mask << loBit); + pVal[loWord] |= (subBits.VAL << loBit); + return; + } + + // Insert on word boundaries. + if (loBit == 0) { + // Direct copy whole words. + unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; + memcpy(pVal + loWord, subBits.getRawData(), + numWholeSubWords * APINT_WORD_SIZE); + + // Mask+insert remaining bits. + unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; + if (remainingBits != 0) { + uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits); + pVal[hi1Word] &= ~mask; + pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); + } + return; + } + + // General case - set/clear individual bits in dst based on src. 
+ for (unsigned i = 0; i != subBitWidth; ++i) { + if (subBits[i]) + setBit(bitPosition + i); + else + clearBit(bitPosition + i); + } +} + APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { assert(numBits > 0 && "Can't extract zero bits"); assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -5318,12 +5318,11 @@ return true; } if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { - Mask |= CInt->getValue().zextOrTrunc(SizeInBits).shl(BitOffset); + Mask.insertBits(CInt->getValue(), BitOffset); return true; } if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { - APInt CstBits = CFP->getValueAPF().bitcastToAPInt(); - Mask |= CstBits.zextOrTrunc(SizeInBits).shl(BitOffset); + Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset); return true; } return false; @@ -5340,7 +5339,7 @@ } auto *Cst = cast<ConstantSDNode>(Src); APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); - MaskBits |= Bits.zext(SizeInBits).shl(BitOffset); + MaskBits.insertBits(Bits, BitOffset); } return SplitBitData(); } Index: lib/Target/X86/X86ShuffleDecodeConstantPool.cpp =================================================================== --- lib/Target/X86/X86ShuffleDecodeConstantPool.cpp +++ lib/Target/X86/X86ShuffleDecodeConstantPool.cpp @@ -64,8 +64,7 @@ continue; } - auto *Elt = cast<ConstantInt>(COp); - MaskBits |= Elt->getValue().zextOrTrunc(CstSizeInBits).shl(BitOffset); + MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset); } // Now extract the undef/constant bit data into the raw shuffle masks. Index: unittests/ADT/APIntTest.cpp =================================================================== --- unittests/ADT/APIntTest.cpp +++ unittests/ADT/APIntTest.cpp @@ -1647,6 +1647,60 @@ } } +TEST(APIntTest, insertBits) { + APInt iSrc(31, 0x00123456); + + // Direct copy. 
+ APInt i31(31, 0x76543210ull); + i31.insertBits(iSrc, 0); + EXPECT_EQ(static_cast<int64_t>(0x00123456ull), i31.getSExtValue()); + + // Single word src/dst insertion. + APInt i63(63, 0x01234567FFFFFFFFull); + i63.insertBits(iSrc, 4); + EXPECT_EQ(static_cast<int64_t>(0x012345600123456Full), i63.getSExtValue()); + + // Insert single word src into one word of dst. + APInt i120(120, UINT64_MAX, true); + i120.insertBits(iSrc, 8); + EXPECT_EQ(static_cast<int64_t>(0xFFFFFF80123456FFull), i120.getSExtValue()); + + // Insert single word src into two words of dst. + APInt i127(127, UINT64_MAX, true); + i127.insertBits(iSrc, 48); + EXPECT_EQ(48u, i127.countLeadingOnes()); + EXPECT_EQ(127u, i127.getActiveBits()); + EXPECT_EQ(48u, i127.countTrailingOnes()); + EXPECT_EQ(105u, i127.countPopulation()); + + // Insert on word boundaries. + APInt i128(128, 0); + i128.insertBits(APInt(64, UINT64_MAX, true), 0); + i128.insertBits(APInt(64, UINT64_MAX, true), 64); + EXPECT_EQ(-1, i128.getSExtValue()); + + APInt i256(256, UINT64_MAX, true); + i256.insertBits(APInt(65, 0), 0); + i256.insertBits(APInt(69, 0), 64); + i256.insertBits(APInt(128, 0), 128); + EXPECT_EQ(0u, i256.getSExtValue()); + + APInt i257(257, 0); + i257.insertBits(APInt(96, UINT64_MAX, true), 64); + EXPECT_EQ(97u, i257.countLeadingZeros()); + EXPECT_EQ(64u, i257.countTrailingZeros()); + EXPECT_EQ(96u, i257.countPopulation()); + + // General insertion. + APInt i260(260, UINT64_MAX, true); + i260.insertBits(APInt(129, 1ull << 48), 15); + EXPECT_EQ(116u, i260.countLeadingOnes()); + EXPECT_EQ(260u, i260.getActiveBits()); + EXPECT_EQ(145u, i260.getMinSignedBits()); + EXPECT_EQ(15u, i260.countTrailingOnes()); + EXPECT_EQ(132u, i260.countPopulation()); +} + TEST(APIntTest, extractBits) { APInt i32(32, 0x1234567); EXPECT_EQ(0x3456, i32.extractBits(16, 4));