Index: llvm/trunk/include/llvm/ADT/APInt.h =================================================================== --- llvm/trunk/include/llvm/ADT/APInt.h +++ llvm/trunk/include/llvm/ADT/APInt.h @@ -1243,6 +1243,9 @@ /// as "bitPosition". void flipBit(unsigned bitPosition); + /// Insert the bits from a smaller APInt starting at bitPosition. + void insertBits(const APInt &SubBits, unsigned bitPosition); + /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits). APInt extractBits(unsigned numBits, unsigned bitPosition) const; Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7523,11 +7523,11 @@ if (OpVal.isUndef()) SplatUndef.setBits(BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) - SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize). - zextOrTrunc(sz) << BitPos; + SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltBitSize), + BitPos); else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) - SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; - else + SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos); + else return false; } Index: llvm/trunk/lib/Support/APInt.cpp =================================================================== --- llvm/trunk/lib/Support/APInt.cpp +++ llvm/trunk/lib/Support/APInt.cpp @@ -588,6 +588,65 @@ else setBit(bitPosition); } +void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { + unsigned subBitWidth = subBits.getBitWidth(); + assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && + "Illegal bit insertion"); + + // Insertion is a direct copy. + if (subBitWidth == BitWidth) { + *this = subBits; + return; + } + + // Single word result can be done as a direct bitmask. 
+ if (isSingleWord()) { + uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); + VAL &= ~(mask << bitPosition); + VAL |= (subBits.VAL << bitPosition); + return; + } + + unsigned loBit = whichBit(bitPosition); + unsigned loWord = whichWord(bitPosition); + unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); + + // Insertion within a single word can be done as a direct bitmask. + if (loWord == hi1Word) { + uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth); + pVal[loWord] &= ~(mask << loBit); + pVal[loWord] |= (subBits.VAL << loBit); + return; + } + + // Insert on word boundaries. + if (loBit == 0) { + // Direct copy whole words. + unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; + memcpy(pVal + loWord, subBits.getRawData(), + numWholeSubWords * APINT_WORD_SIZE); + + // Mask+insert remaining bits. + unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; + if (remainingBits != 0) { + uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits); + pVal[hi1Word] &= ~mask; + pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); + } + return; + } + + // General case - set/clear individual bits in dst based on src. + // TODO - there is scope for optimization here, but at the moment this code + // path is barely used so prefer readability over performance. 
+ for (unsigned i = 0; i != subBitWidth; ++i) { + if (subBits[i]) + setBit(bitPosition + i); + else + clearBit(bitPosition + i); + } +} + APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { assert(numBits > 0 && "Can't extract zero bits"); assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -5318,12 +5318,11 @@ return true; } if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { - Mask |= CInt->getValue().zextOrTrunc(SizeInBits).shl(BitOffset); + Mask.insertBits(CInt->getValue(), BitOffset); return true; } if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { - APInt CstBits = CFP->getValueAPF().bitcastToAPInt(); - Mask |= CstBits.zextOrTrunc(SizeInBits).shl(BitOffset); + Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset); return true; } return false; @@ -5340,7 +5339,7 @@ } auto *Cst = cast<ConstantSDNode>(Src); APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); - MaskBits |= Bits.zext(SizeInBits).shl(BitOffset); + MaskBits.insertBits(Bits, BitOffset); } return SplitBitData(); } Index: llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp +++ llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp @@ -91,8 +91,7 @@ continue; } - auto *Elt = cast<ConstantInt>(COp); - MaskBits |= Elt->getValue().zextOrTrunc(CstSizeInBits).shl(BitOffset); + MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset); } // Now extract the undef/constant bit data into the raw shuffle masks. 
Index: llvm/trunk/unittests/ADT/APIntTest.cpp =================================================================== --- llvm/trunk/unittests/ADT/APIntTest.cpp +++ llvm/trunk/unittests/ADT/APIntTest.cpp @@ -1647,6 +1647,59 @@ } } +TEST(APIntTest, insertBits) { + APInt iSrc(31, 0x00123456); + + // Direct copy. + APInt i31(31, 0x76543210ull); + i31.insertBits(iSrc, 0); + EXPECT_EQ(static_cast<int64_t>(0x00123456ull), i31.getSExtValue()); + + // Single word src/dst insertion. + APInt i63(63, 0x01234567FFFFFFFFull); + i63.insertBits(iSrc, 4); + EXPECT_EQ(static_cast<int64_t>(0x012345600123456Full), i63.getSExtValue()); + + // Insert single word src into one word of dst. + APInt i120(120, UINT64_MAX, true); + i120.insertBits(iSrc, 8); + EXPECT_EQ(static_cast<int64_t>(0xFFFFFF80123456FFull), i120.getSExtValue()); + + // Insert single word src into two words of dst. + APInt i127(127, UINT64_MAX, true); + i127.insertBits(iSrc, 48); + EXPECT_EQ(i127.extractBits(64, 0).getZExtValue(), 0x3456FFFFFFFFFFFF); + EXPECT_EQ(i127.extractBits(63, 64).getZExtValue(), 0x7FFFFFFFFFFF8012); + + // Insert on word boundaries. + APInt i128(128, 0); + i128.insertBits(APInt(64, UINT64_MAX, true), 0); + i128.insertBits(APInt(64, UINT64_MAX, true), 64); + EXPECT_EQ(-1, i128.getSExtValue()); + + APInt i256(256, UINT64_MAX, true); + i256.insertBits(APInt(65, 0), 0); + i256.insertBits(APInt(69, 0), 64); + i256.insertBits(APInt(128, 0), 128); + EXPECT_EQ(0u, i256.getSExtValue()); + + APInt i257(257, 0); + i257.insertBits(APInt(96, UINT64_MAX, true), 64); + EXPECT_EQ(i257.extractBits(64, 0).getZExtValue(), 0x0000000000000000); + EXPECT_EQ(i257.extractBits(64, 64).getZExtValue(), 0xFFFFFFFFFFFFFFFF); + EXPECT_EQ(i257.extractBits(64, 128).getZExtValue(), 0x00000000FFFFFFFF); + EXPECT_EQ(i257.extractBits(65, 192).getZExtValue(), 0x0000000000000000); + + // General insertion. 
+ APInt i260(260, UINT64_MAX, true); + i260.insertBits(APInt(129, 1ull << 48), 15); + EXPECT_EQ(i260.extractBits(64, 0).getZExtValue(), 0x8000000000007FFF); + EXPECT_EQ(i260.extractBits(64, 64).getZExtValue(), 0x0000000000000000); + EXPECT_EQ(i260.extractBits(64, 128).getZExtValue(), 0xFFFFFFFFFFFF0000); + EXPECT_EQ(i260.extractBits(64, 192).getZExtValue(), 0xFFFFFFFFFFFFFFFF); + EXPECT_EQ(i260.extractBits(4, 256).getZExtValue(), 0x000000000000000F); +} + TEST(APIntTest, extractBits) { APInt i32(32, 0x1234567); EXPECT_EQ(0x3456, i32.extractBits(16, 4));