Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -420,6 +420,8 @@ /// shuffles. FunctionPass *createExpandReductionsPass(); + FunctionPass *createSubstituteLoadWithSubShrPass(); + } // End llvm namespace #endif Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -356,6 +356,7 @@ void initializeStripNonLineTableDebugInfoPass(PassRegistry&); void initializeStripSymbolsPass(PassRegistry&); void initializeStructurizeCFGPass(PassRegistry&); +void initializeSubstituteLoadWithSubShrPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry&); void initializeTailDuplicatePassPass(PassRegistry&); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&); Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -137,6 +137,7 @@ StackMaps.cpp StackProtector.cpp StackSlotColoring.cpp + SubstituteLoadWithSubShr.cpp TailDuplication.cpp TailDuplicator.cpp TargetFrameLoweringImpl.cpp Index: lib/CodeGen/SubstituteLoadWithSubShr.cpp =================================================================== --- lib/CodeGen/SubstituteLoadWithSubShr.cpp +++ lib/CodeGen/SubstituteLoadWithSubShr.cpp @@ -0,0 +1,147 @@ +//===-------------------- SubstituteLoadWithSubShr.cpp --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass recognizes cases where a load can be replaced by subtract and +// shift right instructions. +// +// In case of loading integer value from an array of constants which is defined +// as follows: +// +// int array[SIZE] = {0x0, 0x1, 0x3, 0x7, 0xF ..., 2^(SIZE-1) - 1} +// +// array[idx] can be replaced with (0xFFFFFFFF >> (sub 32, idx)) +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/Transforms/Utils/Local.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "substitute-load-with-sub-shr" + +namespace { + +class SubstituteLoadWithSubShr : public FunctionPass { + +public: + static char ID; + SubstituteLoadWithSubShr() : FunctionPass(ID) { + initializeSubstituteLoadWithSubShrPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Substitute Load With Sub Shr"; + } + + bool runOnFunction(Function &F) override; + +private: + bool + ReplaceLoadWithSubShr(LoadInst *LI, SmallVector &DeadInsts, + SmallVector &PossibleDeadArrays); +}; +} // end anonymous namespace. + +char SubstituteLoadWithSubShr::ID = 0; +INITIALIZE_PASS_BEGIN(SubstituteLoadWithSubShr, DEBUG_TYPE, + "SubstituteLoadWithSubShr", false, false) +INITIALIZE_PASS_END(SubstituteLoadWithSubShr, DEBUG_TYPE, + "SubstituteLoadWithSubShr", false, false) + +FunctionPass *llvm::createSubstituteLoadWithSubShrPass() { + return new SubstituteLoadWithSubShr(); +} + +bool SubstituteLoadWithSubShr::ReplaceLoadWithSubShr( + LoadInst *LI, SmallVector &DeadInsts, + SmallVector &PossibleDeadArrays) { + + if (GetElementPtrInst *GEP = dyn_cast(LI->getOperand(0))) { + if (GlobalVariable *GV = dyn_cast(GEP->getOperand(0))) { + if (GV->isConstant() && GV->hasDefinitiveInitializer()) { + + Constant *Init = GV->getInitializer(); + Type *Ty = Init->getType(); + if ((!isa(Init) && !isa(Init)) || + !Ty->getArrayElementType()->isIntegerTy() || + Ty->getArrayNumElements() > + Ty->getArrayElementType()->getScalarSizeInBits()) + return nullptr; + + // Check if the array's constant elements are suitable to our case. + uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); + for (uint64_t i = 0; i < ArrayElementCount; i++) { + ConstantInt *Elem = + dyn_cast(Init->getAggregateElement(i)); + if (Elem->getZExtValue() != (((uint64_t)1 << i) - 1)) + return nullptr; + } + + IRBuilder<> Builder(LI->getContext()); + Builder.SetInsertPoint(LI); + + // Do the transformation (assuming 32-bit elements): + // -> elemPtr = getelementptr @array, 0, idx + // elem = load elemPtr + // <- hiBit = 32 - idx + // elem = 0xFFFFFFFF >> hiBit + Type *ElemTy = Ty->getArrayElementType(); + Value *LoadIdx = Builder.CreateZExtOrTrunc(GEP->getOperand(2), ElemTy); + + APInt ElemSize = + APInt(ElemTy->getScalarSizeInBits(), ElemTy->getScalarSizeInBits()); + Constant *ElemSizeConst = Constant::getIntegerValue(ElemTy, ElemSize); + Value *Sub = Builder.CreateSub(ElemSizeConst, LoadIdx); + + Constant *AllOnes = Constant::getAllOnesValue(ElemTy); + Value *LShr = Builder.CreateLShr(AllOnes, Sub); + + LI->replaceAllUsesWith(LShr); + DeadInsts.push_back(LI); + + // Insert the array to mark it as potentially dead. + if (find(PossibleDeadArrays, GV) == PossibleDeadArrays.end()) + PossibleDeadArrays.push_back(GV); + + return true; + } + } + } + return false; +} + +bool SubstituteLoadWithSubShr::runOnFunction(Function &F) { + SmallVector DeadInsts; + SmallVector PossibleDeadArrays; + bool Changed = false; + + // Perform the transformation if possible while keep tracking of dead + // instructions and potential dead arrays. + for (auto &I : instructions(F)) { + if (LoadInst *LI = dyn_cast(&I)) + Changed |= ReplaceLoadWithSubShr(LI, DeadInsts, PossibleDeadArrays); + } + + if (Changed) { + for (auto &I : DeadInsts) { + RecursivelyDeleteTriviallyDeadInstructions(I); + } + + for (auto &GV : PossibleDeadArrays) { + if (GV->use_empty() && GV->isDiscardableIfUnused()) + GV->eraseFromParent(); + } + } + + return Changed; +} Index: lib/Target/X86/X86TargetMachine.cpp =================================================================== --- lib/Target/X86/X86TargetMachine.cpp +++ lib/Target/X86/X86TargetMachine.cpp @@ -377,8 +377,10 @@ TargetPassConfig::addIRPasses(); - if (TM->getOptLevel() != CodeGenOpt::None) + if (TM->getOptLevel() != CodeGenOpt::None) { addPass(createInterleavedAccessPass()); + addPass(createSubstituteLoadWithSubShrPass()); + } } bool X86PassConfig::addInstSelector() { Index: test/CodeGen/X86/replace-load-with-sub-shr.ll =================================================================== --- test/CodeGen/X86/replace-load-with-sub-shr.ll +++ test/CodeGen/X86/replace-load-with-sub-shr.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2 | FileCheck %s + +@fill_table32 = internal unnamed_addr constant [32 x i32] [i32 0, i32 1, i32 3, i32 7, i32 15, i32 31, i32 63, i32 127, i32 255, i32 511, i32 1023, i32 2047, i32 4095, i32 8191, i32 16383, i32 32767, i32 65535, i32 131071, i32 262143, i32 524287, i32 1048575, i32 2097151, i32 4194303, i32 8388607, i32 16777215, i32 33554431, i32 67108863, i32 134217727, i32 268435455, i32 536870911, i32 1073741823, i32 2147483647], align 16 +@fill_table64 = internal unnamed_addr constant [64 x i64] [i64 0, i64 1, i64 3, i64 7, i64 15, i64 31, i64 63, i64 127, i64 255, i64 511, i64 1023, i64 2047, i64 4095, i64 8191, i64 16383, i64 32767, i64 65535, i64 131071, i64 262143, i64 524287, i64 1048575, i64 2097151, i64 4194303, i64 8388607, i64 16777215, i64 33554431, i64 67108863, i64 134217727, i64 268435455, i64 536870911, i64 1073741823, i64 2147483647, i64 4294967295, i64 8589934591, i64 17179869183, i64 34359738367, i64 68719476735, i64 137438953471, i64 274877906943, i64 549755813887, i64 1099511627775, i64 2199023255551, i64 4398046511103, i64 8796093022207, i64 17592186044415, i64 35184372088831, i64 70368744177663, i64 140737488355327, i64 281474976710655, i64 562949953421311, i64 1125899906842623, i64 2251799813685247, i64 4503599627370495, i64 9007199254740991, i64 18014398509481983, i64 36028797018963967, i64 72057594037927935, i64 144115188075855871, i64 288230376151711743, i64 576460752303423487, i64 1152921504606846975, i64 2305843009213693951, i64 4611686018427387903, i64 9223372036854775807], align 16 + +define i32 @f32(i32 %y) local_unnamed_addr { +; CHECK-LABEL: f32: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl $32, %eax +; CHECK-NEXT: subl %edi, %eax +; CHECK-NEXT: movl $-1, %ecx +; CHECK-NEXT: shrxl %eax, %ecx, %eax +; CHECK-NEXT: retq +entry: + %idxprom = sext i32 %y to i64 + %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @fill_table32, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + ret i32 %0 +} + +define i64 @f64(i64 %y) local_unnamed_addr { +; CHECK-LABEL: f64: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl $64, %eax +; CHECK-NEXT: subl %edi, %eax +; CHECK-NEXT: movq $-1, %rcx +; CHECK-NEXT: shrxq %rax, %rcx, %rax +; CHECK-NEXT: retq +entry: + %arrayidx = getelementptr inbounds [64 x i64], [64 x i64]* @fill_table64, i64 0, i64 %y + %0 = load i64, i64* %arrayidx, align 8 + ret i64 %0 +} + +define i32 @f32_bzhi(i32 %x, i32 %y) local_unnamed_addr { +; CHECK-LABEL: f32_bzhi: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: bzhil %esi, %edi, %eax +; CHECK-NEXT: retq +entry: + %idxprom = sext i32 %y to i64 + %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @fill_table32, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %and = and i32 %0, %x + ret i32 %and +} + +define i64 @f64_bzhi(i64 %x, i64 %y) local_unnamed_addr { +; CHECK-LABEL: f64_bzhi: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: bzhiq %rsi, %rdi, %rax +; CHECK-NEXT: retq +entry: + %arrayidx = getelementptr inbounds [64 x i64], [64 x i64]* @fill_table64, i64 0, i64 %y + %0 = load i64, i64* %arrayidx, align 8 + %and = and i64 %0, %x + ret i64 %and +} + +; CHECK-NOT: fill_table32: +; CHECK-NOT: fill_table64: +