Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -4916,6 +4916,24 @@
     ...
     !0 = !{i64 (i64, i64)* @add, i64 (i64, i64)* @sub}
 
+'``speculation.marker``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``speculation.marker`` metadata may only be attached to a load instruction.
+It consists of a set of ``i64`` offsets, expressed in units of the loaded
+type relative to the load's pointer operand, indicating that the memory at
+each of those offsets is dereferenceable for a read. The intent of this
+metadata is to keep track of dereferenceable memory locations after the
+loads from those locations have been deleted, since that knowledge may
+enable later optimizations. The offsets are sorted; they do not describe a
+min/max range, but rather list each individual offset that has been proven
+dereferenceable.
+
+.. code-block:: llvm
+
+    %ld1 = load double, double* %arrayidx1, align 8, !speculation.marker !0
+
+    ...
+    !0 = !{i64 -1, i64 2}
+
 '``unpredictable``' Metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Index: include/llvm/IR/LLVMContext.h
===================================================================
--- include/llvm/IR/LLVMContext.h
+++ include/llvm/IR/LLVMContext.h
@@ -102,6 +102,7 @@
     MD_associated = 22,          // "associated"
     MD_callees = 23,             // "callees"
     MD_irr_loop = 24,            // "irr_loop"
+    MD_speculation_marker = 25,  // "speculation.marker"
   };
 
   /// Known operand bundle tag IDs, which always have the same value.  All
Index: include/llvm/Transforms/Vectorize/SLPVectorizer.h
===================================================================
--- include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -112,6 +112,9 @@
   /// collected in GEPs.
   bool vectorizeGEPIndices(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
 
+  /// \brief Restore insertelement chains from speculation.marker metadata.
+  InsertElementInst *restoreInserts(LoadInst *LInstr);
+
   /// Try to find horizontal reduction or otherwise vectorize a chain of binary
   /// operators.
   bool vectorizeRootInstruction(PHINode *P, Value *V, BasicBlock *BB,
Index: lib/IR/LLVMContext.cpp
===================================================================
--- lib/IR/LLVMContext.cpp
+++ lib/IR/LLVMContext.cpp
@@ -61,6 +61,7 @@
       {MD_associated, "associated"},
       {MD_callees, "callees"},
       {MD_irr_loop, "irr_loop"},
+      {MD_speculation_marker, "speculation.marker"},
   };
 
   for (auto &MDKind : MDKinds) {
Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5726,6 +5726,169 @@
   return tryToVectorizeList(BuildVectorOpds, R, BuildVector, false);
 }
 
+InsertElementInst *SLPVectorizerPass::restoreInserts(LoadInst *LInstr) {
+  ShuffleVectorInst *Use = nullptr;
+  Value *Ptr = nullptr;
+  SmallDenseSet<int64_t, 4> Offsets;
+  int64_t OffsetOfLoad;
+  DenseMap<int64_t, LoadInst *> Loads;
+  DenseMap<int64_t, InsertElementInst *> Inserts;
+  DenseMap<int64_t, GetElementPtrInst *> GEPs;
+  BasicBlock *BB = LInstr->getParent();
+  VectorType *VecType = nullptr;
+  unsigned MaxOffset = 0;
+
+  // Determine the base pointer and the element offset of this load.
+  if (Instruction *Instr = dyn_cast<Instruction>(LInstr->getOperand(0))) {
+    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr);
+    if (!GEP)
+      return nullptr;
+    ConstantInt *C = dyn_cast<ConstantInt>(GEP->getOperand(1));
+    if (!C)
+      return nullptr;
+    OffsetOfLoad = C->getSExtValue();
+    Ptr = GEP->getOperand(0);
+    GEPs[OffsetOfLoad] = GEP;
+    Loads[OffsetOfLoad] = LInstr;
+  } else {
+    OffsetOfLoad = 0;
+    Ptr = LInstr->getOperand(0);
+    Loads[0] = LInstr;
+  }
+
+  // Translate the metadata offsets into absolute element offsets from Ptr.
+  MDNode *MD = LInstr->getMetadata(LLVMContext::MD_speculation_marker);
+  assert(MD != nullptr && "Load should contain speculation.marker metadata");
+  for (unsigned i = 0, e = MD->getNumOperands(); i < e; i++) {
+    ConstantInt *C = mdconst::dyn_extract<ConstantInt>(MD->getOperand(i));
+    Offsets.insert(OffsetOfLoad + C->getSExtValue());
+  }
+
+  // Collect the existing GEP/load/insertelement chains based on Ptr.
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
+    if (InsertElementInst *Insert = dyn_cast<InsertElementInst>(&*it)) {
+      ConstantInt *C = dyn_cast<ConstantInt>(Insert->getOperand(2));
+      LoadInst *Load = dyn_cast<LoadInst>(Insert->getOperand(1));
+      if (!C || !Load)
+        continue;
+      GetElementPtrInst *GEP =
+          dyn_cast<GetElementPtrInst>(Load->getOperand(0));
+      Value *Base = (GEP == nullptr) ? Load->getOperand(0) : GEP->getOperand(0);
+      if (Base == Ptr) {
+        if (!VecType)
+          VecType = Insert->getType();
+        else if (Insert->getType() != VecType)
+          return nullptr;
+        uint64_t Offset = C->getZExtValue();
+        Inserts[Offset] = Insert;
+        Loads[Offset] = Load;
+        // The GEP instruction might be missing for offset 0.
+        if (GEP)
+          GEPs[Offset] = GEP;
+        if (!Insert->use_empty() && !Use)
+          if (ShuffleVectorInst *Shuffle =
+                  dyn_cast<ShuffleVectorInst>(Insert->user_back()))
+            Use = Shuffle;
+        continue;
+      }
+    }
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&*it))
+      // Examine the GEP -> load -> insertelement chain.
+      if (Ptr == GEP->getOperand(0)) {
+        ConstantInt *C = dyn_cast<ConstantInt>(GEP->getOperand(1));
+        if (!C || GEP->use_empty() || !GEP->hasOneUse())
+          return nullptr;
+        uint64_t Off = C->getZExtValue();
+        if (GEPs[Off] == GEP)
+          continue;
+        LoadInst *Load = dyn_cast<LoadInst>(GEP->user_back());
+        if (!Load)
+          return nullptr;
+        if (Load->getMetadata(LLVMContext::MD_speculation_marker) == nullptr) {
+          InsertElementInst *Insert =
+              dyn_cast<InsertElementInst>(Load->user_back());
+          if (!Insert)
+            return nullptr;
+          if (!VecType)
+            VecType = Insert->getType();
+          else if (Insert->getType() != VecType)
+            return nullptr;
+          ConstantInt *C1 = cast<ConstantInt>(Insert->getOperand(2));
+          uint64_t Off_Ins = C1->getZExtValue();
+          // The offset from the GEP must match the offset from the
+          // insertelement; otherwise we have to give up.
+          if (Off != Off_Ins)
+            return nullptr;
+          if (!Insert->use_empty() && !Use)
+            if (ShuffleVectorInst *Shuffle =
+                    dyn_cast<ShuffleVectorInst>(Insert->user_back()))
+              Use = Shuffle;
+          Inserts[Off] = Insert;
+        } else
+          // Strip speculation.marker off the other loads in the chain.
+          Load->setMetadata(LLVMContext::MD_speculation_marker, nullptr);
+        Loads[Off] = Load;
+        GEPs[Off] = GEP;
+      }
+  }
+  if (!VecType || !Use || Use->getParent() != BB)
+    return nullptr;
+  MaxOffset = VecType->getVectorNumElements();
+
+  // Every offset recorded in the metadata must still be missing its chain,
+  // and every other lane below it must already have a complete
+  // GEP/load/insertelement chain.
+  unsigned PrevOff = 0;
+  for (auto Off : Offsets) {
+    if (Off > MaxOffset - 1)
+      return nullptr;
+    if ((Off != 0 && GEPs[Off]) || Loads[Off] || Inserts[Off])
+      return nullptr;
+    for (unsigned i = PrevOff + 1; i < Off; i++)
+      if ((i != 0 && !GEPs[i]) || !Loads[i] || !Inserts[i])
+        return nullptr;
+    PrevOff = Off;
+  }
+
+  IRBuilder<> Builder(LInstr->getParent(), ++BasicBlock::iterator(LInstr));
+  if (!Inserts[0]) {
+    Builder.SetInsertPoint(LInstr->getPrevNode());
+    LoadInst *NewLoad = Builder.CreateLoad(Ptr);
+    NewLoad->setAlignment(LInstr->getAlignment());
+    Loads[0] = NewLoad;
+    Value *NewInsert = Builder.CreateInsertElement(
+        UndefValue::get(VecType), NewLoad, Builder.getInt32(0));
+    Inserts[0] = cast<InsertElementInst>(NewInsert);
+  }
+  for (unsigned i = 1, e = MaxOffset; i < e; i++) {
+    // Build the GEP, load and insertelement for this offset.
+    GetElementPtrInst *PrevGEP = GEPs[i - 1];
+    if (!Inserts[i]) {
+      assert(Loads[i - 1] != nullptr &&
+             "Couldn't find previous load in the chain.");
+      if (!PrevGEP)
+        Builder.SetInsertPoint(BB, ++Loads[i - 1]->getIterator());
+      else
+        Builder.SetInsertPoint(BB, ++PrevGEP->getIterator());
+      Value *NewGEP =
+          Builder.CreateGEP(LInstr->getType(), Ptr, Builder.getInt64(i));
+      GEPs[i] = cast<GetElementPtrInst>(NewGEP);
+      Builder.SetInsertPoint(BB, ++Loads[i - 1]->getIterator());
+      Builder.SetCurrentDebugLocation(LInstr->getDebugLoc());
+      LoadInst *NewLoad = Builder.CreateLoad(NewGEP);
+      NewLoad->setAlignment(LInstr->getAlignment());
+      Loads[i] = NewLoad;
+      // The new insertelement must be placed after the previous one.
+      InsertElementInst *PrevIns = Inserts[i - 1];
+      Builder.SetInsertPoint(BB, ++PrevIns->getIterator());
+      Value *NewInsert =
+          Builder.CreateInsertElement(PrevIns, NewLoad, Builder.getInt32(i));
+      Inserts[i] = cast<InsertElementInst>(NewInsert);
+    }
+    Inserts[i]->setOperand(0, Inserts[i - 1]);
+  }
+
+  // Rewire the shufflevector to use the restored insertelement chain.
+  Use->setOperand(0, Inserts[MaxOffset - 1]);
+  return cast<InsertElementInst>(Inserts[MaxOffset - 1]);
+}
+
 bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
                                                    BasicBlock *BB, BoUpSLP &R) {
   SmallVector<Value *, 16> BuildVector;
@@ -5733,6 +5896,16 @@
   if (!findBuildVector(IEI, BuildVector, BuildVectorOpds))
     return false;
 
+  if (LoadInst *LInstr = dyn_cast<LoadInst>(IEI->getOperand(1))) {
+    if (LInstr->getMetadata(LLVMContext::MD_speculation_marker) != nullptr)
+      if (InsertElementInst *Insert = restoreInserts(LInstr)) {
+        BuildVector.clear();
+        BuildVectorOpds.clear();
+        if (!findBuildVector(Insert, BuildVector, BuildVectorOpds))
+          return false;
+      }
+  }
+
   // Vectorize starting with the build vector operands ignoring the BuildVector
   // instructions for the purpose of scheduling and user extraction.
   return tryToVectorizeList(BuildVectorOpds, R, BuildVector);
Index: test/ThinLTO/X86/lazyload_metadata.ll
===================================================================
--- test/ThinLTO/X86/lazyload_metadata.ll
+++ test/ThinLTO/X86/lazyload_metadata.ll
@@ -10,13 +10,13 @@
 ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \
 ; RUN:          -o /dev/null -stats \
 ; RUN:  2>&1 | FileCheck %s -check-prefix=LAZY
-; LAZY: 55 bitcode-reader  - Number of Metadata records loaded
+; LAZY: 57 bitcode-reader  - Number of Metadata records loaded
 ; LAZY: 2 bitcode-reader  - Number of MDStrings loaded
 
 ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \
 ; RUN:          -o /dev/null -disable-ondemand-mds-loading -stats \
 ; RUN:  2>&1 | FileCheck %s -check-prefix=NOTLAZY
-; NOTLAZY: 64 bitcode-reader  - Number of Metadata records loaded
+; NOTLAZY: 66 bitcode-reader  - Number of Metadata records loaded
 ; NOTLAZY: 7 bitcode-reader  - Number of MDStrings loaded
Index: test/Transforms/SLPVectorizer/X86/pr21780.ll
===================================================================
--- /dev/null
+++ test/Transforms/SLPVectorizer/X86/pr21780.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -S | FileCheck %s
+
+define <4 x double> @foo(double* %ptr) #0 {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, double* [[PTR]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr double, double* [[PTR]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[PTR]] to <4 x double>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x double>, <4 x double>* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[TMP4]], i32 0
+; CHECK-NEXT:    [[INS0:%.*]] = insertelement <4 x double> undef, double [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x double> [[INS0]], double [[TMP6]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x double> [[TMP7]], double [[TMP8]], i32 2
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x double> [[INS2]], double [[TMP9]], i32 3
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x double> [[SHUFFLE]]
+;
+  %arrayidx2 = getelementptr inbounds double, double* %ptr, i64 2
+  %ld0 = load double, double* %ptr, align 8, !speculation.marker !0
+  %ld2 = load double, double* %arrayidx2, align 8, !speculation.marker !1
+  %ins0 = insertelement <4 x double> undef, double %ld0, i32 0
+  %ins2 = insertelement <4 x double> %ins0, double %ld2, i32 2
+  %shuffle = shufflevector <4 x double> %ins2, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @bar(double* %ptr) #0 {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, double* [[PTR]], i64 3
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[PTR]] to <4 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x double>, <4 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x double> undef, double [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP3]], i32 1
+; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x double> [[TMP5]], double [[TMP6]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP3]], i32 2
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x double> [[INS1]], double [[TMP7]], i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP3]], i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x double> [[INS2]], double [[TMP8]], i32 3
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x double> [[SHUFFLE]]
+;
+  %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 1
+  %arrayidx2 = getelementptr inbounds double, double* %ptr, i64 2
+  %ld1 = load double, double* %arrayidx1, align 8, !speculation.marker !2
+  %ld2 = load double, double* %arrayidx2, align 8, !speculation.marker !3
+  %ins1 = insertelement <4 x double> undef, double %ld1, i32 1
+  %ins2 = insertelement <4 x double> %ins1, double %ld2, i32 2
+  %shuffle = shufflevector <4 x double> %ins2, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %shuffle
+}
+
+attributes #0 = { "target-cpu"="bdver2" }
+
+!0 = !{i64 1, i64 3}
+!1 = !{i64 -1, i64 1}
+!2 = !{i64 -1, i64 2}
+!3 = !{i64 -2, i64 1}
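
Note for reviewers: the patch above only consumes !speculation.marker; the pass that produces it is not part of this change. As a minimal sketch of what a producer could look like (not part of this patch; the helper name addSpeculationMarker and its signature are hypothetical), a transform that deletes loads it has proven dereferenceable could attach the metadata to the load it keeps roughly like this:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/IR/Type.h"

    using namespace llvm;

    // Hypothetical helper: attach !speculation.marker to KeptLoad, recording
    // that memory at the given element offsets from KeptLoad's pointer operand
    // has been proven dereferenceable (e.g. because loads from it were just
    // deleted).
    static void addSpeculationMarker(LoadInst *KeptLoad,
                                     ArrayRef<int64_t> Offsets) {
      LLVMContext &Ctx = KeptLoad->getContext();
      SmallVector<Metadata *, 4> Ops;
      for (int64_t Off : Offsets)
        Ops.push_back(ConstantAsMetadata::get(
            ConstantInt::get(Type::getInt64Ty(Ctx), Off)));
      KeptLoad->setMetadata("speculation.marker", MDNode::get(Ctx, Ops));
    }

Calling addSpeculationMarker(Ld, {-1, 2}) on the surviving load would produce !speculation.marker !{i64 -1, i64 2}, matching the LangRef example in this patch.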