diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -175,25 +175,8 @@
   void combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast);
   void combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST);
   bool transformBitcast(BitCastInst *Bitcast);
-  Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity);
 };
 
-Value *X86LowerAMXType::getRowFromCol(Instruction *II, Value *V,
-                                      unsigned Granularity) {
-  if (Col2Row.count(V))
-    return Col2Row[V];
-  IRBuilder<> Builder(&*II->getParent()->getFirstInsertionPt());
-  if (auto *I = dyn_cast<Instruction>(V)) {
-    BasicBlock::iterator Iter = I->getIterator();
-    ++Iter;
-    Builder.SetInsertPoint(&*Iter);
-  }
-  ConstantInt *Gran = Builder.getInt16(Granularity);
-  Value *RealRow = Builder.CreateUDiv(V, Gran);
-  Col2Row[V] = RealRow;
-  return RealRow;
-}
-
 // %src = load <256 x i32>, <256 x i32>* %addr, align 64
 // %2 = bitcast <256 x i32> %src to x86_amx
 // -->
@@ -898,10 +881,12 @@
   Convert(Vec2TileInsts, Intrinsic::x86_cast_tile_to_vector);
   Convert(Tile2VecInsts, Intrinsic::x86_cast_vector_to_tile);
 
-  auto EraseInst = [](SmallVectorImpl<Instruction *> &Insts) {
+  auto EraseInst = [&](SmallVectorImpl<Instruction *> &Insts) {
     for (auto *Inst : Insts) {
-      if (Inst->use_empty())
+      if (Inst->use_empty()) {
         Inst->eraseFromParent();
+        Change = true;
+      }
     }
   };
 
@@ -912,7 +897,7 @@
   for (BasicBlock &BB : Func) {
     for (Instruction &I : BB) {
       if (isAMXCast(&I)) {
-        if (PHINode *PN = dyn_cast<PHINode>(I.getOperand(0)))
+        if (dyn_cast<PHINode>(I.getOperand(0)))
           PhiCastWorkList.push_back(&I);
       }
     }
   }
@@ -1036,17 +1021,18 @@
   }
 
   bool runOnFunction(Function &F) override {
+    bool C = false;
     TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
     TargetLibraryInfo *TLI =
         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
 
     X86LowerAMXCast LAC(F);
-    LAC.combineAMXcast(TLI);
+    C |= LAC.combineAMXcast(TLI);
     // There might be remaining AMXcast after combineAMXcast and they should be
     // handled elegantly.
-    LAC.transformAllAMXCast();
+    C |= LAC.transformAllAMXCast();
 
     X86LowerAMXType LAT(F);
-    bool C = LAT.visit();
+    C |= LAT.visit();
 
     // Prepare for fast register allocation at O0.
     // Todo: May better check the volatile model of AMX code, not just
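
Note for reviewers: a legacy FunctionPass must return true from runOnFunction exactly when it modified the IR. Before this patch, the results of combineAMXcast and transformAllAMXCast were discarded and only LAT.visit() was reported; the patch OR-accumulates all three and also makes EraseInst capture the Change flag by reference so erased instructions count as modifications. The sketch below illustrates that accumulate-rather-than-overwrite pattern; ExamplePass, phaseOne, and phaseTwo are hypothetical names, not code from this patch.

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
// Hypothetical pass showing the changed-status contract; not the
// actual X86LowerAMXType code.
struct ExamplePass : public FunctionPass {
  static char ID;
  ExamplePass() : FunctionPass(ID) {}

  // Each transform phase reports whether it mutated the IR.
  bool phaseOne(Function &F) { return false; }
  bool phaseTwo(Function &F) { return false; }

  bool runOnFunction(Function &F) override {
    bool Changed = false;

    // Mirror of the patched EraseInst: the lambda captures Changed by
    // reference so erasing dead instructions is also reported.
    auto EraseDead = [&](SmallVectorImpl<Instruction *> &Insts) {
      for (Instruction *I : Insts)
        if (I->use_empty()) {
          I->eraseFromParent();
          Changed = true;
        }
    };
    SmallVector<Instruction *, 8> DeadInsts;
    EraseDead(DeadInsts);

    // OR-accumulate instead of overwriting, so an early phase's
    // modification is not masked by a later phase returning false.
    Changed |= phaseOne(F);
    Changed |= phaseTwo(F);
    return Changed;
  }
};
} // namespace

char ExamplePass::ID = 0;
```

Reporting an accurate status matters beyond style: the legacy pass manager relies on the return value for its modified-IR bookkeeping, so silently dropping a helper's result can misreport a function as unchanged even though instructions were rewritten or erased.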