diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -898,10 +898,12 @@
   Convert(Vec2TileInsts, Intrinsic::x86_cast_tile_to_vector);
   Convert(Tile2VecInsts, Intrinsic::x86_cast_vector_to_tile);
 
-  auto EraseInst = [](SmallVectorImpl<Instruction *> &Insts) {
+  auto EraseInst = [&](SmallVectorImpl<Instruction *> &Insts) {
     for (auto *Inst : Insts) {
-      if (Inst->use_empty())
+      if (Inst->use_empty()) {
         Inst->eraseFromParent();
+        Change = true;
+      }
     }
   };
 
@@ -912,7 +914,7 @@
   for (BasicBlock &BB : Func) {
     for (Instruction &I : BB) {
       if (isAMXCast(&I)) {
-        if (PHINode *PN = dyn_cast<PHINode>(I.getOperand(0)))
+        if (isa<PHINode>(I.getOperand(0)))
           PhiCastWorkList.push_back(&I);
       }
     }
@@ -1036,17 +1038,18 @@
   }
 
   bool runOnFunction(Function &F) override {
+    bool C = false;
     TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
     TargetLibraryInfo *TLI =
         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
     X86LowerAMXCast LAC(F);
-    LAC.combineAMXcast(TLI);
+    C |= LAC.combineAMXcast(TLI);
     // There might be remaining AMXcast after combineAMXcast and they should be
     // handled elegantly.
-    LAC.transformAllAMXCast();
+    C |= LAC.transformAllAMXCast();
 
     X86LowerAMXType LAT(F);
-    bool C = LAT.visit();
+    C |= LAT.visit();
 
     // Prepare for fast register allocation at O0.
     // Todo: May better check the volatile model of AMX code, not just