Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -577,6 +577,10 @@ return true; } + virtual unsigned changeAArch64Opcode(MachineInstr *MInstr) const { + return 0; + } + /// Represents a predicate at the MachineFunction level. The control flow a /// MachineBranchPredicate represents is: /// Index: lib/CodeGen/MachineCSE.cpp =================================================================== --- lib/CodeGen/MachineCSE.cpp +++ lib/CodeGen/MachineCSE.cpp @@ -38,6 +38,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include #include #include @@ -56,6 +57,12 @@ STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); namespace { + // struct for storing constant value information like value, register and instruction. + struct ConstMInst { + int OriginalValue = 0; + unsigned RegValue; + MachineInstr *ConstMInstr; + }; class MachineCSE : public MachineFunctionPass { const TargetInstrInfo *TII; @@ -63,6 +70,7 @@ AliasAnalysis *AA; MachineDominatorTree *DT; MachineRegisterInfo *MRI; + SmallVector constMInstVec; public: static char ID; // Pass identification @@ -123,6 +131,8 @@ void ExitScopeIfDone(MachineDomTreeNode *Node, DenseMap &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); + void AnalyseConstValueUses(MachineFunction &MF); + void updateRegInfoAndInstr(MachineBasicBlock *MBB); }; } // end anonymous namespace @@ -488,12 +498,131 @@ ScopeMap.erase(SI); } +// e.g: int test(int* x) +// { +// if(x > (int)655360000L) +// return *x - (int)655360000L; +// return x; +// } + +// The above code uses the constant 655360000, once in a compare, once in a subtract. +// AC6 will convert the subtract to an addition by a negative, which means +// that it needs to materialise both the constant and the negative constant. + +// To fix this issue, we keep track of the constants used and if the same +// constant or its negative is used in more than one place, we replace the +// next use register with previous const reg and change the instruction +// from Add to Sub. +void MachineCSE::updateRegInfoAndInstr(MachineBasicBlock *MBB) +{ + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { + MachineInstr *MI = &*I; + ++I; + // checking if instruction is Move immediate or not + if (MI->isMoveImmediate()) { + int ConstNum = MI->getOperand(1).getImm(); + // iterate through all the constant values list in current function + for (SmallVector::iterator it = constMInstVec.begin(); + it != constMInstVec.end(); ++it) { + // if current instruction(MI) and instruction from list(it->ConstMInstr) + // both are same continue. + if(it->ConstMInstr == MI) + continue; + // serching current move instruction constant value is using in any where + // in current function or constant value +1 or -1 or negative of that value. + // if it's have uses in this function have to iterate through + // the instruction and find out the def register. + if (it->OriginalValue == ConstNum || + it->OriginalValue == ConstNum + 1 || + it->OriginalValue == ConstNum - 1 || + it->OriginalValue == -ConstNum || + it->OriginalValue == -ConstNum + 1 || + it->OriginalValue == -ConstNum - 1) { + for (MachineInstr::mop_iterator MOI = it->ConstMInstr->operands_begin(), + MOE = it->ConstMInstr->operands_end(); + MOI != MOE; ++MOI) { + // if operand is not register and operand is not def then continue. + if (!MOI->isReg()) + continue; + if (!MOI->isDef()) + continue; + + unsigned Newreg = MI->getOperand(0).getReg(); + unsigned Reg = MOI->getReg(); + // find out the uses of def register(from it->ConstMInstr) and + // iterate through all the use instructions. + for (MachineRegisterInfo::use_instr_iterator + UI = MRI->use_instr_begin(Reg), + E = MRI->use_instr_end(); UI != E; ) { + MachineInstr *UseMI = &*(UI++); + MachineBasicBlock *MBBL = UseMI->getParent(); + // if have any instruction is cmp inst in Machine Basic Block + // dont do this transformation. + MachineBasicBlock::instr_iterator Inst; + for (Inst = MBBL->instr_begin(); + Inst != MBBL->instr_end(); ++Inst) + if (Inst->getOpcode() == TargetOpcode::G_ICMP) + return; + + // iterate through the first move instruction and find out the uses of def register. + // if def reg is killed in first use and we want use this + // def reg in below instructions have same const value uses, + // have reset the kill as false. and in final use it will kill this def reg after all uses. + for (MachineRegisterInfo::use_instr_iterator + RegUI = MRI->use_instr_begin(Newreg), + RegE = MRI->use_instr_end(); RegUI != RegE; ) { + MachineInstr *RegUseMI = &*(RegUI++); + for (MachineInstr::mop_iterator MO = RegUseMI->operands_begin(), + ME = RegUseMI->operands_end(); + MO != ME; ++MO) { + if (!MO->isReg()) + continue; + if (MO->isDef()) + continue; + if(Newreg == MO->getReg()) { + if(MO->isKill()) + MO->setIsKill(false); + } + } + } + const DebugLoc DL = UseMI->getDebugLoc(); + MachineBasicBlock::iterator InstIter = UseMI->getIterator(); + unsigned TargetOpc = TII->changeAArch64Opcode(UseMI); + // if instruction is either Add or SUb, + // then the instruction is add change to sub and vice versa. + if (TII->changeAArch64Opcode(UseMI)) { + BuildMI(*MBBL, InstIter, DL, TII->get(TargetOpc)) + .add(UseMI->getOperand(0)) + .add(UseMI->getOperand(1)) + .add(UseMI->getOperand(2)); + // replace the constant uses reg with top const num use reg value. + MRI->replaceRegWith(Reg, Newreg); + // erase the second move instruction from parent basic block. + it->ConstMInstr->eraseFromParent(); + // remove old instruction (Add or Sub) from Basic Block. + UseMI->eraseFromParent(); + } + } + } + } + } + } + } +} + bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector, 8> CSEPairs; SmallVector ImplicitDefsToUpdate; SmallVector ImplicitDefs; + + // if opt level is either -Oz or -Os, call updateRegInfoAndInstr() API. + // if able to change add to sub or sub to add. + if (MBB->getParent()->getFunction().optForSize()) { + updateRegInfoAndInstr(MBB); + } + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -746,15 +875,40 @@ return Changed; } +// iterate through current function and find out the constant values and uses +// of those values, and store those constant values and instructions into list. +void MachineCSE::AnalyseConstValueUses(MachineFunction &MF) { + for (MachineBasicBlock &MI : MF) { + for (MachineInstr &I : MI) { + if (I.isMoveImmediate()) { + int ConstNum = I.getOperand(1).getImm(); + ConstMInst constObj; + constObj.OriginalValue = ConstNum; + constObj.RegValue = I.getOperand(0).getReg(); + constObj.ConstMInstr = &I; + + constMInstVec.push_back(constObj); + } + } + } +} + bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) return false; - + TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); AA = &getAnalysis().getAAResults(); DT = &getAnalysis(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); + + // if optimization level is Oz or Os, then we are analyzing the instructions for constant values and the uses of those values. + if (MF.getFunction().optForSize()) { + constMInstVec.clear(); + AnalyseConstValueUses(MF); + } return PerformCSE(DT->getRootNode()); } Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -162,6 +162,9 @@ MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify = false) const override; + + unsigned changeAArch64Opcode(MachineInstr *MInstr) const override; + unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -290,6 +290,35 @@ return true; } +unsigned AArch64InstrInfo::changeAArch64Opcode(MachineInstr *MInstr) const { + switch (MInstr->getOpcode()) { + default: + return false; + case AArch64::ADDWrr: + return AArch64::SUBWrr; + case AArch64::ADDWrs: + return AArch64::SUBWrs; + case AArch64::ADDWrx: + return AArch64::SUBWrx; + case AArch64::ADDWri: + return AArch64::SUBWri; + case AArch64::ADDXri: + return AArch64::SUBXri; + case AArch64::ADDXrr: + return AArch64::SUBXrr; + case AArch64::ADDXrs: + return AArch64::SUBXrs; + case AArch64::ADDXrx: + return AArch64::SUBXrx; + case AArch64::SUBWri: + return AArch64::ADDWri; + case AArch64::SUBWrr: + return AArch64::ADDWrr; + case AArch64::SUBXri: + return AArch64::ADDXri; + } +} + bool AArch64InstrInfo::reverseBranchCondition( SmallVectorImpl &Cond) const { if (Cond[0].getImm() != -1) { Index: test/CodeGen/AArch64/better_use_of_existing_constants.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/better_use_of_existing_constants.ll @@ -0,0 +1,44 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64-apple-darwin -mcpu=cortex-a53 < %s | FileCheck %s +; ModuleID = '../test_51313.c' +source_filename = "../test_51313.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-arm-none-eabi" + +; Function Attrs: minsize norecurse nounwind optsize readonly +;CHECK-LABEL: @test +;CHECK: mov +;CHECK-NEXT: movk +;CHECK: ldr +;CHECK-NEXT: sub +;CHECK-NOT: add + +define dso_local i32 @test(i32*) local_unnamed_addr #0 { + %2 = icmp ugt i32* %0, inttoptr (i64 655360000 to i32*) + br i1 %2, label %3, label %6 + +3: ; preds = %1 + %4 = load i32, i32* %0, align 4, !tbaa !2 + %5 = add nsw i32 %4, -655360000 + br label %9 + +6: ; preds = %1 + %7 = ptrtoint i32* %0 to i64 + %8 = trunc i64 %7 to i32 + br label %9 + +9: ; preds = %6, %3 + %10 = phi i32 [ %5, %3 ], [ %8, %6 ] + ret i32 %10 +} + +attributes #0 = { minsize norecurse nounwind optsize readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a53" "target-features"="+aes,+crc,+crypto,+fp-armv8,+neon,+sha2" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 9.0.0 (https://git.llvm.org/git/clang.git/ a67b0501cbbab83a4a0795365ef9c38f0a6473f6) (https://git.llvm.org/git/llvm.git/ 5b28ef618dab3056eb1ce524dd504f9031ab88dc)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"}