Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -28,12 +28,14 @@ class TargetMachine; class TargetRegisterClass; class raw_ostream; +struct SimplifyCFGOptions; } // End llvm namespace /// List of target independent CodeGen pass IDs. namespace llvm { FunctionPass *createAtomicExpandPass(); + FunctionPass *createAtomicExpandPass(const SimplifyCFGOptions &Opts); /// createUnreachableBlockEliminationPass - The LLVM code generator does not /// work well with unreachable basic blocks (what live ranges make sense for a Index: lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- lib/CodeGen/AtomicExpandPass.cpp +++ lib/CodeGen/AtomicExpandPass.cpp @@ -18,6 +18,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Utils/Local.h" #include "llvm/CodeGen/AtomicExpandUtils.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/TargetLowering.h" @@ -58,16 +60,26 @@ class AtomicExpand: public FunctionPass { const TargetLowering *TLI = nullptr; + const TargetTransformInfo *TTI = nullptr; + // If the CFG simplification options are present, the newly created + // basic blocks will be simplified at the end of runOnFunction. 
+ Optional<SimplifyCFGOptions> SimplifyOpts; + SmallVector<BasicBlock *, 8> SimplifyBlocks; public: static char ID; // Pass identification, replacement for typeid - AtomicExpand() : FunctionPass(ID) { + AtomicExpand(Optional<SimplifyCFGOptions> Opts = None) + : FunctionPass(ID), SimplifyOpts(Opts) { initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetTransformInfoWrapperPass>(); + } + private: bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); @@ -125,7 +137,13 @@ INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false, false) -FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } +FunctionPass *llvm::createAtomicExpandPass() { + return new AtomicExpand(None); +} + +FunctionPass *llvm::createAtomicExpandPass(const SimplifyCFGOptions &Opts) { + return new AtomicExpand(Opts); +} // Helper functions to retrieve the size of atomic instructions. 
static unsigned getAtomicOpSize(LoadInst *LI) { @@ -200,6 +218,8 @@ if (!TM.getSubtargetImpl(F)->enableAtomicExpand()) return false; TLI = TM.getSubtargetImpl(F)->getTargetLowering(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + SimplifyBlocks.clear(); SmallVector<Instruction *, 1> AtomicInsts; @@ -334,6 +354,11 @@ } } } + + if (SimplifyOpts) + for (BasicBlock *B : SimplifyBlocks) + simplifyCFG(B, *TTI, *SimplifyOpts); + return MadeChange; } @@ -857,6 +882,8 @@ CI->replaceAllUsesWith(Res); CI->eraseFromParent(); + + SimplifyBlocks.insert(SimplifyBlocks.end(), { EndBB, FailureBB, LoopBB }); } void AtomicExpand::expandAtomicOpToLLSC( @@ -913,6 +940,8 @@ Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + + SimplifyBlocks.insert(SimplifyBlocks.end(), { ExitBB, LoopBB }); return Loaded; } @@ -1181,6 +1210,11 @@ } CI->eraseFromParent(); + + SimplifyBlocks.insert(SimplifyBlocks.end(), + { ExitBB, FailureBB, NoStoreBB, SuccessBB, + ReleasedLoadBB, TryStoreBB, ReleasingStoreBB, + StartBB }); return true; } Index: lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetMachine.cpp +++ lib/Target/AArch64/AArch64TargetMachine.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/Utils/Local.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" @@ -363,7 +364,10 @@ void AArch64PassConfig::addIRPasses() { // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg // ourselves. - addPass(createAtomicExpandPass()); + if (EnableAtomicTidy) + addPass(createAtomicExpandPass(SimplifyCFGOptions())); + else + addPass(createAtomicExpandPass()); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. 
We can exploit existing control-flow in Index: lib/Target/Hexagon/HexagonTargetMachine.cpp =================================================================== --- lib/Target/Hexagon/HexagonTargetMachine.cpp +++ lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -17,6 +17,7 @@ #include "HexagonMachineScheduler.h" #include "HexagonTargetObjectFile.h" #include "HexagonTargetTransformInfo.h" +#include "llvm/Analysis/Utils/Local.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LegacyPassManager.h" @@ -310,7 +311,7 @@ addPass(createDeadCodeEliminationPass()); } - addPass(createAtomicExpandPass()); + addPass(createAtomicExpandPass(SimplifyCFGOptions())); if (!NoOpt) { if (EnableLoopPrefetch) addPass(createLoopDataPrefetchPass()); Index: test/CodeGen/AArch64/atomic-expand-simplify-cfg.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/atomic-expand-simplify-cfg.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=aarch64 < %s | FileCheck %s + +; The atomic expansion pass generates CFG structures that contain phi nodes +; for boolean values. +; Check that this is optimized away. + +; There should be no uses of WZR. 
+; CHECK-NOT: orr{{.*}}wzr +; CHECK-NOT: mov{{.*}}wzr + +define dso_local i32 @f0(i32* nocapture readnone %a0) { +b0: + %v0 = load volatile i32, i32* %a0, align 8 + %v1 = add i32 %v0, 1 + %v2 = icmp eq i32 %v1, 17 + %v3 = select i1 %v2, i32 0, i32 %v1 + %v4 = cmpxchg i32* %a0, i32 %v0, i32 %v3 seq_cst seq_cst + %v5 = extractvalue { i32, i1 } %v4, 1 + br i1 %v5, label %b2, label %b1 + +b1: ; preds = %b0 + ret i32 123 + +b2: ; preds = %b0 + ret i32 321 +} Index: test/CodeGen/Hexagon/atomic-expand-simplify-cfg.ll =================================================================== --- /dev/null +++ test/CodeGen/Hexagon/atomic-expand-simplify-cfg.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; The atomic expansion pass generates CFG structures that contain phi nodes +; for boolean values. +; Check that this is optimized away. + +; CHECK-NOT: p{{[0-3]}} = or +; CHECK-NOT: p{{[0-3]}} = and + +define dso_local i32 @f0(i32* nocapture readnone %a0) { +b0: + %v0 = load volatile i32, i32* %a0, align 8 + %v1 = add i32 %v0, 1 + %v2 = icmp eq i32 %v1, 17 + %v3 = select i1 %v2, i32 0, i32 %v1 + %v4 = cmpxchg i32* %a0, i32 %v0, i32 %v3 seq_cst seq_cst + %v5 = extractvalue { i32, i1 } %v4, 1 + br i1 %v5, label %b2, label %b1 + +b1: ; preds = %b0 + ret i32 123 + +b2: ; preds = %b0 + ret i32 321 +}