Index: llvm/trunk/lib/Transforms/InstCombine/CMakeLists.txt
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/CMakeLists.txt
+++ llvm/trunk/lib/Transforms/InstCombine/CMakeLists.txt
@@ -5,6 +5,7 @@
 add_llvm_library(LLVMInstCombine
   InstructionCombining.cpp
   InstCombineAddSub.cpp
+  InstCombineAtomicRMW.cpp
   InstCombineAndOrXor.cpp
   InstCombineCalls.cpp
   InstCombineCasts.cpp
Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -0,0 +1,48 @@
+//===- InstCombineAtomicRMW.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the visit functions for atomic rmw instructions.
+//
+//===----------------------------------------------------------------------===//
+#include "InstCombineInternal.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
+  switch (RMWI.getOperation()) {
+  default:
+    break;
+  case AtomicRMWInst::Add:
+  case AtomicRMWInst::Sub:
+  case AtomicRMWInst::Or:
+    // Replace atomicrmw <op> addr, 0 => load atomic addr.
+
+    // Volatile RMWs perform a load and a store, so we cannot replace
+    // this with just a load.
+    if (RMWI.isVolatile())
+      break;
+
+    auto *CI = dyn_cast<ConstantInt>(RMWI.getValOperand());
+    if (!CI || !CI->isZero())
+      break;
+    // Check if the required ordering is compatible with an
+    // atomic load.
+    AtomicOrdering Ordering = RMWI.getOrdering();
+    assert(Ordering != AtomicOrdering::NotAtomic &&
+           Ordering != AtomicOrdering::Unordered &&
+           "AtomicRMWs don't make sense with Unordered or NotAtomic");
+    if (Ordering != AtomicOrdering::Acquire &&
+        Ordering != AtomicOrdering::Monotonic)
+      break;
+    LoadInst *Load = new LoadInst(RMWI.getType(), RMWI.getPointerOperand());
+    Load->setAtomic(Ordering, RMWI.getSyncScopeID());
+    return Load;
+  }
+  return nullptr;
+}
Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -401,6 +401,7 @@
   Instruction *visitFree(CallInst &FI);
   Instruction *visitLoadInst(LoadInst &LI);
   Instruction *visitStoreInst(StoreInst &SI);
+  Instruction *visitAtomicRMWInst(AtomicRMWInst &SI);
   Instruction *visitBranchInst(BranchInst &BI);
   Instruction *visitFenceInst(FenceInst &FI);
   Instruction *visitSwitchInst(SwitchInst &SI);
Index: llvm/trunk/test/Transforms/InstCombine/atomicrmw.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/atomicrmw.ll
+++ llvm/trunk/test/Transforms/InstCombine/atomicrmw.ll
@@ -0,0 +1,84 @@
+; RUN: opt -instcombine -S -o - %s | FileCheck %s
+; Check that we can replace `atomicrmw <op> LHS, 0` with `load atomic LHS`.
+; This is possible when:
+; - <op> LHS, 0 == LHS
+; - the ordering of atomicrmw is compatible with a load (i.e., no release semantics)
+
+; CHECK-LABEL: atomic_add_zero
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_add_zero(i32* %addr) {
+  %res = atomicrmw add i32* %addr, i32 0 monotonic
+  ret i32 %res
+}
+
+; Don't transform volatile atomicrmw. This would eliminate a volatile store
+; otherwise.
+; CHECK-LABEL: atomic_sub_zero_volatile
+; CHECK-NEXT: %res = atomicrmw volatile sub i64* %addr, i64 0 acquire
+; CHECK-NEXT: ret i64 %res
+define i64 @atomic_sub_zero_volatile(i64* %addr) {
+  %res = atomicrmw volatile sub i64* %addr, i64 0 acquire
+  ret i64 %res
+}
+
+
+; Check that the transformation properly preserves the syncscope.
+; CHECK-LABEL: atomic_or_zero
+; CHECK-NEXT: %res = load atomic i16, i16* %addr syncscope("some_syncscope") acquire, align 2
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_or_zero(i16* %addr) {
+  %res = atomicrmw or i16* %addr, i16 0 syncscope("some_syncscope") acquire
+  ret i16 %res
+}
+
+; Don't transform seq_cst ordering.
+; By eliminating the store part of the atomicrmw, we would get rid of the
+; release semantics, which is incorrect.
+; CHECK-LABEL: atomic_or_zero_seq_cst
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 seq_cst
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_or_zero_seq_cst(i16* %addr) {
+  %res = atomicrmw or i16* %addr, i16 0 seq_cst
+  ret i16 %res
+}
+
+; Check that the transformation does not apply when the value is changed by
+; the atomic operation (non-zero constant).
+; CHECK-LABEL: atomic_or_non_zero
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 2 monotonic
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_or_non_zero(i16* %addr) {
+  %res = atomicrmw or i16* %addr, i16 2 monotonic
+  ret i16 %res
+}
+
+; Check that the transformation does not apply when the value is changed by
+; the atomic operation (xor operation with zero).
+; CHECK-LABEL: atomic_xor_zero
+; CHECK-NEXT: %res = atomicrmw xor i16* %addr, i16 0 monotonic
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_xor_zero(i16* %addr) {
+  %res = atomicrmw xor i16* %addr, i16 0 monotonic
+  ret i16 %res
+}
+
+; Check that the transformation does not apply when the ordering is
+; incompatible with a load (release).
+; CHECK-LABEL: atomic_or_zero_release
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 release
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_or_zero_release(i16* %addr) {
+  %res = atomicrmw or i16* %addr, i16 0 release
+  ret i16 %res
+}
+
+; Check that the transformation does not apply when the ordering is
+; incompatible with a load (acquire, release).
+; CHECK-LABEL: atomic_or_zero_acq_rel
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 acq_rel
+; CHECK-NEXT: ret i16 %res
define i16 @atomic_or_zero_acq_rel(i16* %addr) {
+  %res = atomicrmw or i16* %addr, i16 0 acq_rel
+  ret i16 %res
+}
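
Note on how the new hook is reached: InstCombine's driver walks each function and dispatches every instruction through LLVM's InstVisitor CRTP mechanism, which routes an atomicrmw to visitAtomicRMWInst by opcode. That is why the patch only needs the declaration in InstCombineInternal.h and the definition in the new file; no separate registration step exists. Below is a minimal standalone sketch of that dispatch pattern; the RMWCounter visitor and countRMWs helper are hypothetical illustrations, not part of the patch or of InstCombine itself:

  #include "llvm/IR/Function.h"
  #include "llvm/IR/InstVisitor.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Hypothetical visitor: InstVisitor<RMWCounter> dispatches on each
  // instruction's opcode, so overriding visitAtomicRMWInst is enough to
  // see every atomicrmw in whatever IR unit is visited.
  struct RMWCounter : InstVisitor<RMWCounter> {
    unsigned NumRMWs = 0;
    void visitAtomicRMWInst(AtomicRMWInst &RMWI) { ++NumRMWs; }
  };

  // Usage sketch: count the atomicrmw instructions in a function.
  unsigned countRMWs(Function &F) {
    RMWCounter C;
    C.visit(F);
    return C.NumRMWs;
  }

Note also that visitAtomicRMWInst does not insert the LoadInst itself: returning a freshly created instruction follows the convention of the visit* methods in InstCombineInternal.h, where the InstCombine driver inserts the returned instruction, replaces all uses of the old one with it, and erases the original atomicrmw.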