Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -47,25 +47,73 @@
     return false;
   }
 }
-}
+/// Return true if the given instruction always produces a value in memory
+/// equivalent to its value operand.
+bool isSaturating(AtomicRMWInst& RMWI) {
+  auto C = dyn_cast<ConstantInt>(RMWI.getValOperand());
+  if(!C)
+    return false;
+
+  AtomicRMWInst::BinOp Op = RMWI.getOperation();
+  switch(Op) {
+  default:
+    // TODO: fadd, fsub w/Nan
+    // Note: We avoid listing xchg to prevent transform cycles.
+    return false;
+  case AtomicRMWInst::Or:
+    return C->isAllOnesValue();
+  case AtomicRMWInst::And:
+    return C->isZero();
+  case AtomicRMWInst::Min:
+    return C->isMinValue(true);
+  case AtomicRMWInst::Max:
+    return C->isMaxValue(true);
+  case AtomicRMWInst::UMin:
+    return C->isMinValue(false);
+  case AtomicRMWInst::UMax:
+    return C->isMaxValue(false);
+  };
+}
+}
 
 Instruction *InstCombiner::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
-
-  // TODO: Any atomicrmw op which produces a known result in memory can be
-  // replaced w/an atomicrmw xchg. (see getBinOpAbsorber)
-
-  // TODO: Any atomicrmw xchg with no uses can be converted to a atomic store
-  // if the ordering is compatible.
-
-  if (!isIdempotentRMW(RMWI))
-    return nullptr;
 
   // Volatile RMWs perform a load and a store, we cannot replace this by just a
-  // load. We chose not to canonicalize out of general paranoia about user
-  // expectations around volatile.
+  // load or just a store. We chose not to canonicalize out of general paranoia
+  // about user expectations around volatile.
   if (RMWI.isVolatile())
     return nullptr;
 
+  // Any atomicrmw op which produces a known result in memory can be
+  // replaced w/an atomicrmw xchg.
+  if (isSaturating(RMWI)) {
+    RMWI.setOperation(AtomicRMWInst::Xchg);
+    return &RMWI;
+  }
+
+  AtomicOrdering Ordering = RMWI.getOrdering();
+  assert(Ordering != AtomicOrdering::NotAtomic &&
+         Ordering != AtomicOrdering::Unordered &&
+         "AtomicRMWs don't make sense with Unordered or NotAtomic");
+
+  // Any atomicrmw xchg with no uses can be converted to an atomic store if the
+  // ordering is compatible.
+  if (RMWI.getOperation() == AtomicRMWInst::Xchg &&
+      RMWI.use_empty()) {
+    if (Ordering != AtomicOrdering::Release &&
+        Ordering != AtomicOrdering::Monotonic)
+      return nullptr;
+    auto *SI = new StoreInst(RMWI.getValOperand(),
+                             RMWI.getPointerOperand(), &RMWI);
+    SI->setAtomic(Ordering, RMWI.getSyncScopeID());
+    SI->setAlignment(DL.getABITypeAlignment(RMWI.getType()));
+    return eraseInstFromFunction(RMWI);
+  }
+
+  if (!isIdempotentRMW(RMWI))
+    return nullptr;
+
   // We chose to canonicalize all idempotent operations to an single
   // operation code and constant.  This makes it easier for the rest of the
   // optimizer to match easily.  The choice of or w/zero is arbitrary.
@@ -77,10 +125,6 @@
 }
 
   // Check if the required ordering is compatible with an atomic load.
-  AtomicOrdering Ordering = RMWI.getOrdering();
-  assert(Ordering != AtomicOrdering::NotAtomic &&
-         Ordering != AtomicOrdering::Unordered &&
-         "AtomicRMWs don't make sense with Unordered or NotAtomic");
   if (Ordering != AtomicOrdering::Acquire &&
       Ordering != AtomicOrdering::Monotonic)
     return nullptr;
Index: llvm/trunk/test/Transforms/InstCombine/atomicrmw.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/atomicrmw.ll
+++ llvm/trunk/test/Transforms/InstCombine/atomicrmw.ll
@@ -137,22 +137,22 @@
 
 ; CHECK-LABEL: sat_or_allones
-; CHECK-NEXT: %res = atomicrmw add i32* %addr, i32 -1 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 -1 monotonic
 ; CHECK-NEXT: ret i32 %res
 define i32 @sat_or_allones(i32* %addr) {
-  %res = atomicrmw add i32* %addr, i32 -1 monotonic
+  %res = atomicrmw or i32* %addr, i32 -1 monotonic
   ret i32 %res
 }
 
 ; CHECK-LABEL: sat_and_zero
-; CHECK-NEXT: %res = atomicrmw and i32* %addr, i32 0 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 0 monotonic
 ; CHECK-NEXT: ret i32 %res
 define i32 @sat_and_zero(i32* %addr) {
   %res = atomicrmw and i32* %addr, i32 0 monotonic
   ret i32 %res
 }
 
 ; CHECK-LABEL: sat_umin_uint_min
-; CHECK-NEXT: %res = atomicrmw umin i32* %addr, i32 0 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 0 monotonic
 ; CHECK-NEXT: ret i32 %res
 define i32 @sat_umin_uint_min(i32* %addr) {
   %res = atomicrmw umin i32* %addr, i32 0 monotonic
@@ -160,7 +160,7 @@
 }
 
 ; CHECK-LABEL: sat_umax_uint_max
-; CHECK-NEXT: %res = atomicrmw umax i32* %addr, i32 -1 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 -1 monotonic
 ; CHECK-NEXT: ret i32 %res
 define i32 @sat_umax_uint_max(i32* %addr) {
   %res = atomicrmw umax i32* %addr, i32 -1 monotonic
@@ -168,7 +168,7 @@
 }
 
 ; CHECK-LABEL: sat_min_smin_char
-; CHECK-NEXT: %res = atomicrmw min i8* %addr, i8 -128 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i8* %addr, i8 -128 monotonic
 ; CHECK-NEXT: ret i8 %res
 define i8 @sat_min_smin_char(i8* %addr) {
   %res = atomicrmw min i8* %addr, i8 -128 monotonic
@@ -176,7 +176,7 @@
 }
 
 ; CHECK-LABEL: sat_max_smax_char
-; CHECK-NEXT: %res = atomicrmw max i8* %addr, i8 127 monotonic
+; CHECK-NEXT: %res = atomicrmw xchg i8* %addr, i8 127 monotonic
 ; CHECK-NEXT: ret i8 %res
 define i8 @sat_max_smax_char(i8* %addr) {
   %res = atomicrmw max i8* %addr, i8 127 monotonic
@@ -184,7 +184,7 @@
 }
 
 ; CHECK-LABEL: xchg_unused_monotonic
-; CHECK-NEXT: atomicrmw xchg i32* %addr, i32 0 monotonic
+; CHECK-NEXT: store atomic i32 0, i32* %addr monotonic, align 4
 ; CHECK-NEXT: ret void
 define void @xchg_unused_monotonic(i32* %addr) {
   atomicrmw xchg i32* %addr, i32 0 monotonic
@@ -192,7 +192,7 @@
 }
 
 ; CHECK-LABEL: xchg_unused_release
-; CHECK-NEXT: atomicrmw xchg i32* %addr, i32 -1 release
+; CHECK-NEXT: store atomic i32 -1, i32* %addr release, align 4
 ; CHECK-NEXT: ret void
 define void @xchg_unused_release(i32* %addr) {
   atomicrmw xchg i32* %addr, i32 -1 release
@@ -215,6 +215,13 @@
   ret void
 }
 
+; CHECK-LABEL: sat_or_allones_unused
+; CHECK-NEXT: store atomic i32 -1, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret void
+define void @sat_or_allones_unused(i32* %addr) {
+  atomicrmw or i32* %addr, i32 -1 monotonic
+  ret void
+}
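
For readers skimming the patch, the sketch below illustrates the two new folds in one place. It is a hypothetical example, not part of the patch; the expected rewrites are taken from the CHECK lines above, and should be reproducible by running the patched pass over the file (e.g. with `opt -instcombine -S`).

; Hypothetical input, not part of the patch.
define i32 @example(i32* %addr) {
  ; Saturating op: 'or' with all-ones always leaves -1 in memory, so it is
  ; rewritten to: %res = atomicrmw xchg i32* %addr, i32 -1 monotonic
  %res = atomicrmw or i32* %addr, i32 -1 monotonic

  ; An xchg whose result is unused, with monotonic (or release) ordering,
  ; is rewritten to: store atomic i32 0, i32* %addr monotonic, align 4
  atomicrmw xchg i32* %addr, i32 0 monotonic

  ret i32 %res
}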