Index: lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -21,12 +21,21 @@
 /// TODO: Common w/ the version in AtomicExpandPass, and change the term used.
 /// Idemptotent is confusing in this context.
 bool isIdempotentRMW(AtomicRMWInst& RMWI) {
+  AtomicRMWInst::BinOp Op = RMWI.getOperation();
+  if (auto CF = dyn_cast<ConstantFP>(RMWI.getValOperand()))
+    switch(Op) {
+    case AtomicRMWInst::FAdd: // -0.0
+      return CF->isZero() && CF->isNegative();
+    case AtomicRMWInst::FSub: // +0.0
+      return CF->isZero() && !CF->isNegative();
+    default:
+      return false;
+    };
+
   auto C = dyn_cast<ConstantInt>(RMWI.getValOperand());
   if(!C)
-    // TODO: Handle fadd, fsub?
     return false;
 
-  AtomicRMWInst::BinOp Op = RMWI.getOperation();
   switch(Op) {
   case AtomicRMWInst::Add:
   case AtomicRMWInst::Sub:
@@ -68,12 +77,18 @@
 
   // We chose to canonicalize all idempotent operations to an single
   // operation code and constant.  This makes it easier for the rest of the
-  // optimizer to match easily.  The choice of or w/zero is arbitrary.
+  // optimizer to match easily.  The choices of or w/0 and fadd w/-0.0 are
+  // arbitrary.
   if (RMWI.getType()->isIntegerTy() &&
       RMWI.getOperation() != AtomicRMWInst::Or) {
     RMWI.setOperation(AtomicRMWInst::Or);
     RMWI.setOperand(1, ConstantInt::get(RMWI.getType(), 0));
     return &RMWI;
+  } else if (RMWI.getType()->isFloatingPointTy() &&
+             RMWI.getOperation() != AtomicRMWInst::FAdd) {
+    RMWI.setOperation(AtomicRMWInst::FAdd);
+    RMWI.setOperand(1, ConstantFP::getNegativeZero(RMWI.getType()));
+    return &RMWI;
   }
 
   // Check if the required ordering is compatible with an atomic load.
Index: test/Transforms/InstCombine/atomicrmw.ll
===================================================================
--- test/Transforms/InstCombine/atomicrmw.ll
+++ test/Transforms/InstCombine/atomicrmw.ll
@@ -67,6 +67,29 @@
   ret i8 %res
 }
 
+; CHECK-LABEL: atomic_fsub_zero
+; CHECK-NEXT: %res = load atomic float, float* %addr monotonic, align 4
+; CHECK-NEXT: ret float %res
+define float @atomic_fsub_zero(float* %addr) {
+  %res = atomicrmw fsub float* %addr, float 0.0 monotonic
+  ret float %res
+}
+
+; CHECK-LABEL: atomic_fadd_zero
+; CHECK-NEXT: %res = load atomic float, float* %addr monotonic, align 4
+; CHECK-NEXT: ret float %res
+define float @atomic_fadd_zero(float* %addr) {
+  %res = atomicrmw fadd float* %addr, float -0.0 monotonic
+  ret float %res
+}
+
+; CHECK-LABEL: atomic_fsub_canon
+; CHECK-NEXT: %res = atomicrmw fadd float* %addr, float -0.000000e+00 release
+; CHECK-NEXT: ret float %res
+define float @atomic_fsub_canon(float* %addr) {
+  %res = atomicrmw fsub float* %addr, float 0.0 release
+  ret float %res
+}
 
 ; Can't replace a volatile w/a load; this would eliminate a volatile store.
 ; CHECK-LABEL: atomic_sub_zero_volatile