diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -608,16 +608,74 @@ namespace { -/// Result values from createMaskInstrs helper. struct PartwordMaskValues { - Type *WordType; - Type *ValueType; - Value *AlignedAddr; - Value *ShiftAmt; - Value *Mask; - Value *Inv_Mask; + // These three fields are guaranteed to be set by the constructor. + Type *WordType = nullptr; + Type *ValueType = nullptr; + Value *AlignedAddr = nullptr; + // The remaining fields can be null. + Value *ShiftAmt = nullptr; + Value *Mask = nullptr; + Value *Inv_Mask = nullptr; + + PartwordMaskValues(IRBuilder<> &Builder, Instruction *I, Type *ValueType, + Value *Addr, unsigned WordSize); + Value *extract(IRBuilder<> &Builder, Value *WideWord) const; + Value *insert(IRBuilder<> &Builder, Value *WideWord, Value *Updated) const; }; +LLVM_ATTRIBUTE_UNUSED +raw_ostream &operator<< (raw_ostream &O, const PartwordMaskValues &PMV) { + auto PrintObj = [&O] (auto *V) { + if (V) + O << *V; + else + O << "nullptr"; + O << '\n'; + }; + O << "PartwordMaskValues {\n"; + O << " WordType: "; + PrintObj(PMV.WordType); + O << " ValueType: "; + PrintObj(PMV.ValueType); + O << " AlignedAddr: "; + PrintObj(PMV.AlignedAddr); + O << " ShiftAmt: "; + PrintObj(PMV.ShiftAmt); + O << " Mask: "; + PrintObj(PMV.Mask); + O << " Inv_Mask: "; + PrintObj(PMV.Inv_Mask); + O << "}\n"; + return O; +} + +Value *PartwordMaskValues::extract(IRBuilder<> &Builder, + Value *WideWord) const { + assert(WideWord->getType() == WordType && "Widened type mismatch"); + if (WordType == ValueType) + return WideWord; + + Value *And = Builder.CreateAnd(WideWord, Mask, "masked"); + Value *Shift = Builder.CreateLShr(And, ShiftAmt, "shifted"); + Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted"); + return Trunc; +} + +Value *PartwordMaskValues::insert(IRBuilder<> &Builder, Value *WideWord, + Value *Updated) 
const { + assert(WideWord->getType() == WordType && "Widened type mismatch"); + assert(Updated->getType() == ValueType && "Value type mismatch"); + if (WordType == ValueType) + return Updated; + + Value *ZExt = Builder.CreateZExt(Updated, WordType, "extended"); + Value *Shift = Builder.CreateShl(ZExt, ShiftAmt, "shifted", /*HasNUW*/true); + Value *And = Builder.CreateAnd(WideWord, Inv_Mask, "unmasked"); + Value *Or = Builder.CreateOr(And, Shift, "inserted"); + return Or; +} + } // end anonymous namespace /// This is a helper function which builds instructions to provide @@ -636,50 +694,48 @@ /// include only the part that would've been loaded from Addr. /// /// Inv_Mask: The inverse of Mask. -static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, - Type *ValueType, Value *Addr, - unsigned WordSize) { - PartwordMaskValues Ret; - - BasicBlock *BB = I->getParent(); - Function *F = BB->getParent(); +PartwordMaskValues::PartwordMaskValues(IRBuilder<> &Builder, Instruction *I, + Type *ValueType, Value *Addr, + unsigned MinWordSize) { Module *M = I->getModule(); - - LLVMContext &Ctx = F->getContext(); + LLVMContext &Ctx = M->getContext(); const DataLayout &DL = M->getDataLayout(); - unsigned ValueSize = DL.getTypeStoreSize(ValueType); - assert(ValueSize < WordSize); + this->ValueType = ValueType; + this->WordType = MinWordSize > ValueSize + ? 
Type::getIntNTy(Ctx, MinWordSize * 8) + : ValueType; + if (ValueType == WordType) { + AlignedAddr = Addr; + return; + } - Ret.ValueType = ValueType; - Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8); + assert(ValueSize < MinWordSize); Type *WordPtrType = - Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); + WordType->getPointerTo(Addr->getType()->getPointerAddressSpace()); Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx)); - Ret.AlignedAddr = Builder.CreateIntToPtr( - Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType, + AlignedAddr = Builder.CreateIntToPtr( + Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType, "AlignedAddr"); - Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB"); + Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB"); if (DL.isLittleEndian()) { // turn bytes into bits - Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3); + ShiftAmt = Builder.CreateShl(PtrLSB, 3); } else { // turn bytes into bits, and count from the other side. 
- Ret.ShiftAmt = - Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3); + ShiftAmt = Builder.CreateShl( + Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3); } - Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt"); - Ret.Mask = Builder.CreateShl( - ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt, - "Mask"); - Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask"); - - return Ret; + ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt"); + Mask = Builder.CreateShl( + ConstantInt::get(WordType, (1 << (ValueSize * 8)) - 1), + ShiftAmt, "Mask"); + Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask"); } /// Emit IR to implement a masked version of a given atomicrmw @@ -748,9 +804,8 @@ IRBuilder<> Builder(AI); - PartwordMaskValues PMV = - createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + PartwordMaskValues PMV(Builder, AI, AI->getType(), AI->getPointerOperand(), + TLI->getMinCmpXchgSizeInBits() / 8); Value *ValOperand_Shifted = Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), @@ -781,9 +836,8 @@ Op == AtomicRMWInst::And) && "Unable to widen operation"); - PartwordMaskValues PMV = - createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + PartwordMaskValues PMV(Builder, AI, AI->getType(), AI->getPointerOperand(), + TLI->getMinCmpXchgSizeInBits() / 8); Value *ValOperand_Shifted = Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType), @@ -865,8 +919,8 @@ std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); - PartwordMaskValues PMV = createMaskInstrs( - Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize); + PartwordMaskValues PMV(Builder, CI, CI->getCompareOperand()->getType(), + Addr, WordSize); // Shift the incoming values over, into the right location in the word. 
Value *NewVal_Shifted = @@ -947,9 +1001,8 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) { IRBuilder<> Builder(AI); - PartwordMaskValues PMV = - createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + PartwordMaskValues PMV(Builder, AI, AI->getType(), AI->getPointerOperand(), + TLI->getMinCmpXchgSizeInBits() / 8); // The value operand must be sign-extended for signed min/max so that the // target's signed comparison instructions can be used. Otherwise, just @@ -974,9 +1027,9 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) { IRBuilder<> Builder(CI); - PartwordMaskValues PMV = createMaskInstrs( - Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(), - TLI->getMinCmpXchgSizeInBits() / 8); + PartwordMaskValues PMV(Builder, CI, CI->getCompareOperand()->getType(), + CI->getPointerOperand(), + TLI->getMinCmpXchgSizeInBits() / 8); Value *CmpVal_Shifted = Builder.CreateShl( Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt, @@ -1122,28 +1175,34 @@ // do it even on minsize. bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak(); + unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8; + // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord // // The full expansion we produce is: // [...] + // %aligned.addr = ... // cmpxchg.start: - // %unreleasedload = @load.linked(%addr) - // %should_store = icmp eq %unreleasedload, %desired - // br i1 %should_store, label %cmpxchg.fencedstore, + // %unreleasedload = @load.linked(%aligned.addr) + // %unreleasedload.extract = extract value from %unreleasedload + // %should_store = icmp eq %unreleasedload.extract, %desired + // br i1 %should_store, label %cmpxchg.releasingstore, // label %cmpxchg.nostore // cmpxchg.releasingstore: // fence? 
// br label cmpxchg.trystore // cmpxchg.trystore: - // %loaded.trystore = phi [%unreleasedload, %releasingstore], + // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore], // [%releasedload, %cmpxchg.releasedload] - // %stored = @store_conditional(%new, %addr) + // %updated.new = insert %new into %loaded.trystore + // %stored = @store_conditional(%updated.new, %aligned.addr) // %success = icmp eq i32 %stored, 0 // br i1 %success, label %cmpxchg.success, // label %cmpxchg.releasedload/%cmpxchg.failure // cmpxchg.releasedload: - // %releasedload = @load.linked(%addr) - // %should_store = icmp eq %releasedload, %desired + // %releasedload = @load.linked(%aligned.addr) + // %releasedload.extract = extract value from %releasedload + // %should_store = icmp eq %releasedload.extract, %desired // br i1 %should_store, label %cmpxchg.trystore, // label %cmpxchg.failure // cmpxchg.success: @@ -1159,9 +1218,10 @@ // fence? // br label %cmpxchg.end // cmpxchg.end: - // %loaded = phi [%loaded.nostore, %cmpxchg.failure], - // [%loaded.trystore, %cmpxchg.trystore] + // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure], + // [%loaded.trystore, %cmpxchg.trystore] // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] + // %loaded = extract value from %loaded.exit // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 // [...] @@ -1187,13 +1247,19 @@ Builder.SetInsertPoint(BB); if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier) TLI->emitLeadingFence(Builder, CI, SuccessOrder); + + PartwordMaskValues PMV(Builder, CI, CI->getCompareOperand()->getType(), + Addr, MinCASSize); + Builder.CreateBr(StartBB); // Start the main loop block now that we've taken care of the preliminaries. 
   Builder.SetInsertPoint(StartBB);
-  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+  Value *UnreleasedLoad =
+      TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+  Value *UnreleasedLoadExtract = PMV.extract(Builder, UnreleasedLoad);
   Value *ShouldStore = Builder.CreateICmpEQ(
-      UnreleasedLoad, CI->getCompareOperand(), "should_store");
+      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
 
   // If the cmpxchg doesn't actually need any ordering when it fails, we can
   // jump straight past that fence instruction (if it exists).
@@ -1205,8 +1271,13 @@
   Builder.CreateBr(TryStoreBB);
 
   Builder.SetInsertPoint(TryStoreBB);
+  PHINode *LoadedTryStore =
+      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
+  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
+  Value *NewValueInsert = PMV.insert(
+      Builder, LoadedTryStore, CI->getNewValOperand());
   Value *StoreSuccess = TLI->emitStoreConditional(
-      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+      Builder, NewValueInsert, PMV.AlignedAddr, MemOpOrder);
   StoreSuccess = Builder.CreateICmpEQ(
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
   BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
@@ -1216,13 +1287,16 @@
   Builder.SetInsertPoint(ReleasedLoadBB);
   Value *SecondLoad;
   if (HasReleasedLoadBB) {
-    SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
-    ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
-                                       "should_store");
+    SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+    Value *SecondLoadExtract = PMV.extract(Builder, SecondLoad);
+    ShouldStore = Builder.CreateICmpEQ(
+        SecondLoadExtract, CI->getCompareOperand(), "should_store");
 
     // If the cmpxchg doesn't actually need any ordering when it fails, we can
     // jump straight past that fence instruction (if it exists).
     Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+    // Update PHI node in TryStoreBB.
+ LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB); } else Builder.CreateUnreachable(); @@ -1234,6 +1308,12 @@ Builder.CreateBr(ExitBB); Builder.SetInsertPoint(NoStoreBB); + PHINode *LoadedNoStore = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore"); + LoadedNoStore->addIncoming(UnreleasedLoad, StartBB); + if (HasReleasedLoadBB) + LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB); + // In the failing case, where we don't execute the store-conditional, the // target might want to balance out the load-linked with a dedicated // instruction (e.g., on ARM, clearing the exclusive monitor). @@ -1241,6 +1321,11 @@ Builder.CreateBr(FailureBB); Builder.SetInsertPoint(FailureBB); + PHINode *LoadedFailure = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure"); + LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB); + if (CI->isWeak()) + LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB); if (ShouldInsertFencesForAtomic) TLI->emitTrailingFence(Builder, CI, FailureOrder); Builder.CreateBr(ExitBB); @@ -1250,32 +1335,56 @@ // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate // PHI. Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + PHINode *LoadedExit = + Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit"); + LoadedExit->addIncoming(LoadedTryStore, SuccessBB); + LoadedExit->addIncoming(LoadedFailure, FailureBB); + PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success"); Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); - // Setup the builder so we can create any PHIs we need. 
-  Value *Loaded;
-  if (!HasReleasedLoadBB)
-    Loaded = UnreleasedLoad;
-  else {
-    Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
-    PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
-    TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
-    TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
-
-    Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
-    PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
-    NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
-    NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
-
-    Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
-    PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
-    ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
-    ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
-
-    Loaded = ExitLoaded;
+  // This is the "exit value" from the cmpxchg expansion. It may be of
+  // a type wider than the one in the cmpxchg instruction.
+  Value *LoadedFull = LoadedExit;
+
+  // The newly inserted PHI nodes may be unnecessary, for example, they
+  // may have only one argument. Simplify the code eliminating those
+  // PHI that can be easily removed. Since the "exit value" (LoadedFull)
+  // was also a PHI, make sure that it's kept up to date.
+  SmallVector<PHINode *, 4> NewPHIs = {LoadedTryStore, LoadedNoStore,
+                                       LoadedFailure, LoadedExit};
+  // Step 1: identify PHIs with a single incoming value.
+  for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
+    PHINode *P = NewPHIs[i];
+    if (P->getNumIncomingValues() != 1)
+      continue;
+    Value *V = P->getIncomingValue(0);
+    if (P == LoadedFull)
+      LoadedFull = V;
+    P->replaceAllUsesWith(V);
+    P->eraseFromParent();
+    NewPHIs[i] = nullptr;
   }
+  // Step 2: Identify PHIs with the same incoming value from all predecessors.
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { + PHINode *P = NewPHIs[i]; + if (!P) + continue; + Value *First = P->getIncomingValue(0); + for (unsigned j = 1, f = P->getNumIncomingValues(); j != f; ++j) { + if (First && P->getIncomingValue(j) != First) + First = nullptr; + } + if (First) { + if (P == LoadedFull) + LoadedFull = First; + P->replaceAllUsesWith(First); + P->eraseFromParent(); + } + } + + Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator())); + Value *Loaded = PMV.extract(Builder, LoadedFull); // Look for any users of the cmpxchg that are just comparing the loaded value // against the desired one, and replace them with the CFG-derived version. @@ -1417,8 +1526,6 @@ expandPartwordCmpXchg(CI); return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: { - assert(ValueSize >= MinCASSize && - "MinCmpXchgSizeInBits not yet supported for LL/SC expansions."); return expandAtomicCmpXchg(CI); } case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -3506,9 +3506,5 @@ TargetLowering::AtomicExpansionKind HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR( AtomicCmpXchgInst *AI) const { - const DataLayout &DL = AI->getModule()->getDataLayout(); - unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType()); - if (Size >= 4 && Size <= 8) - return AtomicExpansionKind::LLSC; - return AtomicExpansionKind::None; + return AtomicExpansionKind::LLSC; } diff --git a/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll b/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK-LABEL: danny +; CHECK: memw_locked +define i8 @danny(i8* %a0) unnamed_addr #0 { +start: + 
%v0 = cmpxchg i8* %a0, i8 0, i8 1 seq_cst seq_cst + %v1 = extractvalue { i8, i1 } %v0, 0 + ret i8 %v1 +} + +; CHECK-LABEL: sammy +; CHECK: memw_locked +define i16 @sammy(i16* %a0) unnamed_addr #0 { +start: + %v0 = cmpxchg i16* %a0, i16 0, i16 1 seq_cst seq_cst + %v1 = extractvalue { i16, i1 } %v0, 0 + ret i16 %v1 +} + +; CHECK-LABEL: kirby +; CHECK: memw_locked +define i32 @kirby(i32* %a0) unnamed_addr #0 { +start: + %v0 = cmpxchg i32* %a0, i32 0, i32 1 seq_cst seq_cst + %v1 = extractvalue { i32, i1 } %v0, 0 + ret i32 %v1 +} diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll --- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll @@ -261,8 +261,8 @@ ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ] +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: ret i8 [[LOADED]] %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst @@ -311,8 +311,8 @@ ; CHECK: br label %[[DONE]] ; CHECK: [[DONE]]: -; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ] +; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ] ; CHECK: ret i16 [[LOADED]] %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic