Index: lib/CodeGen/CGAtomic.cpp =================================================================== --- lib/CodeGen/CGAtomic.cpp +++ lib/CodeGen/CGAtomic.cpp @@ -73,6 +73,8 @@ LVal = lvalue; } else if (lvalue.isBitField()) { + ValueTy = lvalue.getType(); + ValueSizeInBits = C.getTypeSize(ValueTy); auto &OrigBFI = lvalue.getBitFieldInfo(); auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment()); AtomicSizeInBits = C.toBits( @@ -93,12 +95,34 @@ BFI.StorageSize = AtomicSizeInBits; LVal = LValue::MakeBitfield(Addr, BFI, lvalue.getType(), lvalue.getAlignment()); + LVal.setTBAAInfo(lvalue.getTBAAInfo()); + AtomicTy = C.getIntTypeForBitwidth(AtomicSizeInBits, OrigBFI.IsSigned); + if (AtomicTy.isNull()) { + llvm::APInt Size( + /*numBits=*/32, + C.toCharUnitsFromBits(AtomicSizeInBits).getQuantity()); + AtomicTy = C.getConstantArrayType(C.CharTy, Size, ArrayType::Normal, + /*IndexTypeQuals=*/0); + } + AtomicAlign = ValueAlign = lvalue.getAlignment(); } else if (lvalue.isVectorElt()) { - AtomicSizeInBits = C.getTypeSize(lvalue.getType()); + ValueTy = lvalue.getType()->getAs()->getElementType(); + ValueSizeInBits = C.getTypeSize(ValueTy); + AtomicTy = lvalue.getType(); + AtomicSizeInBits = C.getTypeSize(AtomicTy); + AtomicAlign = ValueAlign = lvalue.getAlignment(); LVal = lvalue; } else { assert(lvalue.isExtVectorElt()); - AtomicSizeInBits = C.getTypeSize(lvalue.getType()); + ValueTy = lvalue.getType(); + ValueSizeInBits = C.getTypeSize(ValueTy); + AtomicTy = ValueTy = CGF.getContext().getExtVectorType( + lvalue.getType(), lvalue.getExtVectorAddr() + ->getType() + ->getPointerElementType() + ->getVectorNumElements()); + AtomicSizeInBits = C.getTypeSize(AtomicTy); + AtomicAlign = ValueAlign = lvalue.getAlignment(); LVal = lvalue; } UseLibcall = !C.getTargetInfo().hasBuiltinAtomic( @@ -114,6 +138,16 @@ TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; } bool shouldUseLibcall() const { return UseLibcall; } const LValue &getAtomicLValue() const { return LVal; } + llvm::Value *getAtomicAddress() const { + if (LVal.isSimple()) + return LVal.getAddress(); + else if (LVal.isBitField()) + return LVal.getBitFieldAddr(); + else if (LVal.isVectorElt()) + return LVal.getVectorAddr(); + assert(LVal.isExtVectorElt()); + return LVal.getExtVectorAddr(); + } /// Is the atomic size larger than the underlying value type? /// @@ -137,15 +171,15 @@ llvm::Value *emitCastToAtomicIntPointer(llvm::Value *addr) const; /// Turn an atomic-layout object into an r-value. - RValue convertTempToRValue(llvm::Value *addr, - AggValueSlot resultSlot, - SourceLocation loc) const; + RValue convertTempToRValue(llvm::Value *addr, AggValueSlot resultSlot, + SourceLocation loc, bool AsValue) const; /// \brief Converts a rvalue to integer value. llvm::Value *convertRValueToInt(RValue RVal) const; - RValue convertIntToValue(llvm::Value *IntVal, AggValueSlot ResultSlot, - SourceLocation Loc) const; + RValue ConvertIntToValueOrAtomic(llvm::Value *IntVal, + AggValueSlot ResultSlot, + SourceLocation Loc, bool AsValue) const; /// Copy an atomic r-value into atomic-layout memory. void emitCopyIntoMemory(RValue rvalue) const; @@ -153,7 +187,7 @@ /// Project an l-value down to the value field. LValue projectValue() const { assert(LVal.isSimple()); - llvm::Value *addr = LVal.getAddress(); + llvm::Value *addr = getAtomicAddress(); if (hasPadding()) addr = CGF.Builder.CreateStructGEP(addr, 0); @@ -161,14 +195,65 @@ CGF.getContext(), LVal.getTBAAInfo()); } + /// \brief Emits atomic load. + /// \returns Loaded value. 
+  RValue EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
+                        bool AsValue);
+
+  /// \brief Emits atomic compare-and-exchange sequence.
+  /// \param Expected Expected value.
+  /// \param Desired Desired value.
+  /// \param Success Atomic ordering for success operation.
+  /// \param Failure Atomic ordering for failed operation.
+  /// \param IsWeak true if atomic operation is weak, false otherwise.
+  /// \returns Pair of values: previous value from storage (value type) and
+  /// boolean flag (i1 type) with true if success and false otherwise.
+  std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchange(
+      RValue Expected, RValue Desired,
+      llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
+      llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
+      bool IsWeak = false);
+
   /// Materialize an atomic r-value in atomic-layout memory.
   llvm::Value *materializeRValue(RValue rvalue) const;
 
 private:
   bool requiresMemSetZero(llvm::Type *type) const;
+
+  /// \brief Creates temp alloca for intermediate operations on atomic value.
+  llvm::Value *CreateTempAlloca() const;
+
+  /// \brief Emits atomic load as a libcall.
+  void EmitAtomicLoadLibcall(llvm::Value *AddForLoaded);
+  /// \brief Emits atomic load as LLVM instruction.
+  llvm::Value *EmitAtomicLoadOp();
+  /// \brief Emits atomic compare-and-exchange op as a libcall.
+  std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeLibcall(
+      llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
+      llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
+      llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent);
+  /// \brief Emits atomic compare-and-exchange op as LLVM instruction.
+  std::pair<llvm::Value *, llvm::Value *> EmitAtomicCompareExchangeOp(
+      llvm::Value *Expected, llvm::Value *Desired,
+      llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
+      llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
+      bool IsWeak = false);
 };
 }
 
+llvm::Value *AtomicInfo::CreateTempAlloca() const {
+  auto *TempAlloca = CGF.CreateMemTemp(
+      (LVal.isBitField() && ValueSizeInBits > AtomicSizeInBits) ? ValueTy
+                                                                : AtomicTy,
+      "atomic-temp");
+  TempAlloca->setAlignment(getAtomicAlignment().getQuantity());
+  // Cast to pointer to value type for bitfields.
+  if (LVal.isBitField())
+    return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+        TempAlloca, getAtomicAddress()->getType());
+  return TempAlloca;
+}
+
 static RValue emitAtomicLibcall(CodeGenFunction &CGF,
                                 StringRef fnName,
                                 QualType resultType,
@@ -217,9 +302,10 @@
   if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
     return false;
 
-  CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
-                           AtomicSizeInBits / 8,
-                           LVal.getAlignment().getQuantity());
+  CGF.Builder.CreateMemSet(
+      addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
+      CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits).getQuantity(),
+      LVal.getAlignment().getQuantity());
   return true;
 }
 
@@ -941,7 +1027,7 @@
 
 RValue AtomicInfo::convertTempToRValue(llvm::Value *addr,
                                        AggValueSlot resultSlot,
-                                       SourceLocation loc) const {
+                                       SourceLocation loc, bool AsValue) const {
   if (LVal.isSimple()) {
     if (EvaluationKind == TEK_Aggregate)
       return resultSlot.asRValue();
@@ -953,7 +1039,11 @@
 
     // Otherwise, just convert the temporary to an r-value using the
    // normal conversion routine.
return CGF.convertTempToRValue(addr, getValueType(), loc); - } else if (LVal.isBitField()) + } else if (!AsValue) + // Get RValue from temp memory as atomic for non-simple lvalues + return RValue::get( + CGF.Builder.CreateAlignedLoad(addr, AtomicAlign.getQuantity())); + else if (LVal.isBitField()) return CGF.EmitLoadOfBitfieldLValue(LValue::MakeBitfield( addr, LVal.getBitFieldInfo(), LVal.getType(), LVal.getAlignment())); else if (LVal.isVectorElt()) @@ -966,14 +1056,20 @@ addr, LVal.getExtVectorElts(), LVal.getType(), LVal.getAlignment())); } -RValue AtomicInfo::convertIntToValue(llvm::Value *IntVal, - AggValueSlot ResultSlot, - SourceLocation Loc) const { - assert(LVal.isSimple()); +RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal, + AggValueSlot ResultSlot, + SourceLocation Loc, + bool AsValue) const { // Try not to in some easy cases. assert(IntVal->getType()->isIntegerTy() && "Expected integer value"); - if (getEvaluationKind() == TEK_Scalar && !hasPadding()) { - auto *ValTy = CGF.ConvertTypeForMem(ValueTy); + if (getEvaluationKind() == TEK_Scalar && + (((!LVal.isBitField() || + LVal.getBitFieldInfo().Size == ValueSizeInBits) && + !hasPadding()) || + !AsValue)) { + auto *ValTy = AsValue + ? CGF.ConvertTypeForMem(ValueTy) + : getAtomicAddress()->getType()->getPointerElementType(); if (ValTy->isIntegerTy()) { assert(IntVal->getType() == ValTy && "Different integer types."); return RValue::get(IntVal); @@ -988,13 +1084,13 @@ llvm::Value *Temp; bool TempIsVolatile = false; CharUnits TempAlignment; - if (getEvaluationKind() == TEK_Aggregate) { + if (AsValue && getEvaluationKind() == TEK_Aggregate) { assert(!ResultSlot.isIgnored()); Temp = ResultSlot.getAddr(); TempAlignment = getValueAlignment(); TempIsVolatile = ResultSlot.isVolatile(); } else { - Temp = CGF.CreateMemTemp(getAtomicType(), "atomic-temp"); + Temp = CreateTempAlloca(); TempAlignment = getAtomicAlignment(); } @@ -1003,86 +1099,75 @@ CGF.Builder.CreateAlignedStore(IntVal, CastTemp, TempAlignment.getQuantity()) ->setVolatile(TempIsVolatile); - return convertTempToRValue(Temp, ResultSlot, Loc); + return convertTempToRValue(Temp, ResultSlot, Loc, AsValue); } -/// Emit a load from an l-value of atomic type. Note that the r-value -/// we produce is an r-value of the atomic *value* type. 
-RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc, - AggValueSlot resultSlot) { - AtomicInfo atomics(*this, src); - LValue LVal = atomics.getAtomicLValue(); - llvm::Value *SrcAddr = nullptr; - llvm::AllocaInst *NonSimpleTempAlloca = nullptr; - if (LVal.isSimple()) - SrcAddr = LVal.getAddress(); - else { - if (LVal.isBitField()) - SrcAddr = LVal.getBitFieldAddr(); - else if (LVal.isVectorElt()) - SrcAddr = LVal.getVectorAddr(); - else { - assert(LVal.isExtVectorElt()); - SrcAddr = LVal.getExtVectorAddr(); - } - NonSimpleTempAlloca = CreateTempAlloca( - SrcAddr->getType()->getPointerElementType(), "atomic-load-temp"); - NonSimpleTempAlloca->setAlignment(getContext().toBits(src.getAlignment())); - } +void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded) { + // void __atomic_load(size_t size, void *mem, void *return, int order); + CallArgList Args; + Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType()); + Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicAddress())), + CGF.getContext().VoidPtrTy); + Args.add(RValue::get(CGF.EmitCastToVoidPtr(AddForLoaded)), + CGF.getContext().VoidPtrTy); + Args.add(RValue::get(llvm::ConstantInt::get( + CGF.IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)), + CGF.getContext().IntTy); + emitAtomicLibcall(CGF, "__atomic_load", CGF.getContext().VoidTy, Args); +} + +llvm::Value *AtomicInfo::EmitAtomicLoadOp() { + // Okay, we're doing this natively. + llvm::Value *Addr = emitCastToAtomicIntPointer(getAtomicAddress()); + llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load"); + Load->setAtomic(llvm::SequentiallyConsistent); + // Other decoration. + Load->setAlignment(getAtomicAlignment().getQuantity()); + if (LVal.isVolatileQualified()) + Load->setVolatile(true); + if (LVal.getTBAAInfo()) + CGF.CGM.DecorateInstruction(Load, LVal.getTBAAInfo()); + return Load; +} + +RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc, + bool AsValue) { // Check whether we should use a library call. - if (atomics.shouldUseLibcall()) { - llvm::Value *tempAddr; - if (LVal.isSimple()) { - if (!resultSlot.isIgnored()) { - assert(atomics.getEvaluationKind() == TEK_Aggregate); - tempAddr = resultSlot.getAddr(); - } else - tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp"); + if (shouldUseLibcall()) { + llvm::Value *TempAddr; + if (LVal.isSimple() && !ResultSlot.isIgnored()) { + assert(getEvaluationKind() == TEK_Aggregate); + TempAddr = ResultSlot.getAddr(); } else - tempAddr = NonSimpleTempAlloca; + TempAddr = CreateTempAlloca(); - // void __atomic_load(size_t size, void *mem, void *return, int order); - CallArgList args; - args.add(RValue::get(atomics.getAtomicSizeValue()), - getContext().getSizeType()); - args.add(RValue::get(EmitCastToVoidPtr(SrcAddr)), getContext().VoidPtrTy); - args.add(RValue::get(EmitCastToVoidPtr(tempAddr)), getContext().VoidPtrTy); - args.add(RValue::get(llvm::ConstantInt::get( - IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)), - getContext().IntTy); - emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args); + EmitAtomicLoadLibcall(TempAddr); - // Produce the r-value. - return atomics.convertTempToRValue(tempAddr, resultSlot, loc); + // Okay, turn that back into the original value or whole atomic (for + // non-simple lvalues) type. + return convertTempToRValue(TempAddr, ResultSlot, Loc, AsValue); } // Okay, we're doing this natively. 
-  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(SrcAddr);
-  llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load");
-  load->setAtomic(llvm::SequentiallyConsistent);
-
-  // Other decoration.
-  load->setAlignment(src.getAlignment().getQuantity());
-  if (src.isVolatileQualified())
-    load->setVolatile(true);
-  if (src.getTBAAInfo())
-    CGM.DecorateInstruction(load, src.getTBAAInfo());
+  auto *Load = EmitAtomicLoadOp();
 
   // If we're ignoring an aggregate return, don't do anything.
-  if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored())
+  if (getEvaluationKind() == TEK_Aggregate && ResultSlot.isIgnored())
     return RValue::getAggregate(nullptr, false);
 
-  // Okay, turn that back into the original value type.
-  if (src.isSimple())
-    return atomics.convertIntToValue(load, resultSlot, loc);
-
-  auto *IntAddr = atomics.emitCastToAtomicIntPointer(NonSimpleTempAlloca);
-  Builder.CreateAlignedStore(load, IntAddr, src.getAlignment().getQuantity());
-  return atomics.convertTempToRValue(NonSimpleTempAlloca, resultSlot, loc);
+  // Okay, turn that back into the original value or atomic (for non-simple
+  // lvalues) type.
+  return ConvertIntToValueOrAtomic(Load, ResultSlot, Loc, AsValue);
 }
-
+/// Emit a load from an l-value of atomic type. Note that the r-value
+/// we produce is an r-value of the atomic *value* type.
+RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc,
+                                       AggValueSlot resultSlot) {
+  AtomicInfo Atomics(*this, src);
+  return Atomics.EmitAtomicLoad(resultSlot, loc, /*AsValue=*/true);
+}
 
 /// Copy an r-value into memory as part of storing to an atomic type.
 /// This needs to create a bit-pattern suitable for atomic operations.
@@ -1092,7 +1177,7 @@
   // which means that the caller is responsible for having zeroed
   // any padding.  Just do an aggregate copy of that type.
   if (rvalue.isAggregate()) {
-    CGF.EmitAggregateCopy(LVal.getAddress(),
+    CGF.EmitAggregateCopy(getAtomicAddress(),
                           rvalue.getAggregateAddr(),
                           getAtomicType(),
                           (rvalue.isVolatileQualified()
@@ -1127,24 +1212,24 @@
     return rvalue.getAggregateAddr();
 
   // Otherwise, make a temporary and materialize into it.
-  llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp");
-  LValue tempLV =
-      CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment());
-  AtomicInfo Atomics(CGF, tempLV);
+  LValue TempLV = CGF.MakeAddrLValue(CreateTempAlloca(), getAtomicType(),
+                                     getAtomicAlignment());
+  AtomicInfo Atomics(CGF, TempLV);
   Atomics.emitCopyIntoMemory(rvalue);
-  return temp;
+  return TempLV.getAddress();
 }
 
 llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal) const {
   // If we've got a scalar value of the right size, try to avoid going
   // through memory.
-  if (RVal.isScalar() && !hasPadding()) {
+  if (RVal.isScalar() && (!hasPadding() || !LVal.isSimple())) {
     llvm::Value *Value = RVal.getScalarVal();
     if (isa<llvm::IntegerType>(Value->getType()))
       return Value;
     else {
-      llvm::IntegerType *InputIntTy =
-          llvm::IntegerType::get(CGF.getLLVMContext(), getValueSizeInBits());
+      llvm::IntegerType *InputIntTy = llvm::IntegerType::get(
+          CGF.getLLVMContext(),
+          LVal.isSimple() ? getValueSizeInBits() : getAtomicSizeInBits());
       if (isa<llvm::PointerType>(Value->getType()))
         return CGF.Builder.CreatePtrToInt(Value, InputIntTy);
       else if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
         return CGF.Builder.CreateBitCast(Value, InputIntTy);
@@ -1161,6 +1246,74 @@
                                        getAtomicAlignment().getQuantity());
 }
 
+std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
+    llvm::Value *Expected, llvm::Value *Desired, llvm::AtomicOrdering Success,
+    llvm::AtomicOrdering Failure, bool IsWeak) {
+  // Do the atomic store.
+  auto *Addr = emitCastToAtomicIntPointer(getAtomicAddress());
+  auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr, Expected, Desired, Success,
+                                               Failure);
+  // Other decoration.
+  Inst->setVolatile(LVal.isVolatileQualified());
+  Inst->setWeak(IsWeak);
+
+  // Okay, turn that back into the original value type.
+  auto *PreviousVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/0);
+  auto *SuccessFailureVal = CGF.Builder.CreateExtractValue(Inst, /*Idxs=*/1);
+  return std::make_pair(PreviousVal, SuccessFailureVal);
+}
+
+std::pair<llvm::Value *, llvm::Value *>
+AtomicInfo::EmitAtomicCompareExchangeLibcall(llvm::Value *ExpectedAddr,
+                                             llvm::Value *DesiredAddr,
+                                             llvm::AtomicOrdering Success,
+                                             llvm::AtomicOrdering Failure) {
+  // bool __atomic_compare_exchange(size_t size, void *obj, void *expected,
+  // void *desired, int success, int failure);
+  CallArgList Args;
+  Args.add(RValue::get(getAtomicSizeValue()), CGF.getContext().getSizeType());
+  Args.add(RValue::get(CGF.EmitCastToVoidPtr(getAtomicAddress())),
+           CGF.getContext().VoidPtrTy);
+  Args.add(RValue::get(CGF.EmitCastToVoidPtr(ExpectedAddr)),
+           CGF.getContext().VoidPtrTy);
+  Args.add(RValue::get(CGF.EmitCastToVoidPtr(DesiredAddr)),
+           CGF.getContext().VoidPtrTy);
+  Args.add(RValue::get(llvm::ConstantInt::get(CGF.IntTy, Success)),
+           CGF.getContext().IntTy);
+  Args.add(RValue::get(llvm::ConstantInt::get(CGF.IntTy, Failure)),
+           CGF.getContext().IntTy);
+  auto SuccessFailureRVal = emitAtomicLibcall(CGF, "__atomic_compare_exchange",
+                                              CGF.getContext().BoolTy, Args);
+  auto *PreviousVal = CGF.Builder.CreateAlignedLoad(
+      ExpectedAddr, getValueAlignment().getQuantity());
+  return std::make_pair(PreviousVal, SuccessFailureRVal.getScalarVal());
+}
+
+std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
+    RValue Expected, RValue Desired, llvm::AtomicOrdering Success,
+    llvm::AtomicOrdering Failure, bool IsWeak) {
+  if (Failure >= Success)
+    // Don't assert on undefined behavior.
+    Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success);
+
+  // Check whether we should use a library call.
+  if (shouldUseLibcall()) {
+    auto *ExpectedAddr = materializeRValue(Expected);
+    // Produce a source address.
+    auto *DesiredAddr = materializeRValue(Desired);
+    return EmitAtomicCompareExchangeLibcall(ExpectedAddr, DesiredAddr, Success,
+                                            Failure);
+  }
+
+  // If we've got a scalar value of the right size, try to avoid going
+  // through memory.
+  auto *ExpectedIntVal = convertRValueToInt(Expected);
+  auto *DesiredIntVal = convertRValueToInt(Desired);
+
+  return EmitAtomicCompareExchangeOp(ExpectedIntVal, DesiredIntVal, Success,
+                                     Failure, IsWeak);
+}
+
 /// Emit a store to an l-value of atomic type.
 ///
 /// Note that the r-value is expected to be an r-value *of the atomic
@@ -1174,49 +1327,102 @@
              == dest.getAddress()->getType()->getPointerElementType());
   AtomicInfo atomics(*this, dest);
+  LValue LVal = atomics.getAtomicLValue();
 
  // If this is an initialization, just put the value there normally.
- if (isInit) { - atomics.emitCopyIntoMemory(rvalue); - return; - } + if (LVal.isSimple()) { + if (isInit) { + atomics.emitCopyIntoMemory(rvalue); + return; + } - // Check whether we should use a library call. - if (atomics.shouldUseLibcall()) { - // Produce a source address. - llvm::Value *srcAddr = atomics.materializeRValue(rvalue); + // Check whether we should use a library call. + if (atomics.shouldUseLibcall()) { + // Produce a source address. + llvm::Value *srcAddr = atomics.materializeRValue(rvalue); + + // void __atomic_store(size_t size, void *mem, void *val, int order) + CallArgList args; + args.add(RValue::get(atomics.getAtomicSizeValue()), + getContext().getSizeType()); + args.add(RValue::get(EmitCastToVoidPtr(atomics.getAtomicAddress())), + getContext().VoidPtrTy); + args.add(RValue::get(EmitCastToVoidPtr(srcAddr)), getContext().VoidPtrTy); + args.add(RValue::get(llvm::ConstantInt::get( + IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)), + getContext().IntTy); + emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args); + return; + } - // void __atomic_store(size_t size, void *mem, void *val, int order) - CallArgList args; - args.add(RValue::get(atomics.getAtomicSizeValue()), - getContext().getSizeType()); - args.add(RValue::get(EmitCastToVoidPtr(dest.getAddress())), - getContext().VoidPtrTy); - args.add(RValue::get(EmitCastToVoidPtr(srcAddr)), - getContext().VoidPtrTy); - args.add(RValue::get(llvm::ConstantInt::get( - IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)), - getContext().IntTy); - emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args); + // Okay, we're doing this natively. + llvm::Value *intValue = atomics.convertRValueToInt(rvalue); + + // Do the atomic store. + llvm::Value *addr = + atomics.emitCastToAtomicIntPointer(atomics.getAtomicAddress()); + intValue = Builder.CreateIntCast( + intValue, addr->getType()->getPointerElementType(), /*isSigned=*/false); + llvm::StoreInst *store = Builder.CreateStore(intValue, addr); + + // Initializations don't need to be atomic. + if (!isInit) + store->setAtomic(llvm::SequentiallyConsistent); + + // Other decoration. + store->setAlignment(dest.getAlignment().getQuantity()); + if (dest.isVolatileQualified()) + store->setVolatile(true); + if (dest.getTBAAInfo()) + CGM.DecorateInstruction(store, dest.getTBAAInfo()); return; } - // Okay, we're doing this natively. - llvm::Value *intValue = atomics.convertRValueToInt(rvalue); - - // Do the atomic store. - llvm::Value *addr = atomics.emitCastToAtomicIntPointer(dest.getAddress()); - llvm::StoreInst *store = Builder.CreateStore(intValue, addr); - - // Initializations don't need to be atomic. - if (!isInit) store->setAtomic(llvm::SequentiallyConsistent); - - // Other decoration. - store->setAlignment(dest.getAlignment().getQuantity()); - if (dest.isVolatileQualified()) - store->setVolatile(true); - if (dest.getTBAAInfo()) - CGM.DecorateInstruction(store, dest.getTBAAInfo()); + // Atomic load of prev value. + RValue OldRVal = atomics.EmitAtomicLoad(AggValueSlot::ignored(), + SourceLocation(), /*AsValue=*/false); + // For non-simple lvalues perform compare-and-swap procedure. 
+ auto *ContBB = createBasicBlock("atomic_cont"); + auto *ExitBB = createBasicBlock("atomic_exit"); + auto *CurBB = Builder.GetInsertBlock(); + EmitBlock(ContBB); + llvm::PHINode *PHI = Builder.CreatePHI(OldRVal.getScalarVal()->getType(), + /*NumReservedValues=*/2); + PHI->addIncoming(OldRVal.getScalarVal(), CurBB); + RValue OriginalRValue = RValue::get(PHI); + // Build new lvalue for temp address + auto *Ptr = atomics.materializeRValue(OriginalRValue); + // Build new lvalue for temp address + LValue UpdateLVal; + if (LVal.isBitField()) + UpdateLVal = LValue::MakeBitfield(Ptr, LVal.getBitFieldInfo(), + LVal.getType(), LVal.getAlignment()); + else if (LVal.isVectorElt()) + UpdateLVal = LValue::MakeVectorElt(Ptr, LVal.getVectorIdx(), LVal.getType(), + LVal.getAlignment()); + else { + assert(LVal.isExtVectorElt()); + UpdateLVal = LValue::MakeExtVectorElt(Ptr, LVal.getExtVectorElts(), + LVal.getType(), LVal.getAlignment()); + } + UpdateLVal.setTBAAInfo(LVal.getTBAAInfo()); + // Store new value in the corresponding memory area + EmitStoreThroughLValue(rvalue, UpdateLVal); + // Load new value + RValue NewRValue = RValue::get(EmitLoadOfScalar( + Ptr, LVal.isVolatile(), atomics.getAtomicAlignment().getQuantity(), + atomics.getAtomicType(), SourceLocation())); + // Try to write new value using cmpxchg operation + auto Pair = atomics.EmitAtomicCompareExchange(OriginalRValue, NewRValue); + llvm::Value *OldValue = Pair.first; + if (!atomics.shouldUseLibcall()) + // Convert integer value to original atomic type + OldValue = atomics.ConvertIntToValueOrAtomic( + OldValue, AggValueSlot::ignored(), SourceLocation(), + /*AsValue=*/false).getScalarVal(); + PHI->addIncoming(OldValue, ContBB); + Builder.CreateCondBr(Pair.second, ContBB, ExitBB); + EmitBlock(ExitBB); } /// Emit a compare-and-exchange op for atomic type. @@ -1235,56 +1441,13 @@ Obj.getAddress()->getType()->getPointerElementType()); AtomicInfo Atomics(*this, Obj); - if (Failure >= Success) - // Don't assert on undefined behavior. - Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success); - - auto Alignment = Atomics.getValueAlignment(); - // Check whether we should use a library call. - if (Atomics.shouldUseLibcall()) { - auto *ExpectedAddr = Atomics.materializeRValue(Expected); - // Produce a source address. - auto *DesiredAddr = Atomics.materializeRValue(Desired); - // bool __atomic_compare_exchange(size_t size, void *obj, void *expected, - // void *desired, int success, int failure); - CallArgList Args; - Args.add(RValue::get(Atomics.getAtomicSizeValue()), - getContext().getSizeType()); - Args.add(RValue::get(EmitCastToVoidPtr(Obj.getAddress())), - getContext().VoidPtrTy); - Args.add(RValue::get(EmitCastToVoidPtr(ExpectedAddr)), - getContext().VoidPtrTy); - Args.add(RValue::get(EmitCastToVoidPtr(DesiredAddr)), - getContext().VoidPtrTy); - Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Success)), - getContext().IntTy); - Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Failure)), - getContext().IntTy); - auto SuccessFailureRVal = emitAtomicLibcall( - *this, "__atomic_compare_exchange", getContext().BoolTy, Args); - auto *PreviousVal = - Builder.CreateAlignedLoad(ExpectedAddr, Alignment.getQuantity()); - return std::make_pair(RValue::get(PreviousVal), SuccessFailureRVal); - } - - // If we've got a scalar value of the right size, try to avoid going - // through memory. - auto *ExpectedIntVal = Atomics.convertRValueToInt(Expected); - auto *DesiredIntVal = Atomics.convertRValueToInt(Desired); - - // Do the atomic store. 
-  auto *Addr = Atomics.emitCastToAtomicIntPointer(Obj.getAddress());
-  auto *Inst = Builder.CreateAtomicCmpXchg(Addr, ExpectedIntVal, DesiredIntVal,
-                                           Success, Failure);
-  // Other decoration.
-  Inst->setVolatile(Obj.isVolatileQualified());
-  Inst->setWeak(IsWeak);
-
-  // Okay, turn that back into the original value type.
-  auto *PreviousVal = Builder.CreateExtractValue(Inst, /*Idxs=*/0);
-  auto *SuccessFailureVal = Builder.CreateExtractValue(Inst, /*Idxs=*/1);
-  return std::make_pair(Atomics.convertIntToValue(PreviousVal, Slot, Loc),
-                        RValue::get(SuccessFailureVal));
+  auto Pair = Atomics.EmitAtomicCompareExchange(Expected, Desired, Success,
+                                                Failure, IsWeak);
+  return std::make_pair(Atomics.shouldUseLibcall()
+                            ? RValue::get(Pair.first)
+                            : Atomics.ConvertIntToValueOrAtomic(
+                                  Pair.first, Slot, Loc, /*AsValue=*/true),
+                        RValue::get(Pair.second));
 }
 
 void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -846,14 +846,35 @@
   }
 }
 
+static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                   const Expr *X, const Expr *E,
+                                   SourceLocation Loc) {
+  // x = expr;
+  assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  RValue ExprRValue = CGF.EmitAnyExpr(E);
+  if (XLValue.isGlobalReg())
+    CGF.EmitStoreThroughGlobalRegLValue(ExprRValue, XLValue);
+  else
+    CGF.EmitAtomicStore(ExprRValue, XLValue, /*isInit=*/false);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().EmitOMPFlush(CGF, llvm::None, Loc);
+}
+
 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
                               bool IsSeqCst, const Expr *X, const Expr *V,
-                              const Expr *, SourceLocation Loc) {
+                              const Expr *E, SourceLocation Loc) {
   switch (Kind) {
   case OMPC_read:
     EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
     break;
   case OMPC_write:
+    EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
+    break;
   case OMPC_update:
   case OMPC_capture:
     llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -3356,8 +3356,8 @@
     auto AtomicBinOp =
         dyn_cast<BinaryOperator>(AtomicBody->IgnoreParenImpCasts());
     if (AtomicBinOp && AtomicBinOp->getOpcode() == BO_Assign) {
-      X = AtomicBinOp->getLHS()->IgnoreParenImpCasts();
-      E = AtomicBinOp->getRHS()->IgnoreParenImpCasts();
+      X = AtomicBinOp->getLHS();
+      E = AtomicBinOp->getRHS();
       if ((X->isInstantiationDependent() || X->getType()->isScalarType()) &&
           (E->isInstantiationDependent() || E->getType()->isScalarType())) {
         if (!X->isLValue()) {
Index: test/OpenMP/atomic_read_codegen.c
===================================================================
--- test/OpenMP/atomic_read_codegen.c
+++ test/OpenMP/atomic_read_codegen.c
@@ -314,9 +314,9 @@
 // CHECK: store x86_fp80
 #pragma omp atomic read
   ldv = bfx4_packed.b;
-// CHECK: [[LD:%.+]] = load atomic i32* bitcast (<2 x float>* @{{.+}} to i32*) seq_cst
-// CHECK: [[BITCAST:%.+]] = bitcast <2 x float>* [[LDTEMP:%.+]] to i32*
-// CHECK: store i32 [[LD]], i32* [[BITCAST]]
+// CHECK: [[LD:%.+]] = load atomic i64* bitcast (<2 x float>* @{{.+}} to i64*) seq_cst
+// CHECK: [[BITCAST:%.+]] =
bitcast <2 x float>* [[LDTEMP:%.+]] to i64* +// CHECK: store i64 [[LD]], i64* [[BITCAST]] // CHECK: [[LD:%.+]] = load <2 x float>* [[LDTEMP]] // CHECK: extractelement <2 x float> [[LD]] // CHECK: store i64 Index: test/OpenMP/atomic_write_codegen.c =================================================================== --- test/OpenMP/atomic_write_codegen.c +++ test/OpenMP/atomic_write_codegen.c @@ -0,0 +1,524 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +_Bool bv, bx; +char cv, cx; +unsigned char ucv, ucx; +short sv, sx; +unsigned short usv, usx; +int iv, ix; +unsigned int uiv, uix; +long lv, lx; +unsigned long ulv, ulx; +long long llv, llx; +unsigned long long ullv, ullx; +float fv, fx; +double dv, dx; +long double ldv, ldx; +_Complex int civ, cix; +_Complex float cfv, cfx; +_Complex double cdv, cdx; + +typedef int int4 __attribute__((__vector_size__(16))); +int4 int4x; + +struct BitFields { + int : 32; + int a : 31; +} bfx; + +struct BitFields_packed { + int : 32; + int a : 31; +} __attribute__ ((__packed__)) bfx_packed; + +struct BitFields2 { + int : 31; + int a : 1; +} bfx2; + +struct BitFields2_packed { + int : 31; + int a : 1; +} __attribute__ ((__packed__)) bfx2_packed; + +struct BitFields3 { + int : 11; + int a : 14; +} bfx3; + +struct BitFields3_packed { + int : 11; + int a : 14; +} __attribute__ ((__packed__)) bfx3_packed; + +struct BitFields4 { + short : 16; + int a: 1; + long b : 7; +} bfx4; + +struct BitFields4_packed { + short : 16; + int a: 1; + long b : 7; +} __attribute__ ((__packed__)) bfx4_packed; + +typedef float float2 __attribute__((ext_vector_type(2))); +float2 float2x; + +register int rix __asm__("0"); + +int main() { +// CHECK: load i8* +// CHECK: store atomic i8 +#pragma omp atomic write + bx = bv; +// CHECK: load i8* +// CHECK: store atomic i8 +#pragma omp atomic write + cx = cv; +// CHECK: load i8* +// CHECK: store atomic i8 +#pragma omp atomic write + ucx = ucv; +// CHECK: load i16* +// CHECK: store atomic i16 +#pragma omp atomic write + sx = sv; +// CHECK: load i16* +// CHECK: store atomic i16 +#pragma omp atomic write + usx = usv; +// CHECK: load i32* +// CHECK: store atomic i32 +#pragma omp atomic write + ix = iv; +// CHECK: load i32* +// CHECK: store atomic i32 +#pragma omp atomic write + uix = uiv; +// CHECK: load i64* +// CHECK: store atomic i64 +#pragma omp atomic write + lx = lv; +// CHECK: load i64* +// CHECK: store atomic i64 +#pragma omp atomic write + ulx = ulv; +// CHECK: load i64* +// CHECK: store atomic i64 +#pragma omp atomic write + llx = llv; +// CHECK: load i64* +// CHECK: store atomic i64 +#pragma omp atomic write + ullx = ullv; +// CHECK: load float* +// CHECK: bitcast float {{.*}} to i32 +// CHECK: store atomic i32 {{.*}}, i32* bitcast (float* +#pragma omp atomic write + fx = fv; +// CHECK: load double* +// CHECK: bitcast double {{.*}} to i64 +// CHECK: store atomic i64 {{.*}}, i64* bitcast (double* +#pragma omp atomic write + dx = dv; +// CHECK: [[LD:%.+]] = load x86_fp80* +// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* [[BITCAST]], i8 0, i64 16, i32 16, i1 false) +// CHECK: store x86_fp80 [[LD]], x86_fp80* [[LDTEMP]] +// 
CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i128* +// CHECK: [[LD:%.+]] = load i128* [[BITCAST]] +// CHECK: store atomic i128 [[LD]], i128* bitcast (x86_fp80* +#pragma omp atomic write + ldx = ldv; +// CHECK: [[REAL_VAL:%.+]] = load i32* getelementptr inbounds ({ i32, i32 }* @{{.*}}, i32 0, i32 0) +// CHECK: [[IMG_VAL:%.+]] = load i32* getelementptr inbounds ({ i32, i32 }* @{{.*}}, i32 0, i32 1) +// CHECK: [[TEMP_REAL_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP:%.+]], i32 0, i32 0 +// CHECK: [[TEMP_IMG_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP]], i32 0, i32 1 +// CHECK: store i32 [[REAL_VAL]], i32* [[TEMP_REAL_REF]] +// CHECK: store i32 [[IMG_VAL]], i32* [[TEMP_IMG_REF]] +// CHECK: [[BITCAST:%.+]] = bitcast { i32, i32 }* [[TEMP]] to i8* +// CHECK: call void @__atomic_store(i64 8, i8* bitcast ({ i32, i32 }* @{{.*}} to i8*), i8* [[BITCAST]], i32 5) +#pragma omp atomic write + cix = civ; +// CHECK: [[REAL_VAL:%.+]] = load float* getelementptr inbounds ({ float, float }* @{{.*}}, i32 0, i32 0) +// CHECK: [[IMG_VAL:%.+]] = load float* getelementptr inbounds ({ float, float }* @{{.*}}, i32 0, i32 1) +// CHECK: [[TEMP_REAL_REF:%.+]] = getelementptr inbounds { float, float }* [[TEMP:%.+]], i32 0, i32 0 +// CHECK: [[TEMP_IMG_REF:%.+]] = getelementptr inbounds { float, float }* [[TEMP]], i32 0, i32 1 +// CHECK: store float [[REAL_VAL]], float* [[TEMP_REAL_REF]] +// CHECK: store float [[IMG_VAL]], float* [[TEMP_IMG_REF]] +// CHECK: [[BITCAST:%.+]] = bitcast { float, float }* [[TEMP]] to i8* +// CHECK: call void @__atomic_store(i64 8, i8* bitcast ({ float, float }* @{{.*}} to i8*), i8* [[BITCAST]], i32 5) +#pragma omp atomic write + cfx = cfv; +// CHECK: [[REAL_VAL:%.+]] = load double* getelementptr inbounds ({ double, double }* @{{.*}}, i32 0, i32 0) +// CHECK: [[IMG_VAL:%.+]] = load double* getelementptr inbounds ({ double, double }* @{{.*}}, i32 0, i32 1) +// CHECK: [[TEMP_REAL_REF:%.+]] = getelementptr inbounds { double, double }* [[TEMP:%.+]], i32 0, i32 0 +// CHECK: [[TEMP_IMG_REF:%.+]] = getelementptr inbounds { double, double }* [[TEMP]], i32 0, i32 1 +// CHECK: store double [[REAL_VAL]], double* [[TEMP_REAL_REF]] +// CHECK: store double [[IMG_VAL]], double* [[TEMP_IMG_REF]] +// CHECK: [[BITCAST:%.+]] = bitcast { double, double }* [[TEMP]] to i8* +// CHECK: call void @__atomic_store(i64 16, i8* bitcast ({ double, double }* @{{.*}} to i8*), i8* [[BITCAST]], i32 5) +// CHECK: call{{.*}} @__kmpc_flush( +#pragma omp atomic seq_cst write + cdx = cdv; +// CHECK: load i8* +// CHECK: store atomic i64 +#pragma omp atomic write + ulx = bv; +// CHECK: load i8* +// CHECK: store atomic i8 +#pragma omp atomic write + bx = cv; +// CHECK: load i8* +// CHECK: store atomic i8 +// CHECK: call{{.*}} @__kmpc_flush( +#pragma omp atomic write, seq_cst + cx = ucv; +// CHECK: load i16* +// CHECK: store atomic i64 +#pragma omp atomic write + ulx = sv; +// CHECK: load i16* +// CHECK: store atomic i64 +#pragma omp atomic write + lx = usv; +// CHECK: load i32* +// CHECK: store atomic i32 +// CHECK: call{{.*}} @__kmpc_flush( +#pragma omp atomic seq_cst, write + uix = iv; +// CHECK: load i32* +// CHECK: store atomic i32 +#pragma omp atomic write + ix = uiv; +// CHECK: load i64* +// CHECK: [[VAL:%.+]] = trunc i64 %{{.*}} to i32 +// CHECK: [[TEMP_REAL_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP:%.+]], i32 0, i32 0 +// CHECK: [[TEMP_IMG_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP]], i32 0, i32 1 +// CHECK: store i32 [[VAL]], i32* [[TEMP_REAL_REF]] +// CHECK: 
store i32 0, i32* [[TEMP_IMG_REF]] +// CHECK: [[BITCAST:%.+]] = bitcast { i32, i32 }* [[TEMP]] to i8* +// CHECK: call void @__atomic_store(i64 8, i8* bitcast ({ i32, i32 }* @{{.+}} to i8*), i8* [[BITCAST]], i32 5) +#pragma omp atomic write + cix = lv; +// CHECK: load i64* +// CHECK: store atomic i32 %{{.+}}, i32* bitcast (float* +#pragma omp atomic write + fx = ulv; +// CHECK: load i64* +// CHECK: store atomic i64 %{{.+}}, i64* bitcast (double* +#pragma omp atomic write + dx = llv; +// CHECK: load i64* +// CHECK: [[VAL:%.+]] = uitofp i64 %{{.+}} to x86_fp80 +// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[TEMP:%.+]] to i8* +// CHECK: call void @llvm.memset.p0i8.i64(i8* [[BITCAST]], i8 0, i64 16, i32 16, i1 false) +// CHECK: store x86_fp80 [[VAL]], x86_fp80* [[TEMP]] +// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[TEMP]] to i128* +// CHECK: [[VAL:%.+]] = load i128* [[BITCAST]] +// CHECK: store atomic i128 [[VAL]], i128* bitcast (x86_fp80* +#pragma omp atomic write + ldx = ullv; +// CHECK: load float* +// CHECK: [[VAL:%.+]] = fptosi float %{{.*}} to i32 +// CHECK: [[TEMP_REAL_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP:%.+]], i32 0, i32 0 +// CHECK: [[TEMP_IMG_REF:%.+]] = getelementptr inbounds { i32, i32 }* [[TEMP]], i32 0, i32 1 +// CHECK: store i32 [[VAL]], i32* [[TEMP_REAL_REF]] +// CHECK: store i32 0, i32* [[TEMP_IMG_REF]] +// CHECK: [[BITCAST:%.+]] = bitcast { i32, i32 }* [[TEMP]] to i8* +// CHECK: call void @__atomic_store(i64 8, i8* bitcast ({ i32, i32 }* @{{.+}} to i8*), i8* [[BITCAST]], i32 5) +#pragma omp atomic write + cix = fv; +// CHECK: load double* +// CHECK: store atomic i16 +#pragma omp atomic write + sx = dv; +// CHECK: load x86_fp80* +// CHECK: store atomic i8 +#pragma omp atomic write + bx = ldv; +// CHECK: load i32* getelementptr inbounds ({ i32, i32 }* @{{.+}}, i32 0, i32 0) +// CHECK: load i32* getelementptr inbounds ({ i32, i32 }* @{{.+}}, i32 0, i32 1) +// CHECK: icmp ne i32 %{{.+}}, 0 +// CHECK: icmp ne i32 %{{.+}}, 0 +// CHECK: or i1 +// CHECK: store atomic i8 +#pragma omp atomic write + bx = civ; +// CHECK: load float* getelementptr inbounds ({ float, float }* @{{.*}}, i32 0, i32 0) +// CHECK: store atomic i16 +#pragma omp atomic write + usx = cfv; +// CHECK: load double* getelementptr inbounds ({ double, double }* @{{.+}}, i32 0, i32 0) +// CHECK: store atomic i64 +#pragma omp atomic write + llx = cdv; +// CHECK: [[IDX:%.+]] = load i16* @{{.+}} +// CHECK: load i8* +// CHECK: [[VEC_ITEM_VAL:%.+]] = zext i1 %{{.+}} to i32 +// CHECK: [[I128VAL:%.+]] = load atomic i128* bitcast (<4 x i32>* [[DEST:@.+]] to i128*) seq_cst +// CHECK: [[LD:%.+]] = bitcast i128 [[I128VAL]] to <4 x i32> +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_VEC_VAL:%.+]] = phi <4 x i32> [ [[LD]], %{{.+}} ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: store <4 x i32> [[OLD_VEC_VAL]], <4 x i32>* [[LDTEMP:%.+]], +// CHECK: [[VEC_VAL:%.+]] = load <4 x i32>* [[LDTEMP]] +// CHECK: [[NEW_VEC_VAL:%.+]] = insertelement <4 x i32> [[VEC_VAL]], i32 [[VEC_ITEM_VAL]], i16 [[IDX]] +// CHECK: store <4 x i32> [[NEW_VEC_VAL]], <4 x i32>* [[LDTEMP]] +// CHECK: [[NEW_VEC_VAL:%.+]] = load <4 x i32>* [[LDTEMP]] +// CHECK: [[OLD_I128:%.+]] = bitcast <4 x i32> [[OLD_VEC_VAL]] to i128 +// CHECK: [[NEW_I128:%.+]] = bitcast <4 x i32> [[NEW_VEC_VAL]] to i128 +// CHECK: [[RES:%.+]] = cmpxchg i128* bitcast (<4 x i32>* [[DEST]] to i128*), i128 [[OLD_I128]], i128 [[NEW_I128]] seq_cst seq_cst +// CHECK: [[FAILED_I128_OLD_VAL:%.+]] = extractvalue { i128, i1 } [[RES]], 0 +// CHECK: 
[[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1 +// CHECK: [[FAILED_OLD_VAL]] = bitcast i128 [[FAILED_I128_OLD_VAL]] to <4 x i32> +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp atomic write + int4x[sv] = bv; +// CHECK: load x86_fp80* @{{.+}} +// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32* bitcast (i8* getelementptr (i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*) seq_cst +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: [[BF_VALUE:%.+]] = and i32 [[NEW_VAL]], 2147483647 +// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -2147483648 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, i32* [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32* [[LDTEMP]] +// CHECK: [[RES:%.+]] = cmpxchg i32* bitcast (i8* getelementptr (i8* bitcast (%struct.BitFields* @{{.+}} to i8*), i64 4) to i32*), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] seq_cst seq_cst +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp atomic write + bfx.a = ldv; +// CHECK: load x86_fp80* @{{.+}} +// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32 +// CHECK: [[BITCAST:%.+]] = bitcast i32* [[LDTEMP:%.+]] to i8* +// CHECK: call void @__atomic_load(i64 4, i8* getelementptr (i8* bitcast (%struct.BitFields_packed* @{{.+}} to i8*), i64 4), i8* [[BITCAST]], i32 5) +// CHECK: [[PREV_VALUE:%.+]] = load i32* [[LDTEMP]] +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: [[BF_VALUE:%.+]] = and i32 [[NEW_VAL]], 2147483647 +// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -2147483648 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, i32* [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32* [[LDTEMP]] +// CHECK: store i32 [[OLD_BF_VALUE]], i32* [[TEMP_OLD_BF_ADDR:%.+]], +// CHECK: store i32 [[NEW_BF_VALUE]], i32* [[TEMP_NEW_BF_ADDR:%.+]], +// CHECK: [[BITCAST_TEMP_OLD_BF_ADDR:%.+]] = bitcast i32* [[TEMP_OLD_BF_ADDR]] to i8* +// CHECK: [[BITCAST_TEMP_NEW_BF_ADDR:%.+]] = bitcast i32* [[TEMP_NEW_BF_ADDR]] to i8* +// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i64 4, i8* getelementptr (i8* bitcast (%struct.BitFields_packed* @{{.+}} to i8*), i64 4), i8* [[BITCAST_TEMP_OLD_BF_ADDR]], i8* [[BITCAST_TEMP_NEW_BF_ADDR]], i32 7, i32 7) +// CHECK: [[FAILED_OLD_VAL]] = load i32* [[TEMP_OLD_BF_ADDR]] +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp atomic write + bfx_packed.a = ldv; +// CHECK: load x86_fp80* @{{.+}} +// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32* getelementptr inbounds (%struct.BitFields2* @{{.+}}, i32 0, i32 0) seq_cst +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 31 +// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, 2147483647 +// CHECK: or i32 [[BF_CLEAR]], 
[[BF_VALUE]] +// CHECK: store i32 %{{.+}}, i32* [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32* [[LDTEMP]] +// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields2* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] seq_cst seq_cst +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp atomic write + bfx2.a = ldv; +// CHECK: load x86_fp80* @{{.+}} +// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i8* getelementptr (i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3) seq_cst +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i8 +// CHECK: [[BF_AND:%.+]] = and i8 [[TRUNC]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i8 [[BF_AND]], 7 +// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, 127 +// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i8 %{{.+}}, i8* [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i8* [[LDTEMP]] +// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr (i8* bitcast (%struct.BitFields2_packed* @{{.+}} to i8*), i64 3), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] seq_cst seq_cst +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1 +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp atomic write + bfx2_packed.a = ldv; +// CHECK: load x86_fp80* @{{.+}} +// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i32* getelementptr inbounds (%struct.BitFields3* @{{.+}}, i32 0, i32 0) seq_cst +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i32 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: [[BF_AND:%.+]] = and i32 [[NEW_VAL]], 16383 +// CHECK: [[BF_VALUE:%.+]] = shl i32 [[BF_AND]], 11 +// CHECK: [[BF_CLEAR:%.+]] = and i32 %{{.+}}, -33552385 +// CHECK: or i32 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i32 %{{.+}}, i32* [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i32* [[LDTEMP]] +// CHECK: [[RES:%.+]] = cmpxchg i32* getelementptr inbounds (%struct.BitFields3* @{{.+}}, i32 0, i32 0), i32 [[OLD_BF_VALUE]], i32 [[NEW_BF_VALUE]] seq_cst seq_cst +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i32, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i32, i1 } [[RES]], 1 +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp atomic write + bfx3.a = ldv; +// CHECK: load x86_fp80* @{{.+}} +// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32 +// CHECK: [[LDTEMP:%.+]] = bitcast i32* %{{.+}} to i24* +// CHECK: [[BITCAST:%.+]] = bitcast i24* %{{.+}} to i8* +// CHECK: call void @__atomic_load(i64 3, i8* getelementptr (i8* bitcast (%struct.BitFields3_packed* @{{.+}} to i8*), i64 1), i8* [[BITCAST]], i32 5) +// CHECK: [[PREV_VALUE:%.+]] = load i24* [[LDTEMP]] +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i24 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i24 +// CHECK: [[BF_AND:%.+]] = and i24 
[[TRUNC]], 16383 +// CHECK: [[BF_VALUE:%.+]] = shl i24 [[BF_AND]], 3 +// CHECK: [[BF_CLEAR:%.+]] = and i24 %{{.+}}, -131065 +// CHECK: or i24 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i24 %{{.+}}, i24* [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i24* [[LDTEMP]] +// CHECK: [[TEMP_OLD_BF_ADDR:%.+]] = bitcast i32* %{{.+}} to i24* +// CHECK: store i24 [[OLD_BF_VALUE]], i24* [[TEMP_OLD_BF_ADDR]] +// CHECK: [[TEMP_NEW_BF_ADDR:%.+]] = bitcast i32* %{{.+}} to i24* +// CHECK: store i24 [[NEW_BF_VALUE]], i24* [[TEMP_NEW_BF_ADDR]] +// CHECK: [[BITCAST_TEMP_OLD_BF_ADDR:%.+]] = bitcast i24* [[TEMP_OLD_BF_ADDR]] to i8* +// CHECK: [[BITCAST_TEMP_NEW_BF_ADDR:%.+]] = bitcast i24* [[TEMP_NEW_BF_ADDR]] to i8* +// CHECK: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i64 3, i8* getelementptr (i8* bitcast (%struct.BitFields3_packed* @{{.+}} to i8*), i64 1), i8* [[BITCAST_TEMP_OLD_BF_ADDR]], i8* [[BITCAST_TEMP_NEW_BF_ADDR]], i32 7, i32 7) +// CHECK: [[FAILED_OLD_VAL]] = load i24* [[TEMP_OLD_BF_ADDR]] +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp atomic write + bfx3_packed.a = ldv; +// CHECK: load x86_fp80* @{{.+}} +// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) seq_cst +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: [[ZEXT:%.+]] = zext i32 [[NEW_VAL]] to i64 +// CHECK: [[BF_AND:%.+]] = and i64 [[ZEXT]], 1 +// CHECK: [[BF_VALUE:%.+]] = shl i64 [[BF_AND]], 16 +// CHECK: [[BF_CLEAR:%.+]] = and i64 %{{.+}}, -65537 +// CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i64 %{{.+}}, i64* [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i64* [[LDTEMP]] +// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] seq_cst seq_cst +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1 +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp atomic write + bfx4.a = ldv; +// CHECK: load x86_fp80* @{{.+}} +// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i32 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i8* getelementptr inbounds (%struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) seq_cst +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: [[TRUNC:%.+]] = trunc i32 [[NEW_VAL]] to i8 +// CHECK: [[BF_VALUE:%.+]] = and i8 [[TRUNC]], 1 +// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, -2 +// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i8 %{{.+}}, i8* [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i8* [[LDTEMP]] +// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] seq_cst seq_cst +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1 +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp atomic write + bfx4_packed.a = ldv; +// CHECK: load x86_fp80* @{{.+}} +// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i64 +// CHECK: 
[[PREV_VALUE:%.+]] = load atomic i64* bitcast (%struct.BitFields4* @{{.+}} to i64*) seq_cst +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i64 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: [[BF_AND:%.+]] = and i64 [[NEW_VAL]], 127 +// CHECK: [[BF_VALUE:%.+]] = shl i64 [[BF_AND]], 17 +// CHECK: [[BF_CLEAR:%.+]] = and i64 %{{.+}}, -16646145 +// CHECK: or i64 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i64 %{{.+}}, i64* [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i64* [[LDTEMP]] +// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (%struct.BitFields4* @{{.+}} to i64*), i64 [[OLD_BF_VALUE]], i64 [[NEW_BF_VALUE]] seq_cst seq_cst +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i64, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1 +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp atomic write + bfx4.b = ldv; +// CHECK: load x86_fp80* @{{.+}} +// CHECK: [[NEW_VAL:%.+]] = fptosi x86_fp80 %{{.+}} to i64 +// CHECK: [[PREV_VALUE:%.+]] = load atomic i8* getelementptr inbounds (%struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2) seq_cst +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_BF_VALUE:%.+]] = phi i8 [ [[PREV_VALUE]], %[[EXIT]] ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: [[TRUNC:%.+]] = trunc i64 [[NEW_VAL]] to i8 +// CHECK: [[BF_AND:%.+]] = and i8 [[TRUNC]], 127 +// CHECK: [[BF_VALUE:%.+]] = shl i8 [[BF_AND]], 1 +// CHECK: [[BF_CLEAR:%.+]] = and i8 %{{.+}}, 1 +// CHECK: or i8 [[BF_CLEAR]], [[BF_VALUE]] +// CHECK: store i8 %{{.+}}, i8* [[LDTEMP:%.+]] +// CHECK: [[NEW_BF_VALUE:%.+]] = load i8* [[LDTEMP]] +// CHECK: [[RES:%.+]] = cmpxchg i8* getelementptr inbounds (%struct.BitFields4_packed* @{{.+}}, i32 0, i32 0, i64 2), i8 [[OLD_BF_VALUE]], i8 [[NEW_BF_VALUE]] seq_cst seq_cst +// CHECK: [[FAILED_OLD_VAL]] = extractvalue { i8, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i8, i1 } [[RES]], 1 +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp atomic write + bfx4_packed.b = ldv; +// CHECK: load i64* +// CHECK: [[VEC_ITEM_VAL:%.+]] = uitofp i64 %{{.+}} to float +// CHECK: [[I64VAL:%.+]] = load atomic i64* bitcast (<2 x float>* [[DEST:@.+]] to i64*) seq_cst +// CHECK: [[LD:%.+]] = bitcast i64 [[I64VAL]] to <2 x float> +// CHECK: br label %[[CONT:.+]] +// CHECK: [[CONT]] +// CHECK: [[OLD_VEC_VAL:%.+]] = phi <2 x float> [ [[LD]], %{{.+}} ], [ [[FAILED_OLD_VAL:%.+]], %[[CONT]] ] +// CHECK: store <2 x float> [[OLD_VEC_VAL]], <2 x float>* [[LDTEMP:%.+]], +// CHECK: [[VEC_VAL:%.+]] = load <2 x float>* [[LDTEMP]] +// CHECK: [[NEW_VEC_VAL:%.+]] = insertelement <2 x float> [[VEC_VAL]], float [[VEC_ITEM_VAL]], i64 0 +// CHECK: store <2 x float> [[NEW_VEC_VAL]], <2 x float>* [[LDTEMP]] +// CHECK: [[NEW_VEC_VAL:%.+]] = load <2 x float>* [[LDTEMP]] +// CHECK: [[OLD_I64:%.+]] = bitcast <2 x float> [[OLD_VEC_VAL]] to i64 +// CHECK: [[NEW_I64:%.+]] = bitcast <2 x float> [[NEW_VEC_VAL]] to i64 +// CHECK: [[RES:%.+]] = cmpxchg i64* bitcast (<2 x float>* [[DEST]] to i64*), i64 [[OLD_I64]], i64 [[NEW_I64]] seq_cst seq_cst +// CHECK: [[FAILED_I64_OLD_VAL:%.+]] = extractvalue { i64, i1 } [[RES]], 0 +// CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i64, i1 } [[RES]], 1 +// CHECK: [[FAILED_OLD_VAL]] = bitcast i64 [[FAILED_I64_OLD_VAL]] to <2 x float> +// CHECK: br i1 [[FAIL_SUCCESS]], label %[[CONT]], label %[[EXIT:.+]] +// CHECK: [[EXIT]] +#pragma omp 
atomic write + float2x.x = ulv; +// CHECK: call i32 @llvm.read_register.i32( +// CHECK: sitofp i32 %{{.+}} to double +// CHECK: bitcast double %{{.+}} to i64 +// CHECK: store atomic i64 %{{.+}}, i64* bitcast (double* @{{.+}} to i64*) seq_cst +// CHECK: call{{.*}} @__kmpc_flush( +#pragma omp atomic write seq_cst + dv = rix; + return 0; +} + +#endif