Index: cfe/trunk/lib/CodeGen/CGAtomic.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGAtomic.cpp
+++ cfe/trunk/lib/CodeGen/CGAtomic.cpp
@@ -13,6 +13,7 @@
 #include "CodeGenFunction.h"
 #include "CGCall.h"
+#include "CGRecordLayout.h"
 #include "CodeGenModule.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
@@ -36,34 +37,69 @@
     CharUnits LValueAlign;
     TypeEvaluationKind EvaluationKind;
     bool UseLibcall;
+    LValue LVal;
+    CGBitFieldInfo BFI;
   public:
-    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) {
-      assert(lvalue.isSimple());
-
-      AtomicTy = lvalue.getType();
-      ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
-      EvaluationKind = CGF.getEvaluationKind(ValueTy);
-
+    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue)
+        : CGF(CGF), AtomicSizeInBits(0), ValueSizeInBits(0), UseLibcall(true) {
+      assert(!lvalue.isGlobalReg());
       ASTContext &C = CGF.getContext();
-
-      uint64_t ValueAlignInBits;
-      uint64_t AtomicAlignInBits;
-      TypeInfo ValueTI = C.getTypeInfo(ValueTy);
-      ValueSizeInBits = ValueTI.Width;
-      ValueAlignInBits = ValueTI.Align;
-
-      TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
-      AtomicSizeInBits = AtomicTI.Width;
-      AtomicAlignInBits = AtomicTI.Align;
-
-      assert(ValueSizeInBits <= AtomicSizeInBits);
-      assert(ValueAlignInBits <= AtomicAlignInBits);
-
-      AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
-      ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
-      if (lvalue.getAlignment().isZero())
-        lvalue.setAlignment(AtomicAlign);
-
+      if (lvalue.isSimple()) {
+        AtomicTy = lvalue.getType();
+        if (auto *ATy = AtomicTy->getAs<AtomicType>())
+          ValueTy = ATy->getValueType();
+        else
+          ValueTy = AtomicTy;
+        EvaluationKind = CGF.getEvaluationKind(ValueTy);
+
+        uint64_t ValueAlignInBits;
+        uint64_t AtomicAlignInBits;
+        TypeInfo ValueTI = C.getTypeInfo(ValueTy);
+        ValueSizeInBits = ValueTI.Width;
+        ValueAlignInBits = ValueTI.Align;
+
+        TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
+        AtomicSizeInBits = AtomicTI.Width;
+        AtomicAlignInBits = AtomicTI.Align;
+
+        assert(ValueSizeInBits <= AtomicSizeInBits);
+        assert(ValueAlignInBits <= AtomicAlignInBits);
+
+        AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
+        ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
+        if (lvalue.getAlignment().isZero())
+          lvalue.setAlignment(AtomicAlign);
+
+        LVal = lvalue;
+      } else if (lvalue.isBitField()) {
+        auto &OrigBFI = lvalue.getBitFieldInfo();
+        auto Offset = OrigBFI.Offset % C.toBits(lvalue.getAlignment());
+        AtomicSizeInBits = C.toBits(
+            C.toCharUnitsFromBits(Offset + OrigBFI.Size + C.getCharWidth() - 1)
+                .RoundUpToAlignment(lvalue.getAlignment()));
+        auto VoidPtrAddr = CGF.EmitCastToVoidPtr(lvalue.getBitFieldAddr());
+        auto OffsetInChars =
+            (C.toCharUnitsFromBits(OrigBFI.Offset) / lvalue.getAlignment()) *
+            lvalue.getAlignment();
+        VoidPtrAddr = CGF.Builder.CreateConstGEP1_64(
+            VoidPtrAddr, OffsetInChars.getQuantity());
+        auto Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            VoidPtrAddr,
+            CGF.Builder.getIntNTy(AtomicSizeInBits)->getPointerTo(),
+            "atomic_bitfield_base");
+        BFI = OrigBFI;
+        BFI.Offset = Offset;
+        BFI.StorageSize = AtomicSizeInBits;
+        LVal = LValue::MakeBitfield(Addr, BFI, lvalue.getType(),
+                                    lvalue.getAlignment());
+      } else if (lvalue.isVectorElt()) {
+        AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+        LVal = lvalue;
+      } else {
+        assert(lvalue.isExtVectorElt());
+        AtomicSizeInBits = C.getTypeSize(lvalue.getType());
+        LVal = lvalue;
+      }
       UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
           AtomicSizeInBits,
C.toBits(lvalue.getAlignment())); } @@ -76,6 +112,7 @@ uint64_t getValueSizeInBits() const { return ValueSizeInBits; } TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; } bool shouldUseLibcall() const { return UseLibcall; } + const LValue &getAtomicLValue() const { return LVal; } /// Is the atomic size larger than the underlying value type? /// @@ -87,7 +124,7 @@ return (ValueSizeInBits != AtomicSizeInBits); } - bool emitMemSetZeroIfNecessary(LValue dest) const; + bool emitMemSetZeroIfNecessary() const; llvm::Value *getAtomicSizeValue() const { CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits); @@ -110,16 +147,17 @@ SourceLocation Loc) const; /// Copy an atomic r-value into atomic-layout memory. - void emitCopyIntoMemory(RValue rvalue, LValue lvalue) const; + void emitCopyIntoMemory(RValue rvalue) const; /// Project an l-value down to the value field. - LValue projectValue(LValue lvalue) const { - llvm::Value *addr = lvalue.getAddress(); + LValue projectValue() const { + assert(LVal.isSimple()); + llvm::Value *addr = LVal.getAddress(); if (hasPadding()) addr = CGF.Builder.CreateStructGEP(addr, 0); - return LValue::MakeAddr(addr, getValueType(), lvalue.getAlignment(), - CGF.getContext(), lvalue.getTBAAInfo()); + return LValue::MakeAddr(addr, getValueType(), LVal.getAlignment(), + CGF.getContext(), LVal.getTBAAInfo()); } /// Materialize an atomic r-value in atomic-layout memory. @@ -172,14 +210,15 @@ llvm_unreachable("bad evaluation kind"); } -bool AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const { - llvm::Value *addr = dest.getAddress(); +bool AtomicInfo::emitMemSetZeroIfNecessary() const { + assert(LVal.isSimple()); + llvm::Value *addr = LVal.getAddress(); if (!requiresMemSetZero(addr->getType()->getPointerElementType())) return false; CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0), AtomicSizeInBits / 8, - dest.getAlignment().getQuantity()); + LVal.getAlignment().getQuantity()); return true; } @@ -902,21 +941,34 @@ RValue AtomicInfo::convertTempToRValue(llvm::Value *addr, AggValueSlot resultSlot, SourceLocation loc) const { - if (EvaluationKind == TEK_Aggregate) - return resultSlot.asRValue(); - - // Drill into the padding structure if we have one. - if (hasPadding()) - addr = CGF.Builder.CreateStructGEP(addr, 0); - - // Otherwise, just convert the temporary to an r-value using the - // normal conversion routine. - return CGF.convertTempToRValue(addr, getValueType(), loc); + if (LVal.isSimple()) { + if (EvaluationKind == TEK_Aggregate) + return resultSlot.asRValue(); + + // Drill into the padding structure if we have one. + if (hasPadding()) + addr = CGF.Builder.CreateStructGEP(addr, 0); + + // Otherwise, just convert the temporary to an r-value using the + // normal conversion routine. + return CGF.convertTempToRValue(addr, getValueType(), loc); + } else if (LVal.isBitField()) + return CGF.EmitLoadOfBitfieldLValue(LValue::MakeBitfield( + addr, LVal.getBitFieldInfo(), LVal.getType(), LVal.getAlignment())); + else if (LVal.isVectorElt()) + return CGF.EmitLoadOfLValue(LValue::MakeVectorElt(addr, LVal.getVectorIdx(), + LVal.getType(), + LVal.getAlignment()), + loc); + assert(LVal.isExtVectorElt()); + return CGF.EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt( + addr, LVal.getExtVectorElts(), LVal.getType(), LVal.getAlignment())); } RValue AtomicInfo::convertIntToValue(llvm::Value *IntVal, AggValueSlot ResultSlot, SourceLocation Loc) const { + assert(LVal.isSimple()); // Try not to in some easy cases. 
assert(IntVal->getType()->isIntegerTy() && "Expected integer value"); if (getEvaluationKind() == TEK_Scalar && !hasPadding()) { @@ -958,25 +1010,43 @@ RValue CodeGenFunction::EmitAtomicLoad(LValue src, SourceLocation loc, AggValueSlot resultSlot) { AtomicInfo atomics(*this, src); + LValue LVal = atomics.getAtomicLValue(); + llvm::Value *SrcAddr = nullptr; + llvm::AllocaInst *NonSimpleTempAlloca = nullptr; + if (LVal.isSimple()) + SrcAddr = LVal.getAddress(); + else { + if (LVal.isBitField()) + SrcAddr = LVal.getBitFieldAddr(); + else if (LVal.isVectorElt()) + SrcAddr = LVal.getVectorAddr(); + else { + assert(LVal.isExtVectorElt()); + SrcAddr = LVal.getExtVectorAddr(); + } + NonSimpleTempAlloca = CreateTempAlloca( + SrcAddr->getType()->getPointerElementType(), "atomic-load-temp"); + NonSimpleTempAlloca->setAlignment(getContext().toBits(src.getAlignment())); + } // Check whether we should use a library call. if (atomics.shouldUseLibcall()) { llvm::Value *tempAddr; - if (!resultSlot.isIgnored()) { - assert(atomics.getEvaluationKind() == TEK_Aggregate); - tempAddr = resultSlot.getAddr(); - } else { - tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp"); - } + if (LVal.isSimple()) { + if (!resultSlot.isIgnored()) { + assert(atomics.getEvaluationKind() == TEK_Aggregate); + tempAddr = resultSlot.getAddr(); + } else + tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp"); + } else + tempAddr = NonSimpleTempAlloca; // void __atomic_load(size_t size, void *mem, void *return, int order); CallArgList args; args.add(RValue::get(atomics.getAtomicSizeValue()), getContext().getSizeType()); - args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())), - getContext().VoidPtrTy); - args.add(RValue::get(EmitCastToVoidPtr(tempAddr)), - getContext().VoidPtrTy); + args.add(RValue::get(EmitCastToVoidPtr(SrcAddr)), getContext().VoidPtrTy); + args.add(RValue::get(EmitCastToVoidPtr(tempAddr)), getContext().VoidPtrTy); args.add(RValue::get(llvm::ConstantInt::get( IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)), getContext().IntTy); @@ -987,7 +1057,7 @@ } // Okay, we're doing this natively. - llvm::Value *addr = atomics.emitCastToAtomicIntPointer(src.getAddress()); + llvm::Value *addr = atomics.emitCastToAtomicIntPointer(SrcAddr); llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load"); load->setAtomic(llvm::SequentiallyConsistent); @@ -1003,40 +1073,46 @@ return RValue::getAggregate(nullptr, false); // Okay, turn that back into the original value type. - return atomics.convertIntToValue(load, resultSlot, loc); + if (src.isSimple()) + return atomics.convertIntToValue(load, resultSlot, loc); + + auto *IntAddr = atomics.emitCastToAtomicIntPointer(NonSimpleTempAlloca); + Builder.CreateAlignedStore(load, IntAddr, src.getAlignment().getQuantity()); + return atomics.convertTempToRValue(NonSimpleTempAlloca, resultSlot, loc); } /// Copy an r-value into memory as part of storing to an atomic type. /// This needs to create a bit-pattern suitable for atomic operations. -void AtomicInfo::emitCopyIntoMemory(RValue rvalue, LValue dest) const { +void AtomicInfo::emitCopyIntoMemory(RValue rvalue) const { + assert(LVal.isSimple()); // If we have an r-value, the rvalue should be of the atomic type, // which means that the caller is responsible for having zeroed // any padding. Just do an aggregate copy of that type. 
if (rvalue.isAggregate()) { - CGF.EmitAggregateCopy(dest.getAddress(), + CGF.EmitAggregateCopy(LVal.getAddress(), rvalue.getAggregateAddr(), getAtomicType(), (rvalue.isVolatileQualified() - || dest.isVolatileQualified()), - dest.getAlignment()); + || LVal.isVolatileQualified()), + LVal.getAlignment()); return; } // Okay, otherwise we're copying stuff. // Zero out the buffer if necessary. - emitMemSetZeroIfNecessary(dest); + emitMemSetZeroIfNecessary(); // Drill past the padding if present. - dest = projectValue(dest); + LValue TempLVal = projectValue(); // Okay, store the rvalue in. if (rvalue.isScalar()) { - CGF.EmitStoreOfScalar(rvalue.getScalarVal(), dest, /*init*/ true); + CGF.EmitStoreOfScalar(rvalue.getScalarVal(), TempLVal, /*init*/ true); } else { - CGF.EmitStoreOfComplex(rvalue.getComplexVal(), dest, /*init*/ true); + CGF.EmitStoreOfComplex(rvalue.getComplexVal(), TempLVal, /*init*/ true); } } @@ -1051,8 +1127,10 @@ // Otherwise, make a temporary and materialize into it. llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp"); - LValue tempLV = CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment()); - emitCopyIntoMemory(rvalue, tempLV); + LValue tempLV = + CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment()); + AtomicInfo Atomics(CGF, tempLV); + Atomics.emitCopyIntoMemory(rvalue); return temp; } @@ -1098,7 +1176,7 @@ // If this is an initialization, just put the value there normally. if (isInit) { - atomics.emitCopyIntoMemory(rvalue, dest); + atomics.emitCopyIntoMemory(rvalue); return; } @@ -1214,13 +1292,13 @@ switch (atomics.getEvaluationKind()) { case TEK_Scalar: { llvm::Value *value = EmitScalarExpr(init); - atomics.emitCopyIntoMemory(RValue::get(value), dest); + atomics.emitCopyIntoMemory(RValue::get(value)); return; } case TEK_Complex: { ComplexPairTy value = EmitComplexExpr(init); - atomics.emitCopyIntoMemory(RValue::getComplex(value), dest); + atomics.emitCopyIntoMemory(RValue::getComplex(value)); return; } @@ -1229,8 +1307,8 @@ // of atomic type. bool Zeroed = false; if (!init->getType()->isAtomicType()) { - Zeroed = atomics.emitMemSetZeroIfNecessary(dest); - dest = atomics.projectValue(dest); + Zeroed = atomics.emitMemSetZeroIfNecessary(); + dest = atomics.projectValue(); } // Evaluate the expression directly into the destination. Index: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp +++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp @@ -691,8 +691,125 @@ llvm_unreachable("CodeGen for 'omp ordered' is not supported yet."); } -void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &) { - llvm_unreachable("CodeGen for 'omp atomic' is not supported yet."); +static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, + QualType SrcType, QualType DestType) { + assert(CGF.hasScalarEvaluationKind(DestType) && + "DestType must have scalar evaluation kind."); + assert(!Val.isAggregate() && "Must be a scalar or complex."); + return Val.isScalar() + ? 
CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
+             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
+                                                 DestType);
+}
+
+static CodeGenFunction::ComplexPairTy
+convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
+                      QualType DestType) {
+  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
+         "DestType must have complex evaluation kind.");
+  CodeGenFunction::ComplexPairTy ComplexVal;
+  if (Val.isScalar()) {
+    // Convert the input element to the element type of the complex.
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    auto ScalarVal =
+        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
+    ComplexVal = CodeGenFunction::ComplexPairTy(
+        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
+  } else {
+    assert(Val.isComplex() && "Must be a scalar or complex.");
+    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    ComplexVal.first = CGF.EmitScalarConversion(
+        Val.getComplexVal().first, SrcElementType, DestElementType);
+    ComplexVal.second = CGF.EmitScalarConversion(
+        Val.getComplexVal().second, SrcElementType, DestElementType);
+  }
+  return ComplexVal;
+}
+
+static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                  const Expr *X, const Expr *V,
+                                  SourceLocation Loc) {
+  // v = x;
+  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
+  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  LValue VLValue = CGF.EmitLValue(V);
+  RValue Res = XLValue.isGlobalReg() ? CGF.EmitLoadOfLValue(XLValue, Loc)
+                                     : CGF.EmitAtomicLoad(XLValue, Loc);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+ if (IsSeqCst) + CGF.CGM.getOpenMPRuntime().EmitOMPFlush(CGF, llvm::None, Loc); + switch (CGF.getEvaluationKind(V->getType())) { + case TEK_Scalar: + CGF.EmitStoreOfScalar( + convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue); + break; + case TEK_Complex: + CGF.EmitStoreOfComplex( + convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue, + /*isInit=*/false); + break; + case TEK_Aggregate: + llvm_unreachable("Must be a scalar or complex."); + } +} + +static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, + bool IsSeqCst, const Expr *X, const Expr *V, + const Expr *, SourceLocation Loc) { + switch (Kind) { + case OMPC_read: + EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); + break; + case OMPC_write: + case OMPC_update: + case OMPC_capture: + llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet."); + case OMPC_if: + case OMPC_final: + case OMPC_num_threads: + case OMPC_private: + case OMPC_firstprivate: + case OMPC_lastprivate: + case OMPC_reduction: + case OMPC_safelen: + case OMPC_collapse: + case OMPC_default: + case OMPC_seq_cst: + case OMPC_shared: + case OMPC_linear: + case OMPC_aligned: + case OMPC_copyin: + case OMPC_copyprivate: + case OMPC_flush: + case OMPC_proc_bind: + case OMPC_schedule: + case OMPC_ordered: + case OMPC_nowait: + case OMPC_untied: + case OMPC_threadprivate: + case OMPC_mergeable: + case OMPC_unknown: + llvm_unreachable("Clause is not allowed in 'omp atomic'."); + } +} + +void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { + bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst); + OpenMPClauseKind Kind = OMPC_unknown; + for (auto *C : S.clauses()) { + // Find first clause (skip seq_cst clause, if it is first). + if (C->getClauseKind() != OMPC_seq_cst) { + Kind = C->getClauseKind(); + break; + } + } + EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(), + S.getLocStart()); } void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) { Index: cfe/trunk/lib/Sema/SemaType.cpp =================================================================== --- cfe/trunk/lib/Sema/SemaType.cpp +++ cfe/trunk/lib/Sema/SemaType.cpp @@ -2716,7 +2716,7 @@ // and not, for instance, a pointer to a function. if (D.getDeclSpec().containsPlaceholderType() && !FTI.hasTrailingReturnType() && chunkIndex == 0 && - !S.getLangOpts().CPlusPlus14) { + !S.getLangOpts().CPlusPlus14 && !S.getLangOpts().MSVCCompat) { S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(), D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto ? 
diag::err_auto_missing_trailing_return Index: cfe/trunk/test/OpenMP/atomic_read_codegen.c =================================================================== --- cfe/trunk/test/OpenMP/atomic_read_codegen.c +++ cfe/trunk/test/OpenMP/atomic_read_codegen.c @@ -0,0 +1,333 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +_Bool bv, bx; +char cv, cx; +unsigned char ucv, ucx; +short sv, sx; +unsigned short usv, usx; +int iv, ix; +unsigned int uiv, uix; +long lv, lx; +unsigned long ulv, ulx; +long long llv, llx; +unsigned long long ullv, ullx; +float fv, fx; +double dv, dx; +long double ldv, ldx; +_Complex int civ, cix; +_Complex float cfv, cfx; +_Complex double cdv, cdx; + +typedef int int4 __attribute__((__vector_size__(16))); +int4 int4x; + +struct BitFields { + int : 32; + int a : 31; +} bfx; + +struct BitFields_packed { + int : 32; + int a : 31; +} __attribute__ ((__packed__)) bfx_packed; + +struct BitFields2 { + int : 31; + int a : 1; +} bfx2; + +struct BitFields2_packed { + int : 31; + int a : 1; +} __attribute__ ((__packed__)) bfx2_packed; + +struct BitFields3 { + int : 11; + int a : 14; +} bfx3; + +struct BitFields3_packed { + int : 11; + int a : 14; +} __attribute__ ((__packed__)) bfx3_packed; + +struct BitFields4 { + short : 16; + int a: 1; + long b : 7; +} bfx4; + +struct BitFields4_packed { + short : 16; + int a: 1; + long b : 7; +} __attribute__ ((__packed__)) bfx4_packed; + +typedef float float2 __attribute__((ext_vector_type(2))); +float2 float2x; + +register int rix __asm__("0"); + +int main() { +// CHECK: load atomic i8* +// CHECK: store i8 +#pragma omp atomic read + bv = bx; +// CHECK: load atomic i8* +// CHECK: store i8 +#pragma omp atomic read + cv = cx; +// CHECK: load atomic i8* +// CHECK: store i8 +#pragma omp atomic read + ucv = ucx; +// CHECK: load atomic i16* +// CHECK: store i16 +#pragma omp atomic read + sv = sx; +// CHECK: load atomic i16* +// CHECK: store i16 +#pragma omp atomic read + usv = usx; +// CHECK: load atomic i32* +// CHECK: store i32 +#pragma omp atomic read + iv = ix; +// CHECK: load atomic i32* +// CHECK: store i32 +#pragma omp atomic read + uiv = uix; +// CHECK: load atomic i64* +// CHECK: store i64 +#pragma omp atomic read + lv = lx; +// CHECK: load atomic i64* +// CHECK: store i64 +#pragma omp atomic read + ulv = ulx; +// CHECK: load atomic i64* +// CHECK: store i64 +#pragma omp atomic read + llv = llx; +// CHECK: load atomic i64* +// CHECK: store i64 +#pragma omp atomic read + ullv = ullx; +// CHECK: load atomic i32* bitcast (float* +// CHECK: bitcast i32 {{.*}} to float +// CHECK: store float +#pragma omp atomic read + fv = fx; +// CHECK: load atomic i64* bitcast (double* +// CHECK: bitcast i64 {{.*}} to double +// CHECK: store double +#pragma omp atomic read + dv = dx; +// CHECK: [[LD:%.+]] = load atomic i128* bitcast (x86_fp80* +// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i128* +// CHECK: store i128 [[LD]], i128* [[BITCAST]] +// CHECK: [[LD:%.+]] = load x86_fp80* [[LDTEMP]] +// CHECK: store x86_fp80 [[LD]] +#pragma omp atomic read + ldv = ldx; +// CHECK: call{{.*}} void @__atomic_load(i64 8, +// CHECK: store i32 +// CHECK: store i32 +#pragma omp atomic read + civ = cix; +// 
CHECK: call{{.*}} void @__atomic_load(i64 8, +// CHECK: store float +// CHECK: store float +#pragma omp atomic read + cfv = cfx; +// CHECK: call{{.*}} void @__atomic_load(i64 16, +// CHECK: call{{.*}} @__kmpc_flush( +// CHECK: store double +// CHECK: store double +#pragma omp atomic seq_cst read + cdv = cdx; +// CHECK: load atomic i64* +// CHECK: store i8 +#pragma omp atomic read + bv = ulx; +// CHECK: load atomic i8* +// CHECK: store i8 +#pragma omp atomic read + cv = bx; +// CHECK: load atomic i8* +// CHECK: call{{.*}} @__kmpc_flush( +// CHECK: store i8 +#pragma omp atomic read, seq_cst + ucv = cx; +// CHECK: load atomic i64* +// CHECK: store i16 +#pragma omp atomic read + sv = ulx; +// CHECK: load atomic i64* +// CHECK: store i16 +#pragma omp atomic read + usv = lx; +// CHECK: load atomic i32* +// CHECK: call{{.*}} @__kmpc_flush( +// CHECK: store i32 +#pragma omp atomic seq_cst, read + iv = uix; +// CHECK: load atomic i32* +// CHECK: store i32 +#pragma omp atomic read + uiv = ix; +// CHECK: call{{.*}} void @__atomic_load(i64 8, +// CHECK: store i64 +#pragma omp atomic read + lv = cix; +// CHECK: load atomic i32* +// CHECK: store i64 +#pragma omp atomic read + ulv = fx; +// CHECK: load atomic i64* +// CHECK: store i64 +#pragma omp atomic read + llv = dx; +// CHECK: load atomic i128* +// CHECK: store i64 +#pragma omp atomic read + ullv = ldx; +// CHECK: call{{.*}} void @__atomic_load(i64 8, +// CHECK: store float +#pragma omp atomic read + fv = cix; +// CHECK: load atomic i16* +// CHECK: store double +#pragma omp atomic read + dv = sx; +// CHECK: load atomic i8* +// CHECK: store x86_fp80 +#pragma omp atomic read + ldv = bx; +// CHECK: load atomic i8* +// CHECK: store i32 +// CHECK: store i32 +#pragma omp atomic read + civ = bx; +// CHECK: load atomic i16* +// CHECK: store float +// CHECK: store float +#pragma omp atomic read + cfv = usx; +// CHECK: load atomic i64* +// CHECK: store double +// CHECK: store double +#pragma omp atomic read + cdv = llx; +// CHECK: [[I128VAL:%.+]] = load atomic i128* bitcast (<4 x i32>* @{{.+}} to i128*) seq_cst +// CHECK: [[I128PTR:%.+]] = bitcast <4 x i32>* [[LDTEMP:%.+]] to i128* +// CHECK: store i128 [[I128VAL]], i128* [[I128PTR]] +// CHECK: [[LD:%.+]] = load <4 x i32>* [[LDTEMP]] +// CHECK: extractelement <4 x i32> [[LD]] +// CHECK: store i8 +#pragma omp atomic read + bv = int4x[0]; +// CHECK: [[LD:%.+]] = load atomic i32* bitcast (i8* getelementptr (i8* bitcast (%{{.+}}* @{{.+}} to i8*), i64 4) to i32*) seq_cst +// CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]] +// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]] +// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 1 +// CHECK: ashr i32 [[SHL]], 1 +// CHECK: store x86_fp80 +#pragma omp atomic read + ldv = bfx.a; +// CHECK: [[LDTEMP_VOID_PTR:%.+]] = bitcast i32* [[LDTEMP:%.+]] to i8* +// CHECK: call void @__atomic_load(i64 4, i8* getelementptr (i8* bitcast (%struct.BitFields_packed* @bfx_packed to i8*), i64 4), i8* [[LDTEMP_VOID_PTR]], i32 5) +// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]] +// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 1 +// CHECK: ashr i32 [[SHL]], 1 +// CHECK: store x86_fp80 +#pragma omp atomic read + ldv = bfx_packed.a; +// CHECK: [[LD:%.+]] = load atomic i32* getelementptr inbounds (%struct.BitFields2* @bfx2, i32 0, i32 0) seq_cst +// CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]] +// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]] +// CHECK: ashr i32 [[LD]], 31 +// CHECK: store x86_fp80 +#pragma omp atomic read + ldv = bfx2.a; +// CHECK: [[LD:%.+]] = load atomic i8* getelementptr (i8* bitcast (%struct.BitFields2_packed* 
@bfx2_packed to i8*), i64 3) seq_cst +// CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]] +// CHECK: [[LD:%.+]] = load i8* [[LDTEMP]] +// CHECK: ashr i8 [[LD]], 7 +// CHECK: store x86_fp80 +#pragma omp atomic read + ldv = bfx2_packed.a; +// CHECK: [[LD:%.+]] = load atomic i32* getelementptr inbounds (%struct.BitFields3* @bfx3, i32 0, i32 0) seq_cst +// CHECK: store i32 [[LD]], i32* [[LDTEMP:%.+]] +// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]] +// CHECK: [[SHL:%.+]] = shl i32 [[LD]], 7 +// CHECK: ashr i32 [[SHL]], 18 +// CHECK: store x86_fp80 +#pragma omp atomic read + ldv = bfx3.a; +// CHECK: [[LDTEMP_VOID_PTR:%.+]] = bitcast i24* [[LDTEMP:%.+]] to i8* +// CHECK: call void @__atomic_load(i64 3, i8* getelementptr (i8* bitcast (%struct.BitFields3_packed* @bfx3_packed to i8*), i64 1), i8* [[LDTEMP_VOID_PTR]], i32 5) +// CHECK: [[LD:%.+]] = load i24* [[LDTEMP]] +// CHECK: [[SHL:%.+]] = shl i24 [[LD]], 7 +// CHECK: [[ASHR:%.+]] = ashr i24 [[SHL]], 10 +// CHECK: sext i24 [[ASHR]] to i32 +// CHECK: store x86_fp80 +#pragma omp atomic read + ldv = bfx3_packed.a; +// CHECK: [[LD:%.+]] = load atomic i64* bitcast (%struct.BitFields4* @bfx4 to i64*) seq_cst +// CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]] +// CHECK: [[LD:%.+]] = load i64* [[LDTEMP]] +// CHECK: [[SHL:%.+]] = shl i64 [[LD]], 47 +// CHECK: [[ASHR:%.+]] = ashr i64 [[SHL]], 63 +// CHECK: trunc i64 [[ASHR]] to i32 +// CHECK: store x86_fp80 +#pragma omp atomic read + ldv = bfx4.a; +// CHECK: [[LD:%.+]] = load atomic i8* getelementptr inbounds (%struct.BitFields4_packed* @bfx4_packed, i32 0, i32 0, i64 2) seq_cst +// CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]] +// CHECK: [[LD:%.+]] = load i8* [[LDTEMP]] +// CHECK: [[SHL:%.+]] = shl i8 [[LD]], 7 +// CHECK: [[ASHR:%.+]] = ashr i8 [[SHL]], 7 +// CHECK: sext i8 [[ASHR]] to i32 +// CHECK: store x86_fp80 +#pragma omp atomic read + ldv = bfx4_packed.a; +// CHECK: [[LD:%.+]] = load atomic i64* bitcast (%struct.BitFields4* @bfx4 to i64*) seq_cst +// CHECK: store i64 [[LD]], i64* [[LDTEMP:%.+]] +// CHECK: [[LD:%.+]] = load i64* [[LDTEMP]] +// CHECK: [[SHL:%.+]] = shl i64 [[LD]], 40 +// CHECK: [[ASHR:%.+]] = ashr i64 [[SHL]], 57 +// CHECK: store x86_fp80 +#pragma omp atomic read + ldv = bfx4.b; +// CHECK: [[LD:%.+]] = load atomic i8* getelementptr inbounds (%struct.BitFields4_packed* @bfx4_packed, i32 0, i32 0, i64 2) seq_cst +// CHECK: store i8 [[LD]], i8* [[LDTEMP:%.+]] +// CHECK: [[LD:%.+]] = load i8* [[LDTEMP]] +// CHECK: [[ASHR:%.+]] = ashr i8 [[LD]], 1 +// CHECK: sext i8 [[ASHR]] to i64 +// CHECK: store x86_fp80 +#pragma omp atomic read + ldv = bfx4_packed.b; +// CHECK: [[LD:%.+]] = load atomic i32* bitcast (<2 x float>* @{{.+}} to i32*) seq_cst +// CHECK: [[BITCAST:%.+]] = bitcast <2 x float>* [[LDTEMP:%.+]] to i32* +// CHECK: store i32 [[LD]], i32* [[BITCAST]] +// CHECK: [[LD:%.+]] = load <2 x float>* [[LDTEMP]] +// CHECK: extractelement <2 x float> [[LD]] +// CHECK: store i64 +#pragma omp atomic read + ulv = float2x.x; +// CHECK: call{{.*}} i{{[0-9]+}} @llvm.read_register +// CHECK: call{{.*}} @__kmpc_flush( +// CHECK: store double +#pragma omp atomic read seq_cst + dv = rix; + return 0; +} + +#endif
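
Illustrative note (not part of the patch): for a bit-field l-value the new AtomicInfo constructor widens the access to the smallest storage unit that contains the whole field, is a whole number of chars, and is a multiple of the l-value's alignment, then rebuilds the CGBitFieldInfo relative to that unit. The standalone C++ sketch below mirrors that rounding arithmetic with plain integers; it is a minimal sketch assuming 8-bit chars, and the names widenBitField and AtomicBitFieldSlot are hypothetical, not Clang APIs. The asserted values reproduce the i32 unit the test expects for bfx3.a and the 3-byte __atomic_load at byte offset 1 it expects for bfx3_packed.a.

// A minimal, self-contained sketch (not Clang code) of the widening arithmetic
// performed by the bit-field path in AtomicInfo, using plain integers instead
// of CharUnits.  Values mirror the BitFields3 / BitFields3_packed cases in the
// test above: bit offset 11, bit width 14, container alignment 4 resp. 1 bytes.
#include <cassert>
#include <cstdint>

struct AtomicBitFieldSlot {
  uint64_t OffsetInBits; // bit offset of the field inside the widened unit
  uint64_t StorageBits;  // size of the unit that is loaded/stored atomically
  uint64_t ByteOffset;   // byte offset of that unit from the container start
};

static AtomicBitFieldSlot widenBitField(uint64_t FieldOffsetBits,
                                        uint64_t FieldSizeBits,
                                        uint64_t AlignBytes) {
  const uint64_t CharWidth = 8; // assumption: 8-bit chars
  uint64_t AlignBits = AlignBytes * CharWidth;
  // Offset of the field inside its aligned storage unit.
  uint64_t Offset = FieldOffsetBits % AlignBits;
  // Round the covered bits up to whole bytes, then up to the alignment.
  uint64_t Bytes = (Offset + FieldSizeBits + CharWidth - 1) / CharWidth;
  uint64_t StorageBytes = ((Bytes + AlignBytes - 1) / AlignBytes) * AlignBytes;
  // Byte offset of the storage unit, rounded down to the alignment.
  uint64_t ByteOffset = (FieldOffsetBits / CharWidth / AlignBytes) * AlignBytes;
  return {Offset, StorageBytes * CharWidth, ByteOffset};
}

int main() {
  // struct BitFields3 { int : 11; int a : 14; }  -> one naturally aligned i32.
  AtomicBitFieldSlot S = widenBitField(/*offset*/ 11, /*size*/ 14, /*align*/ 4);
  assert(S.StorageBits == 32 && S.ByteOffset == 0 && S.OffsetInBits == 11);
  // Packed variant (alignment 1) -> a 3-byte (i24) unit starting at byte 1,
  // matching the __atomic_load(i64 3, ..., i64 1) call the test checks for.
  S = widenBitField(11, 14, 1);
  assert(S.StorageBits == 24 && S.ByteOffset == 1 && S.OffsetInBits == 3);
  return 0;
}

Packing the container drops its alignment to 1 byte, so the packed cases in the test go through the __atomic_load libcall (the widened unit is either not a power-of-two size or not sufficiently aligned for hasBuiltinAtomic), while the naturally aligned cases stay on native atomic loads.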