Index: include/clang/Basic/TargetInfo.h
===================================================================
--- include/clang/Basic/TargetInfo.h
+++ include/clang/Basic/TargetInfo.h
@@ -370,6 +370,14 @@
   /// \brief Return the maximum width lock-free atomic operation which can be
   /// inlined given the supported features of the given target.
   unsigned getMaxAtomicInlineWidth() const { return MaxAtomicInlineWidth; }
+  /// \brief Returns true if the given target supports lock-free atomic
+  /// operations at the specified width and alignment.
+  virtual bool hasBuiltinAtomic(uint64_t AtomicSizeInBits,
+                                uint64_t AlignmentInBits) const {
+    return AtomicSizeInBits <= AlignmentInBits &&
+           AtomicSizeInBits <= getMaxAtomicInlineWidth() &&
+           llvm::isPowerOf2_64(AtomicSizeInBits / getTypeWidth(SignedChar));
+  }
 
   /// \brief Return the maximum vector alignment supported for the given target.
   unsigned getMaxVectorAlign() const { return MaxVectorAlign; }
Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -41,7 +41,10 @@
       assert(lvalue.isSimple());
       AtomicTy = lvalue.getType();
-      ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
+      if (auto *ATy = AtomicTy->getAs<AtomicType>())
+        ValueTy = ATy->getValueType();
+      else
+        ValueTy = AtomicTy;
       EvaluationKind = CGF.getEvaluationKind(ValueTy);
 
       ASTContext &C = CGF.getContext();
@@ -64,9 +67,8 @@
       if (lvalue.getAlignment().isZero())
         lvalue.setAlignment(AtomicAlign);
 
-      UseLibcall =
-          (AtomicSizeInBits > uint64_t(C.toBits(lvalue.getAlignment())) ||
-           AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth());
+      UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
+          AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
     }
 
     QualType getAtomicType() const { return AtomicTy; }
@@ -74,7 +76,7 @@
     CharUnits getAtomicAlignment() const { return AtomicAlign; }
     CharUnits getValueAlignment() const { return ValueAlign; }
     uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
-    uint64_t getValueSizeInBits() const { return AtomicSizeInBits; }
+    uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
     TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
     bool shouldUseLibcall() const { return UseLibcall; }
@@ -965,13 +967,12 @@
   llvm::Type *resultTy = CGM.getTypes().ConvertTypeForMem(valueType);
   if (isa<llvm::IntegerType>(resultTy)) {
     assert(result->getType() == resultTy);
-    result = EmitFromMemory(result, valueType);
+    return RValue::get(EmitFromMemory(result, valueType));
   } else if (isa<llvm::PointerType>(resultTy)) {
-    result = Builder.CreateIntToPtr(result, resultTy);
-  } else {
-    result = Builder.CreateBitCast(result, resultTy);
+    return RValue::get(Builder.CreateIntToPtr(result, resultTy));
+  } else if (llvm::CastInst::isBitCastable(result->getType(), resultTy)) {
+    return RValue::get(Builder.CreateBitCast(result, resultTy));
   }
-  return RValue::get(result);
 
   // Create a temporary. This needs to be big enough to hold the
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -94,7 +94,11 @@
   //             kmp_int32 num_threads);
   OMPRTL__kmpc_push_num_threads,
   // Call to void __kmpc_flush(ident_t *loc, ...);
-  OMPRTL__kmpc_flush
+  OMPRTL__kmpc_flush,
+  // Call to void __kmpc_atomic_start(void);
+  OMPRTL__kmpc_atomic_start,
+  // Call to void __kmpc_atomic_end(void);
+  OMPRTL__kmpc_atomic_end
 };
 
 private:
@@ -335,6 +339,12 @@
   /// \param Vars List of variables to flush.
   virtual void EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
                             SourceLocation Loc);
+
+  /// \brief Emit code for the atomic region.
+  /// \param AtomicOpGen Generator for the expression/statement that must be
+  /// executed inside the atomic region.
+  virtual void EmitOMPAtomicRegion(CodeGenFunction &CGF,
+                                   const std::function<void()> &AtomicOpGen);
 };
 } // namespace CodeGen
 } // namespace clang
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -371,6 +371,20 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
     break;
   }
+  case OMPRTL__kmpc_atomic_start: {
+    // Build void __kmpc_atomic_start(void);
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_atomic_start");
+    break;
+  }
+  case OMPRTL__kmpc_atomic_end: {
+    // Build void __kmpc_atomic_end(void);
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_atomic_end");
+    break;
+  }
   }
   return RTLFn;
 }
@@ -670,3 +684,17 @@
                                        CGOpenMPRuntime::OMPRTL__kmpc_flush);
   CGF.EmitRuntimeCall(RTLFn, Args);
 }
+
+void CGOpenMPRuntime::EmitOMPAtomicRegion(
+    CodeGenFunction &CGF, const std::function<void()> &AtomicOpGen) {
+  // __kmpc_atomic_start(void);
+  // AtomicOpGen();
+  // __kmpc_atomic_end(void);
+  auto *RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction(
+      CGOpenMPRuntime::OMPRTL__kmpc_atomic_start);
+  CGF.EmitRuntimeCall(RTLFn, llvm::None);
+  AtomicOpGen();
+  RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction(
+      CGOpenMPRuntime::OMPRTL__kmpc_atomic_end);
+  CGF.EmitRuntimeCall(RTLFn, llvm::None);
+}
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -560,8 +560,127 @@
   llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
 }
 
-void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &) {
-  llvm_unreachable("CodeGen for 'omp atomic' is not supported yet.");
+static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
+                                         QualType SrcType, QualType DestType) {
+  assert(CGF.hasScalarEvaluationKind(DestType) &&
+         "DestType must have scalar evaluation kind.");
+  assert(!Val.isAggregate() && "Must be a scalar or complex.");
+  return Val.isScalar()
+             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
+             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
+                                                 DestType);
+}
+
+static CodeGenFunction::ComplexPairTy
+convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
+                      QualType DestType) {
+  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
+         "DestType must have complex evaluation kind.");
+  CodeGenFunction::ComplexPairTy ComplexVal;
+  if (Val.isScalar()) {
+    // Convert the input element to the element type of the complex.
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    auto ScalarVal =
+        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
+    ComplexVal = CodeGenFunction::ComplexPairTy(
+        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
+  } else {
+    assert(Val.isComplex() && "Must be a scalar or complex.");
+    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    ComplexVal.first = CGF.EmitScalarConversion(
+        Val.getComplexVal().first, SrcElementType, DestElementType);
+    ComplexVal.second = CGF.EmitScalarConversion(
+        Val.getComplexVal().second, SrcElementType, DestElementType);
+  }
+  return ComplexVal;
+}
+
+static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
+                              bool IsSeqCst, const Expr *X, const Expr *V,
+                              const Expr *, SourceLocation Loc) {
+  switch (Kind) {
+  case OMPC_read: {
+    // v = x;
+    assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
+    assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
+    LValue XLValue = CGF.EmitLValue(X);
+    LValue VLValue = CGF.EmitLValue(V);
+    RValue Res;
+    if (XLValue.isSimple())
+      // Simple lvalues use the target's rule for atomics of the given size.
+      Res = CGF.EmitAtomicLoad(XLValue, X->getExprLoc());
+    else
+      // Bitfields, vector elements, and global register lvalues always go
+      // through the OpenMP locking path.
+      CGF.CGM.getOpenMPRuntime().EmitOMPAtomicRegion(
+          CGF, [&]() -> void { Res = CGF.EmitLoadOfLValue(XLValue, Loc); });
+    // OpenMP, 2.12.6, atomic Construct
+    // Any atomic construct with a seq_cst clause forces the atomically
+    // performed operation to include an implicit flush operation without a
+    // list.
+    if (IsSeqCst)
+      CGF.CGM.getOpenMPRuntime().EmitOMPFlush(CGF, llvm::None, Loc);
+    switch (CGF.getEvaluationKind(V->getType())) {
+    case TEK_Scalar:
+      CGF.EmitStoreOfScalar(
+          convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
+      break;
+    case TEK_Complex:
+      CGF.EmitStoreOfComplex(
+          convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
+          /*isInit=*/false);
+      break;
+    case TEK_Aggregate:
+      llvm_unreachable("Must be a scalar or complex.");
+    }
+    break;
+  }
+  case OMPC_write:
+  case OMPC_update:
+  case OMPC_capture:
+    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
+  case OMPC_if:
+  case OMPC_final:
+  case OMPC_num_threads:
+  case OMPC_private:
+  case OMPC_firstprivate:
+  case OMPC_lastprivate:
+  case OMPC_reduction:
+  case OMPC_safelen:
+  case OMPC_collapse:
+  case OMPC_default:
+  case OMPC_seq_cst:
+  case OMPC_shared:
+  case OMPC_linear:
+  case OMPC_aligned:
+  case OMPC_copyin:
+  case OMPC_copyprivate:
+  case OMPC_flush:
+  case OMPC_proc_bind:
+  case OMPC_schedule:
+  case OMPC_ordered:
+  case OMPC_nowait:
+  case OMPC_untied:
+  case OMPC_threadprivate:
+  case OMPC_mergeable:
+  case OMPC_unknown:
+    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
+  }
+}
+
+void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
+  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
+  OpenMPClauseKind Kind = OMPC_unknown;
+  for (auto *C : S.clauses()) {
+    // Find the first clause (skipping the seq_cst clause, if it comes first).
+    if (C->getClauseKind() != OMPC_seq_cst) {
+      Kind = C->getClauseKind();
+      break;
+    }
+  }
+  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
+                    S.getLocStart());
 }
 
 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
Index: test/OpenMP/atomic_read_codegen.c
===================================================================
--- test/OpenMP/atomic_read_codegen.c
+++ test/OpenMP/atomic_read_codegen.c
@@ -0,0 +1,215 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+_Bool bv, bx;
+char cv, cx;
+unsigned char ucv, ucx;
+short sv, sx;
+unsigned short usv, usx;
+int iv, ix;
+unsigned int uiv, uix;
+long lv, lx;
+unsigned long ulv, ulx;
+long long llv, llx;
+unsigned long long ullv, ullx;
+float fv, fx;
+double dv, dx;
+long double ldv, ldx;
+_Complex int civ, cix;
+_Complex float cfv, cfx;
+_Complex double cdv, cdx;
+
+typedef int v4si __attribute__((__vector_size__(16)));
+v4si v4six;
+
+struct BitFields {
+  int a : 3;
+} bfx;
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+float2 float2x;
+
+register int rix __asm__("0");
+
+int main() {
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = cx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  ucv = ucx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = sx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = usx;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  iv = ix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = uix;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  lv = lx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = llx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ullx;
+// CHECK: load atomic i32*
+// CHECK: store float
+#pragma omp atomic read
+  fv = fx;
+// CHECK: load atomic i64*
+// CHECK: store double
+#pragma omp atomic read
+  dv = dx;
+// CHECK: load atomic i128*
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = cix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = cfx;
+// CHECK: call{{.*}} void @__atomic_load(i64 16,
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic seq_cst read
+  cdv = cdx;
+// CHECK: load atomic i64*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = ulx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = bx;
+// CHECK: load atomic i8*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i8
+#pragma omp atomic read, seq_cst
+  ucv = cx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = lx;
+// CHECK: load atomic i32*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i32
+#pragma omp atomic seq_cst, read
+  iv = uix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = ix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i64
+#pragma omp atomic read
+  lv = cix;
+// CHECK: load atomic i32*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = fx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = dx;
+// CHECK: load atomic i128*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+#pragma omp atomic read
+  fv = cix;
+// CHECK: load atomic i16*
+// CHECK: store double
+#pragma omp atomic read
+  dv = sx;
+// CHECK: load atomic i8*
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = bx;
+// CHECK: load atomic i16*
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = usx;
+// CHECK: load atomic i64*
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic read
+  cdv = llx;
+// CHECK: call{{.*}} void @__kmpc_atomic_start(
+// CHECK: load <4 x i32>*
+// CHECK: extractelement
+// CHECK: call{{.*}} void @__kmpc_atomic_end(
+// CHECK: store i8
+#pragma omp atomic read
+  bv = v4six[0];
+// CHECK: call{{.*}} void @__kmpc_atomic_start(
+// CHECK: load i
+// CHECK: call{{.*}} void @__kmpc_atomic_end(
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx.a;
+// CHECK: call{{.*}} void @__kmpc_atomic_start(
+// CHECK: load <2 x float>*
+// CHECK: extractelement
+// CHECK: call{{.*}} void @__kmpc_atomic_end(
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = float2x.x;
+// CHECK: call{{.*}} void @__kmpc_atomic_start(
+// CHECK: call{{.*}} i{{[0-9]+}} @llvm.read_register
+// CHECK: call{{.*}} void @__kmpc_atomic_end(
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+#pragma omp atomic read seq_cst
+  dv = rix;
+  return 0;
+}
+
+#endif
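
A note for reviewers on the new TargetInfo::hasBuiltinAtomic predicate: the standalone C sketch below restates its three conditions so they can be checked in isolation. The 8-bit char width and the 128-bit maximum inline atomic width are assumptions of this sketch (they match x86-64); the patch itself takes both from the target.

#include <stdbool.h>
#include <stdint.h>

/* Sketch of TargetInfo::hasBuiltinAtomic from this patch.
 * Assumptions (not from the patch): CHAR_BIT == 8 and a 128-bit
 * MaxAtomicInlineWidth, matching x86-64. */
static bool has_builtin_atomic(uint64_t size_in_bits, uint64_t align_in_bits) {
  const uint64_t max_atomic_inline_width = 128; /* assumed target limit */
  uint64_t size_in_chars = size_in_bits / 8;    /* getTypeWidth(SignedChar) */
  /* Lock-free lowering requires the access to be at least naturally
   * aligned, no wider than the target limit, and a power-of-2 size. */
  return size_in_bits <= align_in_bits &&
         size_in_bits <= max_atomic_inline_width &&
         size_in_chars != 0 && (size_in_chars & (size_in_chars - 1)) == 0;
}

Under these assumptions has_builtin_atomic(64, 64) holds, matching the plain "load atomic i64" expected for "lv = lx;" above, while has_builtin_atomic(128, 64) fails the alignment condition, matching the "__atomic_load(i64 16, ..." libcall expected for the _Complex double read.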
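
For the OpenMP side, the test's CHECK lines exercise two lowering shapes for '#pragma omp atomic read'. The C sketch below shows the roughly equivalent hand-written code; it is illustrative only. __kmpc_atomic_start/__kmpc_atomic_end are the runtime entry points this patch declares (their bodies live in libiomp5), and the seq_cst order on the plain load is an assumption of the sketch, since the CHECK lines do not pin an ordering.

extern void __kmpc_atomic_start(void); /* global OpenMP atomic lock */
extern void __kmpc_atomic_end(void);

struct BF { int a : 3; };

/* Simple lvalue: lowered to the target's native atomic load. */
long read_simple(long *x) {
  return __atomic_load_n(x, __ATOMIC_SEQ_CST); /* ordering assumed */
}

/* Bitfield (likewise vector-element or global-register) lvalue: no native
 * atomic access exists, so the read runs inside the locked region. */
int read_bitfield(struct BF *p) {
  __kmpc_atomic_start();
  int v = p->a;
  __kmpc_atomic_end();
  return v;
}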