Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -36,36 +36,51 @@
     CharUnits LValueAlign;
     TypeEvaluationKind EvaluationKind;
     bool UseLibcall;
+    llvm::Value *Addr;

   public:
-    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) {
-      assert(lvalue.isSimple());
-
-      AtomicTy = lvalue.getType();
-      ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
-      EvaluationKind = CGF.getEvaluationKind(ValueTy);
-
-      ASTContext &C = CGF.getContext();
-
-      uint64_t ValueAlignInBits;
-      uint64_t AtomicAlignInBits;
-      TypeInfo ValueTI = C.getTypeInfo(ValueTy);
-      ValueSizeInBits = ValueTI.Width;
-      ValueAlignInBits = ValueTI.Align;
-
-      TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
-      AtomicSizeInBits = AtomicTI.Width;
-      AtomicAlignInBits = AtomicTI.Align;
-
-      assert(ValueSizeInBits <= AtomicSizeInBits);
-      assert(ValueAlignInBits <= AtomicAlignInBits);
-
-      AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
-      ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
-      if (lvalue.getAlignment().isZero())
-        lvalue.setAlignment(AtomicAlign);
-
-      UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
-          AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
+    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue)
+        : CGF(CGF), AtomicSizeInBits(0), ValueSizeInBits(0), UseLibcall(true),
+          Addr(nullptr) {
+      assert(!lvalue.isGlobalReg() && "Global registers are not supported!");
+      if (lvalue.isSimple()) {
+        Addr = lvalue.getAddress();
+        AtomicTy = lvalue.getType();
+        if (auto *ATy = AtomicTy->getAs<AtomicType>())
+          ValueTy = ATy->getValueType();
+        else
+          ValueTy = AtomicTy;
+        EvaluationKind = CGF.getEvaluationKind(ValueTy);
+
+        ASTContext &C = CGF.getContext();
+
+        uint64_t ValueAlignInBits;
+        uint64_t AtomicAlignInBits;
+        TypeInfo ValueTI = C.getTypeInfo(ValueTy);
+        ValueSizeInBits = ValueTI.Width;
+        ValueAlignInBits = ValueTI.Align;
+
+        TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
+        AtomicSizeInBits = AtomicTI.Width;
+        AtomicAlignInBits = AtomicTI.Align;
+
+        assert(ValueSizeInBits <= AtomicSizeInBits);
+        assert(ValueAlignInBits <= AtomicAlignInBits);
+
+        AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
+        ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
+        if (lvalue.getAlignment().isZero())
+          lvalue.setAlignment(AtomicAlign);
+
+        UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
+            AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
+      } else if (lvalue.isBitField())
+        Addr = lvalue.getBitFieldAddr();
+      else if (lvalue.isVectorElt())
+        Addr = lvalue.getVectorAddr();
+      else {
+        assert(lvalue.isExtVectorElt());
+        Addr = lvalue.getExtVectorAddr();
+      }
     }

     QualType getAtomicType() const { return AtomicTy; }
@@ -76,6 +91,7 @@
     uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
     TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
     bool shouldUseLibcall() const { return UseLibcall; }
+    llvm::Value *getAddress() const { return Addr; }

     /// Is the atomic size larger than the underlying value type?
     ///
@@ -90,6 +106,10 @@
     bool emitMemSetZeroIfNecessary(LValue dest) const;

     llvm::Value *getAtomicSizeValue() const {
+      if (!AtomicSizeInBits) {
+        auto Ty = Addr->getType()->getPointerElementType();
+        return llvm::ConstantExpr::getSizeOf(Ty);
+      }
       CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
       return CGF.CGM.getSize(size);
     }
@@ -962,18 +982,24 @@
   // Check whether we should use a library call.
   if (atomics.shouldUseLibcall()) {
     llvm::Value *tempAddr;
-    if (!resultSlot.isIgnored()) {
-      assert(atomics.getEvaluationKind() == TEK_Aggregate);
-      tempAddr = resultSlot.getAddr();
+    if (src.isSimple()) {
+      if (!resultSlot.isIgnored()) {
+        assert(atomics.getEvaluationKind() == TEK_Aggregate);
+        tempAddr = resultSlot.getAddr();
+      } else
+        tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
     } else {
-      tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
+      auto AllocaTy = atomics.getAddress()->getType()->getPointerElementType();
+      auto TempAlloca = CreateTempAlloca(AllocaTy, "atomic-load-temp");
+      TempAlloca->setAlignment(getContext().toBits(src.getAlignment()));
+      tempAddr = TempAlloca;
     }

     // void __atomic_load(size_t size, void *mem, void *return, int order);
     CallArgList args;
     args.add(RValue::get(atomics.getAtomicSizeValue()),
              getContext().getSizeType());
-    args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())),
+    args.add(RValue::get(EmitCastToVoidPtr(atomics.getAddress())),
              getContext().VoidPtrTy);
     args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
              getContext().VoidPtrTy);
@@ -983,7 +1009,19 @@
     emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args);

     // Produce the r-value.
-    return atomics.convertTempToRValue(tempAddr, resultSlot, loc);
+    if (src.isSimple())
+      return atomics.convertTempToRValue(tempAddr, resultSlot, loc);
+    else if (src.isBitField())
+      return EmitLoadOfBitfieldLValue(LValue::MakeBitfield(
+          tempAddr, src.getBitFieldInfo(), src.getType(), src.getAlignment()));
+    else if (src.isVectorElt())
+      return EmitLoadOfLValue(
+          LValue::MakeVectorElt(tempAddr, src.getVectorIdx(), src.getType(),
+                                src.getAlignment()),
+          loc);
+    else if (src.isExtVectorElt())
+      return EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt(
+          tempAddr, src.getExtVectorElts(), src.getType(),
+          src.getAlignment()));
   }

   // Okay, we're doing this natively.
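
What the new libcall path does for a non-simple lvalue is worth spelling out: the entire underlying object (the vector, or the bit-field's storage) is copied atomically into a stack temporary via __atomic_load, and the element or bit-field is then extracted from the temporary with ordinary loads. A C++-level sketch of what this is morally equivalent to for the `bv = v4six[0]` case in the test below (the function name is illustrative; this is not the emitted IR):

    typedef int v4si __attribute__((__vector_size__(16)));
    v4si v4six;

    bool atomicReadVectorElt() {
      v4si Temp;  // corresponds to the "atomic-load-temp" alloca
      // Atomically copy the whole 16-byte vector into the temporary;
      // order 5 == __ATOMIC_SEQ_CST, matching the "i32 5" in the test.
      __atomic_load(&v4six, &Temp, __ATOMIC_SEQ_CST);
      // Extract the requested element from the temporary, non-atomically.
      return Temp[0];
    }
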
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -88,6 +88,10 @@
   OMPRTL__kmpc_master,
   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
   OMPRTL__kmpc_end_master,
+  // Call to void __kmpc_atomic_start(void);
+  OMPRTL__kmpc_atomic_start,
+  // Call to void __kmpc_atomic_end(void);
+  OMPRTL__kmpc_atomic_end,
 };

 /// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -395,6 +399,13 @@
   /// \param Vars List of variables to flush.
   virtual void EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
                             SourceLocation Loc);
+
+  /// \brief Emits an atomic region.
+  /// \param AtomicOpGen Generator for the statement associated with the given
+  /// atomic region.
+  virtual void EmitOMPAtomicRegion(CodeGenFunction &CGF,
+                                   const std::function<void()> &AtomicOpGen,
+                                   SourceLocation Loc);
 };
 } // namespace CodeGen
 } // namespace clang
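
Both new entry points take no arguments and return nothing, so all a conforming runtime has to provide is mutual exclusion between __kmpc_atomic_start and __kmpc_atomic_end. A minimal sketch of that contract (an illustration only, not libiomp5's actual implementation):

    #include <mutex>

    static std::mutex GlobalAtomicLock;  // hypothetical lock for this sketch

    extern "C" void __kmpc_atomic_start(void) { GlobalAtomicLock.lock(); }
    extern "C" void __kmpc_atomic_end(void) { GlobalAtomicLock.unlock(); }
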
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -475,6 +475,20 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
     break;
   }
+  case OMPRTL__kmpc_atomic_start: {
+    // Build void __kmpc_atomic_start(void);
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_atomic_start");
+    break;
+  }
+  case OMPRTL__kmpc_atomic_end: {
+    // Build void __kmpc_atomic_end(void);
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_atomic_end");
+    break;
+  }
   }
   return RTLFn;
 }
@@ -920,3 +934,32 @@
   auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_flush);
   CGF.EmitRuntimeCall(RTLFn, Args);
 }
+
+void CGOpenMPRuntime::EmitOMPAtomicRegion(
+    CodeGenFunction &CGF, const std::function<void()> &AtomicOpGen,
+    SourceLocation) {
+  // __kmpc_atomic_start();
+  // AtomicOpGen();
+  // __kmpc_atomic_end();
+  // Prepare arguments and build a call to __kmpc_atomic_start().
+  auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_atomic_start);
+  CGF.EmitRuntimeCall(RTLFn, llvm::None);
+  AtomicOpGen();
+  // Build a call to __kmpc_atomic_end().
+  // OpenMP [1.2.2 OpenMP Language Terminology]
+  // For C/C++, an executable statement, possibly compound, with a single
+  // entry at the top and a single exit at the bottom, or an OpenMP construct.
+  // * Access to the structured block must not be the result of a branch.
+  // * The point of exit cannot be a branch out of the structured block.
+  // * The point of entry must not be a call to setjmp().
+  // * longjmp() and throw() must not violate the entry/exit criteria.
+  // * An expression statement, iteration statement, selection statement, or
+  //   try block is considered to be a structured block if the corresponding
+  //   compound statement obtained by enclosing it in { and } would be a
+  //   structured block.
+  // It is analyzed in Sema, so we can just call __kmpc_atomic_end() on
+  // fallthrough rather than pushing a normal cleanup for it.
+  RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_atomic_end);
+  CGF.EmitRuntimeCall(RTLFn, llvm::None);
+}
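
This hook exists for lvalues that cannot be loaded atomically at all; the only client below is the global-register case of 'omp atomic read'. The generated code is morally equivalent to the following sketch (assuming a lock-based runtime as above; readGuarded is a made-up stand-in for the code the callback emits):

    extern "C" void __kmpc_atomic_start(void);
    extern "C" void __kmpc_atomic_end(void);

    int readGuarded(volatile int *x) {  // x stands in for the register lvalue
      __kmpc_atomic_start();
      int Tmp = *x;  // plain, non-atomic load under the runtime's global lock
      __kmpc_atomic_end();
      return Tmp;    // conversion and store to 'v' happen outside the region
    }
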
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -691,8 +691,130 @@
   llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
 }

-void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &) {
-  llvm_unreachable("CodeGen for 'omp atomic' is not supported yet.");
+static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
+                                         QualType SrcType, QualType DestType) {
+  assert(CGF.hasScalarEvaluationKind(DestType) &&
+         "DestType must have scalar evaluation kind.");
+  assert(!Val.isAggregate() && "Must be a scalar or complex.");
+  return Val.isScalar()
+             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
+             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
+                                                 DestType);
+}
+
+static CodeGenFunction::ComplexPairTy
+convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
+                      QualType DestType) {
+  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
+         "DestType must have complex evaluation kind.");
+  CodeGenFunction::ComplexPairTy ComplexVal;
+  if (Val.isScalar()) {
+    // Convert the input element to the element type of the complex.
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    auto ScalarVal =
+        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
+    ComplexVal = CodeGenFunction::ComplexPairTy(
+        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
+  } else {
+    assert(Val.isComplex() && "Must be a scalar or complex.");
+    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    ComplexVal.first = CGF.EmitScalarConversion(
+        Val.getComplexVal().first, SrcElementType, DestElementType);
+    ComplexVal.second = CGF.EmitScalarConversion(
+        Val.getComplexVal().second, SrcElementType, DestElementType);
+  }
+  return ComplexVal;
+}
+
+static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                  const Expr *X, const Expr *V,
+                                  SourceLocation Loc) {
+  // v = x;
+  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
+  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  LValue VLValue = CGF.EmitLValue(V);
+  RValue Res;
+  if (!XLValue.isGlobalReg())
+    Res = CGF.EmitAtomicLoad(XLValue, Loc);
+  else
+    CGF.CGM.getOpenMPRuntime().EmitOMPAtomicRegion(CGF, [&]() -> void {
+      Res = CGF.EmitLoadOfGlobalRegLValue(XLValue);
+    }, Loc);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().EmitOMPFlush(CGF, llvm::None, Loc);
+  switch (CGF.getEvaluationKind(V->getType())) {
+  case TEK_Scalar:
+    CGF.EmitStoreOfScalar(
+        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
+    break;
+  case TEK_Complex:
+    CGF.EmitStoreOfComplex(
+        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
+        /*isInit=*/false);
+    break;
+  case TEK_Aggregate:
+    llvm_unreachable("Must be a scalar or complex.");
+  }
+}
+
+static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
+                              bool IsSeqCst, const Expr *X, const Expr *V,
+                              const Expr *, SourceLocation Loc) {
+  switch (Kind) {
+  case OMPC_read:
+    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
+    break;
+  case OMPC_write:
+  case OMPC_update:
+  case OMPC_capture:
+    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
+  case OMPC_if:
+  case OMPC_final:
+  case OMPC_num_threads:
+  case OMPC_private:
+  case OMPC_firstprivate:
+  case OMPC_lastprivate:
+  case OMPC_reduction:
+  case OMPC_safelen:
+  case OMPC_collapse:
+  case OMPC_default:
+  case OMPC_seq_cst:
+  case OMPC_shared:
+  case OMPC_linear:
+  case OMPC_aligned:
+  case OMPC_copyin:
+  case OMPC_copyprivate:
+  case OMPC_flush:
+  case OMPC_proc_bind:
+  case OMPC_schedule:
+  case OMPC_ordered:
+  case OMPC_nowait:
+  case OMPC_untied:
+  case OMPC_threadprivate:
+  case OMPC_mergeable:
+  case OMPC_unknown:
+    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
+  }
+}
+
+void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
+  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
+  OpenMPClauseKind Kind = OMPC_unknown;
+  for (auto *C : S.clauses()) {
+    // Find first clause (skip seq_cst clause, if it is first).
+    if (C->getClauseKind() != OMPC_seq_cst) {
+      Kind = C->getClauseKind();
+      break;
+    }
+  }
+  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
+                    S.getLocStart());
 }

 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
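
Note that EmitOMPAtomicDirective picks the first clause other than seq_cst, so clause order is irrelevant. Both spellings below (adapted from the test that follows) reach EmitOMPAtomicReadExpr with IsSeqCst set and therefore emit a __kmpc_flush call:

    extern int iv;
    extern unsigned int uix;

    void clauseOrder() {
    #pragma omp atomic seq_cst, read  // seq_cst first: still an atomic read
      iv = uix;
    #pragma omp atomic read, seq_cst  // kind clause first: identical lowering
      iv = uix;
    }
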
Index: test/OpenMP/atomic_read_codegen.c
===================================================================
--- test/OpenMP/atomic_read_codegen.c
+++ test/OpenMP/atomic_read_codegen.c
@@ -0,0 +1,222 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+_Bool bv, bx;
+char cv, cx;
+unsigned char ucv, ucx;
+short sv, sx;
+unsigned short usv, usx;
+int iv, ix;
+unsigned int uiv, uix;
+long lv, lx;
+unsigned long ulv, ulx;
+long long llv, llx;
+unsigned long long ullv, ullx;
+float fv, fx;
+double dv, dx;
+long double ldv, ldx;
+_Complex int civ, cix;
+_Complex float cfv, cfx;
+_Complex double cdv, cdx;
+
+typedef int v4si __attribute__((__vector_size__(16)));
+v4si v4six;
+
+struct BitFields {
+  float f;
+  int a : 31;
+} bfx;
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+float2 float2x;
+
+register int rix __asm__("0");
+
+int main() {
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = cx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  ucv = ucx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = sx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = usx;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  iv = ix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = uix;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  lv = lx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = llx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ullx;
+// CHECK: load atomic i32* bitcast (float*
+// CHECK: bitcast i32 {{.*}} to float
+// CHECK: store float
+#pragma omp atomic read
+  fv = fx;
+// CHECK: load atomic i64* bitcast (double*
+// CHECK: bitcast i64 {{.*}} to double
+// CHECK: store double
+#pragma omp atomic read
+  dv = dx;
+// CHECK: [[LD:%.+]] = load atomic i128* bitcast (x86_fp80*
+// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i128*
+// CHECK: store i128 [[LD]], i128* [[BITCAST]]
+// CHECK: [[LD:%.+]] = load x86_fp80* [[LDTEMP]]
+// CHECK: store x86_fp80 [[LD]]
+#pragma omp atomic read
+  ldv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = cix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = cfx;
+// CHECK: call{{.*}} void @__atomic_load(i64 16,
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic seq_cst read
+  cdv = cdx;
+// CHECK: load atomic i64*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = ulx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = bx;
+// CHECK: load atomic i8*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i8
+#pragma omp atomic read, seq_cst
+  ucv = cx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = lx;
+// CHECK: load atomic i32*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i32
+#pragma omp atomic seq_cst, read
+  iv = uix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = ix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i64
+#pragma omp atomic read
+  lv = cix;
+// CHECK: load atomic i32*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = fx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = dx;
+// CHECK: load atomic i128*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+#pragma omp atomic read
+  fv = cix;
+// CHECK: load atomic i16*
+// CHECK: store double
+#pragma omp atomic read
+  dv = sx;
+// CHECK: load atomic i8*
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = bx;
+// CHECK: load atomic i16*
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = usx;
+// CHECK: load atomic i64*
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic read
+  cdv = llx;
+// CHECK: [[BITCAST:%.+]] = bitcast <4 x i32>* [[LDTEMP:%.+]] to i8*
+// CHECK: call{{.*}} void @__atomic_load(i64 ptrtoint (<4 x i32>* getelementptr (<4 x i32>* null, i32 1) to i64), i8* bitcast (<4 x i32>* {{@.+}} to i8*), i8* [[BITCAST]], i32 5)
+// CHECK: [[LD:%.+]] = load <4 x i32>* [[LDTEMP]]
+// CHECK: extractelement <4 x i32> [[LD]]
+// CHECK: store i8
+#pragma omp atomic read
+  bv = v4six[0];
+// CHECK: [[BITCAST:%.+]] = bitcast i32* [[LDTEMP:%.+]] to i8*
+// CHECK: call{{.*}} void @__atomic_load(i64 ptrtoint (i32* getelementptr (i32* null, i32 1) to i64), i8* bitcast (i32* {{.+}} to i8*), i8* [[BITCAST]], i32 5)
+// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]]
+// CHECK: shl i32 [[LD]]
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx.a;
+// CHECK: [[BITCAST:%.+]] = bitcast <2 x float>* [[LDTEMP:%.+]] to i8*
+// CHECK: call{{.*}} void @__atomic_load(i64 ptrtoint (<2 x float>* getelementptr (<2 x float>* null, i32 1) to i64), i8* bitcast (<2 x float>* {{@.+}} to i8*), i8* [[BITCAST]], i32 5)
+// CHECK: [[LD:%.+]] = load <2 x float>* [[LDTEMP]]
+// CHECK: extractelement <2 x float> [[LD]]
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = float2x.x;
+// CHECK: call{{.*}} void @__kmpc_atomic_start(
+// CHECK: call{{.*}} i{{[0-9]+}} @llvm.read_register
+// CHECK: call{{.*}} void @__kmpc_atomic_end(
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+#pragma omp atomic read seq_cst
+  dv = rix;
+  return 0;
+}
+
+#endif
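
The test pins down three lowering strategies: native `load atomic` instructions where the target supports them, the generic __atomic_load libcall for 16-byte and non-simple lvalues, and __kmpc_atomic_start/__kmpc_atomic_end bracketing for global register lvalues. The first two can be seen side by side in a minimal translation unit (a sketch; built with -fopenmp=libiomp5 as in the RUN lines above, the declarations mirror the test):

    long double ldx;      // x86_fp80: native wide atomic load (i128)
    _Complex double cdx;  // 16 bytes: goes through the __atomic_load libcall

    void twoStrategies(long double *ldv, _Complex double *cdv) {
    #pragma omp atomic read
      *ldv = ldx;  // CHECK'd above as "load atomic i128*"
    #pragma omp atomic read
      *cdv = cdx;  // CHECK'd above as "@__atomic_load(i64 16,"
    }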