Index: clang/lib/CodeGen/CGDeclCXX.cpp =================================================================== --- clang/lib/CodeGen/CGDeclCXX.cpp +++ clang/lib/CodeGen/CGDeclCXX.cpp @@ -153,7 +153,7 @@ Addr, CGF.getContext().getTypeSizeInChars(D.getType())); } -void CodeGenFunction::EmitInvariantStart(llvm::Constant *Addr, CharUnits Size) { +void CodeGenFunction::EmitInvariantStart(llvm::Value *Addr, CharUnits Size) { // Do not emit the intrinsic if we're not optimizing. if (!CGM.getCodeGenOpts().OptimizationLevel) return; @@ -161,13 +161,18 @@ // Grab the llvm.invariant.start intrinsic. llvm::Intrinsic::ID InvStartID = llvm::Intrinsic::invariant_start; // Overloaded address space type. - llvm::Type *ObjectPtr[1] = {Int8PtrTy}; + llvm::Type *ObjectPtr[1] = { + Int8Ty->getPointerTo(Addr->getType()->getPointerAddressSpace())}; llvm::Function *InvariantStart = CGM.getIntrinsic(InvStartID, ObjectPtr); // Emit a call with the size in bytes of the object. uint64_t Width = Size.getQuantity(); - llvm::Value *Args[2] = { llvm::ConstantInt::getSigned(Int64Ty, Width), - llvm::ConstantExpr::getBitCast(Addr, Int8PtrTy)}; + llvm::Value *Cast; + if (llvm::Constant *C = dyn_cast(Addr)) + Cast = llvm::ConstantExpr::getBitCast(C, ObjectPtr[0]); + else + Cast = Builder.CreateBitCast(Addr, ObjectPtr[0]); + llvm::Value *Args[2] = {llvm::ConstantInt::getSigned(Int64Ty, Width), Cast}; Builder.CreateCall(InvariantStart, Args); } Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -1252,62 +1252,88 @@ /// LValue CodeGenFunction::EmitLValue(const Expr *E) { ApplyDebugLocation DL(*this, E); + LValue Ret; switch (E->getStmtClass()) { - default: return EmitUnsupportedLValue(E, "l-value expression"); + default: + Ret = EmitUnsupportedLValue(E, "l-value expression"); + break; case Expr::ObjCPropertyRefExprClass: llvm_unreachable("cannot emit a property reference directly"); + break; case Expr::ObjCSelectorExprClass: - return EmitObjCSelectorLValue(cast(E)); + Ret = EmitObjCSelectorLValue(cast(E)); + break; case Expr::ObjCIsaExprClass: - return EmitObjCIsaExpr(cast(E)); + Ret = EmitObjCIsaExpr(cast(E)); + break; case Expr::BinaryOperatorClass: - return EmitBinaryOperatorLValue(cast(E)); + Ret = EmitBinaryOperatorLValue(cast(E)); + break; case Expr::CompoundAssignOperatorClass: { QualType Ty = E->getType(); if (const AtomicType *AT = Ty->getAs()) Ty = AT->getValueType(); if (!Ty->isAnyComplexType()) - return EmitCompoundAssignmentLValue(cast(E)); - return EmitComplexCompoundAssignmentLValue(cast(E)); + Ret = EmitCompoundAssignmentLValue(cast(E)); + else + Ret = + EmitComplexCompoundAssignmentLValue(cast(E)); + break; } case Expr::CallExprClass: case Expr::CXXMemberCallExprClass: case Expr::CXXOperatorCallExprClass: case Expr::UserDefinedLiteralClass: - return EmitCallExprLValue(cast(E)); + Ret = EmitCallExprLValue(cast(E)); + break; case Expr::CXXRewrittenBinaryOperatorClass: - return EmitLValue(cast(E)->getSemanticForm()); + Ret = EmitLValue(cast(E)->getSemanticForm()); + break; case Expr::VAArgExprClass: - return EmitVAArgExprLValue(cast(E)); + Ret = EmitVAArgExprLValue(cast(E)); + break; case Expr::DeclRefExprClass: - return EmitDeclRefLValue(cast(E)); + Ret = EmitDeclRefLValue(cast(E)); + break; case Expr::ConstantExprClass: - return EmitLValue(cast(E)->getSubExpr()); + Ret = EmitLValue(cast(E)->getSubExpr()); + break; case Expr::ParenExprClass: - return EmitLValue(cast(E)->getSubExpr()); + Ret = EmitLValue(cast(E)->getSubExpr()); + break; case Expr::GenericSelectionExprClass: - return EmitLValue(cast(E)->getResultExpr()); + Ret = EmitLValue(cast(E)->getResultExpr()); + break; case Expr::PredefinedExprClass: - return EmitPredefinedLValue(cast(E)); + Ret = EmitPredefinedLValue(cast(E)); + break; case Expr::StringLiteralClass: - return EmitStringLiteralLValue(cast(E)); + Ret = EmitStringLiteralLValue(cast(E)); + break; case Expr::ObjCEncodeExprClass: - return EmitObjCEncodeExprLValue(cast(E)); + Ret = EmitObjCEncodeExprLValue(cast(E)); + break; case Expr::PseudoObjectExprClass: - return EmitPseudoObjectLValue(cast(E)); + Ret = EmitPseudoObjectLValue(cast(E)); + break; case Expr::InitListExprClass: - return EmitInitListLValue(cast(E)); + Ret = EmitInitListLValue(cast(E)); + break; case Expr::CXXTemporaryObjectExprClass: case Expr::CXXConstructExprClass: - return EmitCXXConstructLValue(cast(E)); + Ret = EmitCXXConstructLValue(cast(E)); + break; case Expr::CXXBindTemporaryExprClass: - return EmitCXXBindTemporaryLValue(cast(E)); + Ret = EmitCXXBindTemporaryLValue(cast(E)); + break; case Expr::CXXUuidofExprClass: - return EmitCXXUuidofLValue(cast(E)); + Ret = EmitCXXUuidofLValue(cast(E)); + break; case Expr::LambdaExprClass: - return EmitAggExprToLValue(E); + Ret = EmitAggExprToLValue(E); + break; case Expr::ExprWithCleanupsClass: { const auto *cleanups = cast(E); @@ -1319,55 +1345,74 @@ // cleanups. llvm::Value *V = LV.getPointer(*this); Scope.ForceCleanup({&V}); - return LValue::MakeAddr(Address(V, LV.getAlignment()), LV.getType(), - getContext(), LV.getBaseInfo(), LV.getTBAAInfo()); + Ret = LValue::MakeAddr(Address(V, LV.getAlignment()), LV.getType(), + getContext(), LV.getBaseInfo(), LV.getTBAAInfo()); } // FIXME: Is it possible to create an ExprWithCleanups that produces a // bitfield lvalue or some other non-simple lvalue? - return LV; + else + Ret = LV; + break; } case Expr::CXXDefaultArgExprClass: { auto *DAE = cast(E); CXXDefaultArgExprScope Scope(*this, DAE); - return EmitLValue(DAE->getExpr()); + Ret = EmitLValue(DAE->getExpr()); + break; } case Expr::CXXDefaultInitExprClass: { auto *DIE = cast(E); CXXDefaultInitExprScope Scope(*this, DIE); - return EmitLValue(DIE->getExpr()); + Ret = EmitLValue(DIE->getExpr()); + break; } case Expr::CXXTypeidExprClass: - return EmitCXXTypeidLValue(cast(E)); + Ret = EmitCXXTypeidLValue(cast(E)); + break; case Expr::ObjCMessageExprClass: - return EmitObjCMessageExprLValue(cast(E)); + Ret = EmitObjCMessageExprLValue(cast(E)); + break; case Expr::ObjCIvarRefExprClass: - return EmitObjCIvarRefLValue(cast(E)); + Ret = EmitObjCIvarRefLValue(cast(E)); + break; case Expr::StmtExprClass: - return EmitStmtExprLValue(cast(E)); + Ret = EmitStmtExprLValue(cast(E)); + break; case Expr::UnaryOperatorClass: - return EmitUnaryOpLValue(cast(E)); + Ret = EmitUnaryOpLValue(cast(E)); + break; case Expr::ArraySubscriptExprClass: - return EmitArraySubscriptExpr(cast(E)); + Ret = EmitArraySubscriptExpr(cast(E)); + break; case Expr::OMPArraySectionExprClass: - return EmitOMPArraySectionExpr(cast(E)); + Ret = EmitOMPArraySectionExpr(cast(E)); + break; case Expr::ExtVectorElementExprClass: - return EmitExtVectorElementExpr(cast(E)); + Ret = EmitExtVectorElementExpr(cast(E)); + break; case Expr::MemberExprClass: - return EmitMemberExpr(cast(E)); + Ret = EmitMemberExpr(cast(E)); + break; case Expr::CompoundLiteralExprClass: - return EmitCompoundLiteralLValue(cast(E)); + Ret = EmitCompoundLiteralLValue(cast(E)); + break; case Expr::ConditionalOperatorClass: - return EmitConditionalOperatorLValue(cast(E)); + Ret = EmitConditionalOperatorLValue(cast(E)); + break; case Expr::BinaryConditionalOperatorClass: - return EmitConditionalOperatorLValue(cast(E)); + Ret = EmitConditionalOperatorLValue(cast(E)); + break; case Expr::ChooseExprClass: - return EmitLValue(cast(E)->getChosenSubExpr()); + Ret = EmitLValue(cast(E)->getChosenSubExpr()); + break; case Expr::OpaqueValueExprClass: - return EmitOpaqueValueLValue(cast(E)); + Ret = EmitOpaqueValueLValue(cast(E)); + break; case Expr::SubstNonTypeTemplateParmExprClass: - return EmitLValue(cast(E)->getReplacement()); + Ret = EmitLValue(cast(E)->getReplacement()); + break; case Expr::ImplicitCastExprClass: case Expr::CStyleCastExprClass: case Expr::CXXFunctionalCastExprClass: @@ -1376,16 +1421,30 @@ case Expr::CXXReinterpretCastExprClass: case Expr::CXXConstCastExprClass: case Expr::ObjCBridgedCastExprClass: - return EmitCastLValue(cast(E)); + Ret = EmitCastLValue(cast(E)); + break; case Expr::MaterializeTemporaryExprClass: - return EmitMaterializeTemporaryExpr(cast(E)); + Ret = EmitMaterializeTemporaryExpr(cast(E)); + break; case Expr::CoawaitExprClass: - return EmitCoawaitLValue(cast(E)); + Ret = EmitCoawaitLValue(cast(E)); + break; case Expr::CoyieldExprClass: - return EmitCoyieldLValue(cast(E)); + Ret = EmitCoyieldLValue(cast(E)); + break; } + + // Mark a pointer to OpenCL constant memory as invariant. + // ToDo: Currently we only handle simple l-value. We should also handle other + // l-values. + if (Ret.getAddressSpace() == LangAS::opencl_constant && Ret.isSimple()) { + EmitInvariantStart(Ret.getPointer(*this), + getContext().getTypeSizeInChars(Ret.getType())); + } + + return Ret; } /// Given an object of the given canonical type, can we safely copy a @@ -4159,6 +4218,7 @@ } unsigned RecordCVR = base.getVRQualifiers(); + auto RecordAddrSpace = base.getAddressSpace(); if (rec->isUnion()) { // For unions, there is no pointer adjustment. if (CGM.getCodeGenOpts().StrictVTablePointers && @@ -4217,6 +4277,7 @@ LValue LV = MakeAddrLValue(addr, FieldType, FieldBaseInfo, FieldTBAAInfo); LV.getQuals().addCVRQualifiers(RecordCVR); + LV.getQuals().setAddressSpace(RecordAddrSpace); // __weak attribute on a field is ignored. if (LV.getQuals().getObjCGCAttr() == Qualifiers::Weak) Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -262,7 +262,17 @@ const BinOpInfo &Info); Value *EmitLoadOfLValue(LValue LV, SourceLocation Loc) { - return CGF.EmitLoadOfLValue(LV, Loc).getScalarVal(); + auto *V = CGF.EmitLoadOfLValue(LV, Loc).getScalarVal(); + // Mark a pointer to OpenCL constant address space as invariant. + auto QT = LV.getType(); + if (QT->isPointerType()) { + auto PointeeTy = QT->getPointeeType(); + if (PointeeTy.getAddressSpace() == LangAS::opencl_constant) { + CGF.EmitInvariantStart(V, + CGF.getContext().getTypeSizeInChars(PointeeTy)); + } + } + return V; } void EmitLValueAlignmentAssumption(const Expr *E, Value *V) { Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -4070,7 +4070,7 @@ llvm::GlobalVariable *GV); // Emit an @llvm.invariant.start call for the given memory region. - void EmitInvariantStart(llvm::Constant *Addr, CharUnits Size); + void EmitInvariantStart(llvm::Value *Addr, CharUnits Size); /// EmitCXXGlobalVarDeclInit - Create the initializer for a C++ /// variable with global storage. Index: clang/test/CodeGenOpenCL/invariant.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/invariant.cl @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O3 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple spir -O3 -emit-llvm -o - %s | FileCheck %s + +typedef struct { + int a; + char b; +} X; + +constant X x = {0, 'a'}; + +constant char* constant p = &(x.b); + +constant X* foo(); + +// CHECK-LABEL: test1 +// CHECK: llvm.invariant.start +char test1() { + return x.b; +} + +// CHECK-LABEL: test2 +// CHECK: llvm.invariant.start +char test2() { + return *p; +} + +// CHECK-LABEL: test3 +// CHECK: llvm.invariant.start +char test3(constant X *x) { + constant char *p = &(x->b); + return *p; +} + +// CHECK-LABEL: test4 +// CHECK: llvm.invariant.start +char test4() { + return foo()->b; +} Index: clang/test/CodeGenOpenCL/printf.cl =================================================================== --- clang/test/CodeGenOpenCL/printf.cl +++ clang/test/CodeGenOpenCL/printf.cl @@ -12,25 +12,25 @@ // ALL-LABEL: @test_printf_float2( -// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> -// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> %0) +// NOFP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(2)* @.str, i32 0, i32 0), <2 x float> kernel void test_printf_float2(float2 arg) { printf("%v2hlf", arg); } // ALL-LABEL: @test_printf_half2( -// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> %0) +// FP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> -// NOFP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> %0) +// NOFP64: %call = call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.1, i32 0, i32 0), <2 x half> kernel void test_printf_half2(half2 arg) { printf("%v2hf", arg); } #ifdef cl_khr_fp64 // FP64-LABEL: @test_printf_double2( -// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.2, i32 0, i32 0), <2 x double> %0) +// FP64: call spir_func i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str.2, i32 0, i32 0), <2 x double> kernel void test_printf_double2(double2 arg) { printf("%v2lf", arg); }