Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -1129,15 +1129,14 @@ B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); } - Value *P = Alloc; - Type *PTy = Fsincos.getFunctionType()->getParamType(1); + Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1); + // The allocaInst allocates the memory in private address space. This need - // to be bitcasted to point to the address space of cos pointer type. + // to be addrspacecasted to point to the address space of cos pointer type. // In OpenCL 2.0 this is generic, while in 1.2 that is private. - if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) - P = B.CreateAddrSpaceCast(Alloc, PTy); + Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy); - CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, P); + CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc); LoadInst *LoadCos = B.CreateLoad(Alloc->getAllocatedType(), Alloc); return {SinCos, LoadCos, SinCos}; } @@ -1180,17 +1179,21 @@ Function *F = B.GetInsertBlock()->getParent(); Module *M = F->getParent(); - // Merge the sin and cos. + // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer + // implementation. Prefer the private form if available. + AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo); + SinCosLibFuncPrivate.getLeads()[0].PtrKind = + AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::PRIVATE_ADDRESS); - // for OpenCL 2.0 we have only generic implementation of sincos - // function. - // FIXME: This is not true anymore - AMDGPULibFunc SinCosLibFunc(AMDGPULibFunc::EI_SINCOS, fInfo); - SinCosLibFunc.getLeads()[0].PtrKind = + AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo); + SinCosLibFuncGeneric.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS); - FunctionCallee FSinCos = getFunction(M, SinCosLibFunc); + + FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate); + FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric); + FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric; if (!FSinCos) - return false; + return Changed; SmallVector SinCalls; SmallVector CosCalls; @@ -1201,7 +1204,8 @@ StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName; StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName(); - const std::string SinCosName = SinCosLibFunc.mangle(); + const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle(); + const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle(); // Intersect the two sets of flags. FastMathFlags FMF = FPOp->getFastMathFlags(); @@ -1224,7 +1228,8 @@ } else if (UCallee->getName() == CosName) { Handled = true; CosCalls.push_back(XI); - } else if (UCallee->getName() == SinCosName) { + } else if (UCallee->getName() == SinCosPrivateName || + UCallee->getName() == SinCosGenericName) { Handled = true; SinCosCalls.push_back(XI); } Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll @@ -106,12 +106,11 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -127,12 +126,11 @@ ; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float 4.200000e+01, ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float 4.200000e+01, ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float 4.200000e+01) -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -157,12 +155,11 @@ ; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 -; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 8 ; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> [[X]]) -; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll @@ -223,11 +223,10 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -244,11 +243,10 @@ ; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 -; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -265,11 +263,10 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -285,11 +282,10 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -305,11 +301,10 @@ ; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 -; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -325,12 +320,11 @@ ; CHECK-SAME: (<3 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <3 x float>, align 16, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <3 x float> @_Z6sincosDv3_fPU3AS0S_(<3 x float> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr addrspace(5) [[__SINCOS_]], align 16 -; CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = call contract <3 x float> @_Z6sincosDv3_fPU3AS5S_(<3 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <3 x float>, ptr addrspace(5) [[__SINCOS_]], align 16 +; CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <3 x float> [[TMP0]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: store <4 x float> [[EXTRACTVEC2]], ptr addrspace(1) [[SIN_OUT]], align 16 -; CHECK-NEXT: [[EXTRACTVEC6:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[EXTRACTVEC6:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: store <4 x float> [[EXTRACTVEC6]], ptr addrspace(1) [[COS_OUT]], align 16 ; CHECK-NEXT: ret void ; @@ -349,11 +343,10 @@ ; CHECK-SAME: (<4 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <4 x float>, align 16, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <4 x float> @_Z6sincosDv4_fPU3AS0S_(<4 x float> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr addrspace(5) [[__SINCOS_]], align 16 -; CHECK-NEXT: store <4 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 16 -; CHECK-NEXT: store <4 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = call contract <4 x float> @_Z6sincosDv4_fPU3AS5S_(<4 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr addrspace(5) [[__SINCOS_]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 16 ; CHECK-NEXT: ret void ; entry: @@ -369,11 +362,10 @@ ; CHECK-SAME: (<8 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <8 x float>, align 32, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <8 x float> @_Z6sincosDv8_fPU3AS0S_(<8 x float> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, ptr addrspace(5) [[__SINCOS_]], align 32 -; CHECK-NEXT: store <8 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 32 -; CHECK-NEXT: store <8 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 32 +; CHECK-NEXT: [[TMP0:%.*]] = call contract <8 x float> @_Z6sincosDv8_fPU3AS5S_(<8 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr addrspace(5) [[__SINCOS_]], align 32 +; CHECK-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 32 +; CHECK-NEXT: store <8 x float> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 32 ; CHECK-NEXT: ret void ; entry: @@ -389,11 +381,10 @@ ; CHECK-SAME: (<16 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <16 x float>, align 64, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <16 x float> @_Z6sincosDv16_fPU3AS0S_(<16 x float> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, ptr addrspace(5) [[__SINCOS_]], align 64 -; CHECK-NEXT: store <16 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 64 -; CHECK-NEXT: store <16 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 64 +; CHECK-NEXT: [[TMP0:%.*]] = call contract <16 x float> @_Z6sincosDv16_fPU3AS5S_(<16 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr addrspace(5) [[__SINCOS_]], align 64 +; CHECK-NEXT: store <16 x float> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 64 +; CHECK-NEXT: store <16 x float> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 64 ; CHECK-NEXT: ret void ; entry: @@ -409,11 +400,10 @@ ; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call double @_Z6sincosdPU3AS0d(double [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(5) [[__SINCOS_]], align 8 -; CHECK-NEXT: store double [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: store double [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call double @_Z6sincosdPU3AS5d(double [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store double [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store double [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -430,11 +420,10 @@ ; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x double>, align 16, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @_Z6sincosDv2_dPU3AS0S_(<2 x double> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr addrspace(5) [[__SINCOS_]], align 16 -; CHECK-NEXT: store <2 x double> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 16 -; CHECK-NEXT: store <2 x double> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @_Z6sincosDv2_dPU3AS5S_(<2 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr addrspace(5) [[__SINCOS_]], align 16 +; CHECK-NEXT: store <2 x double> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 16 +; CHECK-NEXT: store <2 x double> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 16 ; CHECK-NEXT: ret void ; entry: @@ -450,11 +439,10 @@ ; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract double @_Z6sincosdPU3AS0d(double [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(5) [[__SINCOS_]], align 8 -; CHECK-NEXT: store double [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: store double [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call contract double @_Z6sincosdPU3AS5d(double [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store double [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store double [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -470,11 +458,10 @@ ; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract double @_Z6sincosdPU3AS0d(double [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(5) [[__SINCOS_]], align 8 -; CHECK-NEXT: store double [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 -; CHECK-NEXT: store double [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call contract double @_Z6sincosdPU3AS5d(double [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store double [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: store double [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -490,11 +477,10 @@ ; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x double>, align 16, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @_Z6sincosDv2_dPU3AS0S_(<2 x double> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr addrspace(5) [[__SINCOS_]], align 16 -; CHECK-NEXT: store <2 x double> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 16 -; CHECK-NEXT: store <2 x double> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = call contract <2 x double> @_Z6sincosDv2_dPU3AS5S_(<2 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr addrspace(5) [[__SINCOS_]], align 16 +; CHECK-NEXT: store <2 x double> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 16 +; CHECK-NEXT: store <2 x double> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 16 ; CHECK-NEXT: ret void ; entry: @@ -510,12 +496,11 @@ ; CHECK-SAME: (<3 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <3 x double>, align 32, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <3 x double> @_Z6sincosDv3_dPU3AS0S_(<3 x double> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <3 x double>, ptr addrspace(5) [[__SINCOS_]], align 32 -; CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <3 x double> [[TMP1]], <3 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = call contract <3 x double> @_Z6sincosDv3_dPU3AS5S_(<3 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <3 x double>, ptr addrspace(5) [[__SINCOS_]], align 32 +; CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <3 x double> [[TMP0]], <3 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[EXTRACTVEC2]], ptr addrspace(1) [[SIN_OUT]], align 32 -; CHECK-NEXT: [[EXTRACTVEC6:%.*]] = shufflevector <3 x double> [[TMP2]], <3 x double> poison, <4 x i32> +; CHECK-NEXT: [[EXTRACTVEC6:%.*]] = shufflevector <3 x double> [[TMP1]], <3 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[EXTRACTVEC6]], ptr addrspace(1) [[COS_OUT]], align 32 ; CHECK-NEXT: ret void ; @@ -534,11 +519,10 @@ ; CHECK-SAME: (<4 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <4 x double>, align 32, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <4 x double> @_Z6sincosDv4_dPU3AS0S_(<4 x double> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr addrspace(5) [[__SINCOS_]], align 32 -; CHECK-NEXT: store <4 x double> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 32 -; CHECK-NEXT: store <4 x double> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 32 +; CHECK-NEXT: [[TMP0:%.*]] = call contract <4 x double> @_Z6sincosDv4_dPU3AS5S_(<4 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr addrspace(5) [[__SINCOS_]], align 32 +; CHECK-NEXT: store <4 x double> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 32 +; CHECK-NEXT: store <4 x double> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 32 ; CHECK-NEXT: ret void ; entry: @@ -554,11 +538,10 @@ ; CHECK-SAME: (<8 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <8 x double>, align 64, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <8 x double> @_Z6sincosDv8_dPU3AS0S_(<8 x double> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, ptr addrspace(5) [[__SINCOS_]], align 64 -; CHECK-NEXT: store <8 x double> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 64 -; CHECK-NEXT: store <8 x double> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 64 +; CHECK-NEXT: [[TMP0:%.*]] = call contract <8 x double> @_Z6sincosDv8_dPU3AS5S_(<8 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, ptr addrspace(5) [[__SINCOS_]], align 64 +; CHECK-NEXT: store <8 x double> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 64 +; CHECK-NEXT: store <8 x double> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 64 ; CHECK-NEXT: ret void ; entry: @@ -574,11 +557,10 @@ ; CHECK-SAME: (<16 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <16 x double>, align 128, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <16 x double> @_Z6sincosDv16_dPU3AS0S_(<16 x double> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <16 x double>, ptr addrspace(5) [[__SINCOS_]], align 128 -; CHECK-NEXT: store <16 x double> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 128 -; CHECK-NEXT: store <16 x double> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 128 +; CHECK-NEXT: [[TMP0:%.*]] = call contract <16 x double> @_Z6sincosDv16_dPU3AS5S_(<16 x double> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x double>, ptr addrspace(5) [[__SINCOS_]], align 128 +; CHECK-NEXT: store <16 x double> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 128 +; CHECK-NEXT: store <16 x double> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 128 ; CHECK-NEXT: ret void ; entry: @@ -596,17 +578,16 @@ ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) ; CHECK-NEXT: br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: bb0: -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: br label [[COMMON_RET:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: @@ -629,16 +610,15 @@ ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: ; CHECK-NEXT: store i32 0, ptr addrspace(1) [[OTHER]], align 4 ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -662,11 +642,10 @@ ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture readnone [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], float [[TMP2]], float 0.000000e+00 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], float [[TMP1]], float 0.000000e+00 ; CHECK-NEXT: store float [[SPEC_SELECT]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; @@ -690,10 +669,9 @@ ; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[TMP1]], float [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[TMP0]], float [[TMP1]] ; CHECK-NEXT: ret float [[SELECT]] ; entry: @@ -712,11 +690,10 @@ ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: tail call void @func(ptr addrspace(1) [[VALUE_PTR]]) ; CHECK-NEXT: [[X:%.*]] = load float, ptr addrspace(1) [[VALUE_PTR]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -734,11 +711,10 @@ ; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float bitcast (i32 ptrtoint (ptr @func to i32) to float), ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float bitcast (i32 ptrtoint (ptr @func to i32) to float), ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -772,11 +748,10 @@ ; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float 4.200000e+01, ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float 4.200000e+01, ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -828,11 +803,10 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -848,11 +822,10 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call nnan contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call nnan contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -868,11 +841,10 @@ ; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call nnan contract <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 -; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call nnan contract <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -892,11 +864,10 @@ ; CHECK-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(5) [[ALLOCA0]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(5) [[ALLOCA1]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(5) [[ALLOCA0]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(5) [[ALLOCA1]], align 4 ; CHECK-NEXT: call void @use_stack_ptrs(ptr addrspace(5) [[ALLOCA0]], ptr addrspace(5) [[ALLOCA1]]) ; CHECK-NEXT: ret void ; @@ -916,9 +887,8 @@ ; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: ret float [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: ret float [[TMP0]] ; entry: %alloca0 = alloca i32, addrspace(5) @@ -933,10 +903,9 @@ ; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: ret float [[TMP1]] ; entry: %alloca0 = alloca i32, addrspace(5) @@ -952,16 +921,14 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[__SINCOS_3:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_3]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP3:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP2]]) -; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store volatile float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store volatile float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store volatile float [[TMP4]], ptr addrspace(1) [[COS_OUT]], align 4 -; CHECK-NEXT: store volatile float [[TMP4]], ptr addrspace(1) [[COS_OUT]], align 4 -; CHECK-NEXT: store volatile float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_3]]) +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store volatile float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store volatile float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store volatile float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store volatile float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store volatile float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -1024,11 +991,10 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]), !fpmath !5 -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]), !fpmath !5 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -1044,11 +1010,10 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]), !fpmath !6 -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]), !fpmath !6 +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -1065,11 +1030,10 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -1085,13 +1049,12 @@ ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] !dbg [[DBG7:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5), !dbg [[DBG14:![0-9]+]] -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr, !dbg [[DBG14]] -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]), !dbg [[DBG14]] -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4, !dbg [[DBG14]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[TMP1]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14]] -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4, !dbg [[DBG15:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[TMP2]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16:![0-9]+]] -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4, !dbg [[DBG17:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]), !dbg [[DBG14]] +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4, !dbg [[DBG14]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[TMP0]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14]] +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4, !dbg [[DBG15:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[TMP1]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16:![0-9]+]] +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4, !dbg [[DBG17:![0-9]+]] ; CHECK-NEXT: ret void, !dbg [[DBG18:![0-9]+]] ; entry: @@ -1286,15 +1249,14 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 ; CHECK-NEXT: [[SIN0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP0]]) -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: [[COS2:%.*]] = load float, ptr addrspace(5) [[COS_TMP0]], align 4 ; CHECK-NEXT: store float [[COS2]], ptr addrspace(1) [[COS_OUT]], align 4 -; CHECK-NEXT: ret float [[TMP1]] +; CHECK-NEXT: ret float [[TMP0]] ; entry: %cos.tmp0 = alloca float, addrspace(5) Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll @@ -14,12 +14,11 @@ ; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float noundef [[X]]) -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -36,12 +35,11 @@ ; CHECK-SAME: (<2 x float> noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 -; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 8 ; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef [[X]]) -; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -57,12 +55,11 @@ ; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float noundef [[X]]) -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -78,12 +75,11 @@ ; CHECK-SAME: (<2 x float> noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 -; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 8 ; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef [[X]]) -; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: Index: llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -8,7 +8,7 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos ; GCN-POSTLINK: call fast float @_Z3sinf( ; GCN-POSTLINK: call fast float @_Z3cosf( -; GCN-PRELINK: call fast float @_Z6sincosfPf( +; GCN-PRELINK: call fast float @_Z6sincosfPU3AS5f( ; GCN-NATIVE: call fast float @_Z10native_sinf( ; GCN-NATIVE: call fast float @_Z10native_cosf( define amdgpu_kernel void @test_sincos(ptr addrspace(1) nocapture %a) { @@ -29,7 +29,7 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2 ; GCN-POSTLINK: call fast <2 x float> @_Z3sinDv2_f( ; GCN-POSTLINK: call fast <2 x float> @_Z3cosDv2_f( -; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPS_( +; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPU3AS5S_( ; GCN-NATIVE: call fast <2 x float> @_Z10native_sinDv2_f( ; GCN-NATIVE: call fast <2 x float> @_Z10native_cosDv2_f( define amdgpu_kernel void @test_sincos_v2(ptr addrspace(1) nocapture %a) { @@ -50,7 +50,7 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3 ; GCN-POSTLINK: call fast <3 x float> @_Z3sinDv3_f( ; GCN-POSTLINK: call fast <3 x float> @_Z3cosDv3_f( -; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPS_( +; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPU3AS5S_( ; GCN-NATIVE: call fast <3 x float> @_Z10native_sinDv3_f( ; GCN-NATIVE: call fast <3 x float> @_Z10native_cosDv3_f( define amdgpu_kernel void @test_sincos_v3(ptr addrspace(1) nocapture %a) { @@ -74,7 +74,7 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4 ; GCN-POSTLINK: call fast <4 x float> @_Z3sinDv4_f( ; GCN-POSTLINK: call fast <4 x float> @_Z3cosDv4_f( -; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPS_( +; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPU3AS5S_( ; GCN-NATIVE: call fast <4 x float> @_Z10native_sinDv4_f( ; GCN-NATIVE: call fast <4 x float> @_Z10native_cosDv4_f( define amdgpu_kernel void @test_sincos_v4(ptr addrspace(1) nocapture %a) { @@ -95,7 +95,7 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8 ; GCN-POSTLINK: call fast <8 x float> @_Z3sinDv8_f( ; GCN-POSTLINK: call fast <8 x float> @_Z3cosDv8_f( -; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPS_( +; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPU3AS5S_( ; GCN-NATIVE: call fast <8 x float> @_Z10native_sinDv8_f( ; GCN-NATIVE: call fast <8 x float> @_Z10native_cosDv8_f( define amdgpu_kernel void @test_sincos_v8(ptr addrspace(1) nocapture %a) { @@ -116,7 +116,7 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16 ; GCN-POSTLINK: call fast <16 x float> @_Z3sinDv16_f( ; GCN-POSTLINK: call fast <16 x float> @_Z3cosDv16_f( -; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPS_( +; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPU3AS5S_( ; GCN-NATIVE: call fast <16 x float> @_Z10native_sinDv16_f( ; GCN-NATIVE: call fast <16 x float> @_Z10native_cosDv16_f( define amdgpu_kernel void @test_sincos_v16(ptr addrspace(1) nocapture %a) {