Index: llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -82,6 +82,9 @@ // sqrt bool fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); + bool insertSinCos(CallInst *Sin, CallInst *Cos, IRBuilder<> &B, + const FuncInfo &FInfo); + // sin/cos bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); @@ -89,11 +92,6 @@ bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); - // Get insertion point at entry. - BasicBlock::iterator getEntryIns(CallInst * UI); - // Insert an Alloc instruction. - AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix); - // Get a scalar native builtin single argument FP function FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo); @@ -1043,6 +1041,71 @@ return false; } +bool AMDGPULibCalls::insertSinCos(CallInst *Sin, CallInst *Cos, IRBuilder<> &B, + const FuncInfo &fInfo) { + Value *Arg = Sin->getOperand(0); + assert(Arg == Cos->getOperand(0)); + + Function *F = B.GetInsertBlock()->getParent(); + Module *M = F->getParent(); + // Merge the sin and cos. + + // for OpenCL 2.0 we have only generic implementation of sincos + // function. + // FIXME: This is not true anymore + AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo); + nf.getLeads()[0].PtrKind = + AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS); + FunctionCallee Fsincos = getFunction(M, nf); + if (!Fsincos) + return false; + + B.SetInsertPointPastAllocas(F); + + DILocation *MergedDebugLoc = + DILocation::getMergedLocation(Sin->getDebugLoc(), Cos->getDebugLoc()); + B.SetCurrentDebugLocation(MergedDebugLoc); + + AllocaInst *Alloc = B.CreateAlloca(Sin->getType(), nullptr, "__sincos_"); + + if (Instruction *ArgInst = dyn_cast(Arg)) { + // If the argument is an instruction, it must dominate all uses so put our + // sincos call there. Otherwise, right after the allocas works well enough + // if it's an argument or constant. + + B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); + B.SetCurrentDebugLocation(MergedDebugLoc); + } + + Value *P = Alloc; + Type *PTy = Fsincos.getFunctionType()->getParamType(1); + // The allocaInst allocates the memory in private address space. This need + // to be bitcasted to point to the address space of cos pointer type. + // In OpenCL 2.0 this is generic, while in 1.2 that is private. + if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + P = B.CreateAddrSpaceCast(Alloc, PTy); + + // Intersect the two sets of flags. + FastMathFlags FMF = cast(Sin)->getFastMathFlags(); + FMF &= cast(Cos)->getFastMathFlags(); + B.setFastMathFlags(FMF); + + CallInst *Call = CreateCallEx2(B, Fsincos, Arg, P); + LoadInst *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); + Reload->setDebugLoc(Cos->getDebugLoc()); + + LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *Sin << ", " << *Cos + << ") with " << *Call << '\n'); + + Sin->replaceAllUsesWith(Call); + Sin->eraseFromParent(); + + Cos->replaceAllUsesWith(Reload); + Cos->eraseFromParent(); + + return true; +} + // fold sin, cos -> sincos. bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &fInfo) { @@ -1058,106 +1121,33 @@ Value *CArgVal = FPOp->getOperand(0); CallInst *CI = cast(FPOp); - BasicBlock * const CBB = CI->getParent(); - - int const MaxScan = 30; bool Changed = false; - Module *M = CI->getModule(); FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN, fInfo); const std::string PairName = PartnerInfo.mangle(); CallInst *UI = nullptr; + + // TODO: Handle repeated uses, the generic implementation does. for (User* U : CArgVal->users()) { - CallInst *XI = dyn_cast_or_null(U); - if (!XI || XI == CI || XI->getParent() != CBB) + CallInst *XI = dyn_cast(U); + if (!XI || XI->isNoBuiltin()) continue; Function *UCallee = XI->getCalledFunction(); - if (!UCallee || !UCallee->getName().equals(PairName)) - continue; - - BasicBlock::iterator BBI = CI->getIterator(); - if (BBI == CI->getParent()->begin()) - break; - --BBI; - for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) { - if (cast(BBI) == XI) { - UI = XI; - break; - } - } - if (UI) break; + if (UCallee && UCallee->getName().equals(PairName)) + UI = XI; + else if (UI) + return Changed; } if (!UI) return Changed; - // Merge the sin and cos. - - // for OpenCL 2.0 we have only generic implementation of sincos - // function. - AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo); - nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS); - FunctionCallee Fsincos = getFunction(M, nf); - if (!Fsincos) - return Changed; - - BasicBlock::iterator ItOld = B.GetInsertPoint(); - AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_"); - B.SetInsertPoint(UI); - - Value *P = Alloc; - Type *PTy = Fsincos.getFunctionType()->getParamType(1); - // The allocaInst allocates the memory in private address space. This need - // to be bitcasted to point to the address space of cos pointer type. - // In OpenCL 2.0 this is generic, while in 1.2 that is private. - if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) - P = B.CreateAddrSpaceCast(Alloc, PTy); - CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P); - - LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with " - << *Call << "\n"); - - if (!isSin) { // CI->cos, UI->sin - B.SetInsertPoint(&*ItOld); - UI->replaceAllUsesWith(&*Call); - Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); - CI->replaceAllUsesWith(Reload); - UI->eraseFromParent(); - CI->eraseFromParent(); - } else { // CI->sin, UI->cos - Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); - UI->replaceAllUsesWith(Reload); - CI->replaceAllUsesWith(Call); - UI->eraseFromParent(); - CI->eraseFromParent(); - } - return true; -} - -// Get insertion point at entry. -BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) { - Function * Func = UI->getParent()->getParent(); - BasicBlock * BB = &Func->getEntryBlock(); - assert(BB && "Entry block not found!"); - BasicBlock::iterator ItNew = BB->begin(); - return ItNew; -} - -// Insert a AllocsInst at the beginning of function entry block. -AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B, - const char *prefix) { - BasicBlock::iterator ItNew = getEntryIns(UI); - Function *UCallee = UI->getCalledFunction(); - Type *RetType = UCallee->getReturnType(); - B.SetInsertPoint(&*ItNew); - AllocaInst *Alloc = - B.CreateAlloca(RetType, nullptr, std::string(prefix) + UI->getName()); - Alloc->setAlignment( - Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType))); - return Alloc; + CallInst *Sin = isSin ? CI : UI; + CallInst *Cos = isSin ? UI : CI; + return insertSinCos(Sin, Cos, B, fInfo) || Changed; } bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll @@ -105,10 +105,12 @@ ; CHECK-LABEL: define void @sincos_f32 ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -123,10 +125,12 @@ ; CHECK-LABEL: define void @sincos_f32_value_is_same_constantfp ; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float 4.200000e+01) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float 4.200000e+01) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float 4.200000e+01, ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -150,10 +154,12 @@ ; CHECK-LABEL: define void @sincos_v2f32 ; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.nobuiltin.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.nobuiltin.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.nobuiltin.ll @@ -56,10 +56,11 @@ ; CHECK-LABEL: define void @sincos_f32 ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll @@ -220,12 +220,14 @@ define void @sincos_f32_nocontract(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_nocontract -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call float @_Z3sinf(float [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call float @_Z3cosf(float [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -239,12 +241,14 @@ define void @sincos_v2f32_nocontract(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f32_nocontract -; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x float> @_Z3sinDv2_f(<2 x float> [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: [[CALL1:%.*]] = tail call <2 x float> @_Z3cosDv2_f(<2 x float> [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -258,12 +262,14 @@ define void @sincos_f32(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -276,12 +282,14 @@ define void @sincos_f32_order1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_order1 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -294,12 +302,14 @@ define void @sincos_v2f32(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f32 -; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -312,13 +322,15 @@ define void @sincos_v3f32(<3 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v3f32 -; CHECK-SAME: (<3 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<3 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <3 x float> @_Z3sinDv3_f(<3 x float> [[X]]) -; CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <3 x float> [[CALL]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <3 x float>, align 16, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <3 x float> @_Z6sincosDv3_fPU3AS0S_(<3 x float> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr addrspace(5) [[__SINCOS_]], align 16 +; CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: store <4 x float> [[EXTRACTVEC2]], ptr addrspace(1) [[SIN_OUT]], align 16 -; CHECK-NEXT: [[CALL5:%.*]] = tail call contract <3 x float> @_Z3cosDv3_f(<3 x float> [[X]]) -; CHECK-NEXT: [[EXTRACTVEC6:%.*]] = shufflevector <3 x float> [[CALL5]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: [[EXTRACTVEC6:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: store <4 x float> [[EXTRACTVEC6]], ptr addrspace(1) [[COS_OUT]], align 16 ; CHECK-NEXT: ret void ; @@ -334,12 +346,14 @@ define void @sincos_v4f32(<4 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v4f32 -; CHECK-SAME: (<4 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<4 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <4 x float> @_Z3sinDv4_f(<4 x float> [[X]]) -; CHECK-NEXT: store <4 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 16 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <4 x float> @_Z3cosDv4_f(<4 x float> [[X]]) -; CHECK-NEXT: store <4 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 16 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <4 x float>, align 16, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <4 x float> @_Z6sincosDv4_fPU3AS0S_(<4 x float> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr addrspace(5) [[__SINCOS_]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 16 +; CHECK-NEXT: store <4 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 16 ; CHECK-NEXT: ret void ; entry: @@ -352,12 +366,14 @@ define void @sincos_v8f32(<8 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v8f32 -; CHECK-SAME: (<8 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<8 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <8 x float> @_Z3sinDv8_f(<8 x float> [[X]]) -; CHECK-NEXT: store <8 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 32 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <8 x float> @_Z3cosDv8_f(<8 x float> [[X]]) -; CHECK-NEXT: store <8 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 32 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <8 x float>, align 32, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <8 x float> @_Z6sincosDv8_fPU3AS0S_(<8 x float> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, ptr addrspace(5) [[__SINCOS_]], align 32 +; CHECK-NEXT: store <8 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 32 +; CHECK-NEXT: store <8 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 32 ; CHECK-NEXT: ret void ; entry: @@ -370,12 +386,14 @@ define void @sincos_v16f32(<16 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v16f32 -; CHECK-SAME: (<16 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<16 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <16 x float> @_Z3sinDv16_f(<16 x float> [[X]]) -; CHECK-NEXT: store <16 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 64 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <16 x float> @_Z3cosDv16_f(<16 x float> [[X]]) -; CHECK-NEXT: store <16 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 64 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <16 x float>, align 64, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <16 x float> @_Z6sincosDv16_fPU3AS0S_(<16 x float> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x float>, ptr addrspace(5) [[__SINCOS_]], align 64 +; CHECK-NEXT: store <16 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 64 +; CHECK-NEXT: store <16 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 64 ; CHECK-NEXT: ret void ; entry: @@ -388,12 +406,14 @@ define void @sincos_f64_nocontract(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f64_nocontract -; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call double @_Z3sind(double [[X]]) -; CHECK-NEXT: store double [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: [[CALL1:%.*]] = tail call double @_Z3cosd(double [[X]]) -; CHECK-NEXT: store double [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call double @_Z6sincosdPU3AS0d(double [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store double [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store double [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -407,12 +427,14 @@ define void @sincos_v2f64_nocontract(<2 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f64_nocontract -; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call <2 x double> @_Z3sinDv2_d(<2 x double> [[X]]) -; CHECK-NEXT: store <2 x double> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 16 -; CHECK-NEXT: [[CALL1:%.*]] = tail call <2 x double> @_Z3cosDv2_d(<2 x double> [[X]]) -; CHECK-NEXT: store <2 x double> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 16 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x double>, align 16, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @_Z6sincosDv2_dPU3AS0S_(<2 x double> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr addrspace(5) [[__SINCOS_]], align 16 +; CHECK-NEXT: store <2 x double> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 16 +; CHECK-NEXT: store <2 x double> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 16 ; CHECK-NEXT: ret void ; entry: @@ -425,12 +447,14 @@ define void @sincos_f64(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f64 -; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract double @_Z3sind(double [[X]]) -; CHECK-NEXT: store double [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract double @_Z3cosd(double [[X]]) -; CHECK-NEXT: store double [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract double @_Z6sincosdPU3AS0d(double [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store double [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store double [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -443,12 +467,14 @@ define void @sincos_f64_order1(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f64_order1 -; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract double @_Z3cosd(double [[X]]) -; CHECK-NEXT: store double [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8 -; CHECK-NEXT: [[CALL:%.*]] = tail call contract double @_Z3sind(double [[X]]) -; CHECK-NEXT: store double [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca double, align 8, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract double @_Z6sincosdPU3AS0d(double [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store double [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: store double [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -461,12 +487,14 @@ define void @sincos_v2f64(<2 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f64 -; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <2 x double> @_Z3sinDv2_d(<2 x double> [[X]]) -; CHECK-NEXT: store <2 x double> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 16 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <2 x double> @_Z3cosDv2_d(<2 x double> [[X]]) -; CHECK-NEXT: store <2 x double> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 16 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x double>, align 16, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @_Z6sincosDv2_dPU3AS0S_(<2 x double> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr addrspace(5) [[__SINCOS_]], align 16 +; CHECK-NEXT: store <2 x double> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 16 +; CHECK-NEXT: store <2 x double> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 16 ; CHECK-NEXT: ret void ; entry: @@ -479,13 +507,15 @@ define void @sincos_v3f64(<3 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v3f64 -; CHECK-SAME: (<3 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<3 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <3 x double> @_Z3sinDv3_d(<3 x double> [[X]]) -; CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <3 x double> [[CALL]], <3 x double> poison, <4 x i32> +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <3 x double>, align 32, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <3 x double> @_Z6sincosDv3_dPU3AS0S_(<3 x double> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <3 x double>, ptr addrspace(5) [[__SINCOS_]], align 32 +; CHECK-NEXT: [[EXTRACTVEC2:%.*]] = shufflevector <3 x double> [[TMP1]], <3 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[EXTRACTVEC2]], ptr addrspace(1) [[SIN_OUT]], align 32 -; CHECK-NEXT: [[CALL5:%.*]] = tail call contract <3 x double> @_Z3cosDv3_d(<3 x double> [[X]]) -; CHECK-NEXT: [[EXTRACTVEC6:%.*]] = shufflevector <3 x double> [[CALL5]], <3 x double> poison, <4 x i32> +; CHECK-NEXT: [[EXTRACTVEC6:%.*]] = shufflevector <3 x double> [[TMP2]], <3 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[EXTRACTVEC6]], ptr addrspace(1) [[COS_OUT]], align 32 ; CHECK-NEXT: ret void ; @@ -501,12 +531,14 @@ define void @sincos_v4f64(<4 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v4f64 -; CHECK-SAME: (<4 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<4 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <4 x double> @_Z3sinDv4_d(<4 x double> [[X]]) -; CHECK-NEXT: store <4 x double> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 32 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <4 x double> @_Z3cosDv4_d(<4 x double> [[X]]) -; CHECK-NEXT: store <4 x double> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 32 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <4 x double>, align 32, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <4 x double> @_Z6sincosDv4_dPU3AS0S_(<4 x double> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr addrspace(5) [[__SINCOS_]], align 32 +; CHECK-NEXT: store <4 x double> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 32 +; CHECK-NEXT: store <4 x double> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 32 ; CHECK-NEXT: ret void ; entry: @@ -519,12 +551,14 @@ define void @sincos_v8f64(<8 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v8f64 -; CHECK-SAME: (<8 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<8 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <8 x double> @_Z3sinDv8_d(<8 x double> [[X]]) -; CHECK-NEXT: store <8 x double> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 64 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <8 x double> @_Z3cosDv8_d(<8 x double> [[X]]) -; CHECK-NEXT: store <8 x double> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 64 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <8 x double>, align 64, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <8 x double> @_Z6sincosDv8_dPU3AS0S_(<8 x double> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, ptr addrspace(5) [[__SINCOS_]], align 64 +; CHECK-NEXT: store <8 x double> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 64 +; CHECK-NEXT: store <8 x double> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 64 ; CHECK-NEXT: ret void ; entry: @@ -537,12 +571,14 @@ define void @sincos_v16f64(<16 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v16f64 -; CHECK-SAME: (<16 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<16 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <16 x double> @_Z3sinDv16_d(<16 x double> [[X]]) -; CHECK-NEXT: store <16 x double> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 128 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <16 x double> @_Z3cosDv16_d(<16 x double> [[X]]) -; CHECK-NEXT: store <16 x double> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 128 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <16 x double>, align 128, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <16 x double> @_Z6sincosDv16_dPU3AS0S_(<16 x double> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x double>, ptr addrspace(5) [[__SINCOS_]], align 128 +; CHECK-NEXT: store <16 x double> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 128 +; CHECK-NEXT: store <16 x double> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 128 ; CHECK-NEXT: ret void ; entry: @@ -557,18 +593,20 @@ define void @sincos_f32_different_blocks(i1 %cond, float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_different_blocks -; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) ; CHECK-NEXT: br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: ret void ; CHECK: bb0: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: br label [[COMMON_RET:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: @@ -588,17 +626,19 @@ ; The sin and cos are in different blocks but always execute define void @sincos_f32_different_blocks_dominating_always_execute(i1 %cond, float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr addrspace(1) %other) { ; CHECK-LABEL: define void @sincos_f32_different_blocks_dominating_always_execute -; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[OTHER:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]] ; CHECK: bb0: ; CHECK-NEXT: store i32 0, ptr addrspace(1) [[OTHER]], align 4 ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -619,12 +659,14 @@ ; sin dominates cos but cos doesn't always execute. define void @sincos_f32_different_blocks_dominating_conditional_execute(i1 %cond, float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr addrspace(1) %other) { ; CHECK-LABEL: define void @sincos_f32_different_blocks_dominating_conditional_execute -; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture readnone [[OTHER:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture readnone [[OTHER:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], float [[CALL1]], float 0.000000e+00 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], float [[TMP2]], float 0.000000e+00 ; CHECK-NEXT: store float [[SPEC_SELECT]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; @@ -645,11 +687,13 @@ define float @select_sin_or_cos_f32(i1 %cond, float %x) { ; CHECK-LABEL: define float @select_sin_or_cos_f32 -; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] { +; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SIN:%.*]] = tail call contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: [[COS:%.*]] = tail call contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[SIN]], float [[COS]] +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], float [[TMP1]], float [[TMP2]] ; CHECK-NEXT: ret float [[SELECT]] ; entry: @@ -665,13 +709,13 @@ ; CHECK-LABEL: define void @sincos_f32_value_is_instr ; CHECK-SAME: (ptr addrspace(1) [[VALUE_PTR:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_CALL:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: tail call void @func(ptr addrspace(1) [[VALUE_PTR]]) ; CHECK-NEXT: [[X:%.*]] = load float, ptr addrspace(1) [[VALUE_PTR]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_CALL]] to ptr +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr ; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 ; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_CALL]], align 4 ; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; @@ -777,12 +821,14 @@ define void @sincos_f32_flag_intersect0(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_flag_intersect0 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call ninf contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -795,12 +841,14 @@ define void @sincos_f32_flag_intersect1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_flag_intersect1 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan nsz contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call nnan ninf contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call nnan contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -813,12 +861,14 @@ define void @sincos_v2f32_flag_intersect1(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_v2f32_flag_intersect1 -; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call nnan ninf contract <2 x float> @_Z3sinDv2_f(<2 x float> [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: [[CALL1:%.*]] = tail call nnan nsz contract <2 x float> @_Z3cosDv2_f(<2 x float> [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call nnan contract <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -835,12 +885,12 @@ ; CHECK-LABEL: define void @sincos_f32_alloca_insertpt ; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_SIN:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5) ; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca i32, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_SIN]] to ptr +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr ; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_SIN]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 ; CHECK-NEXT: store float [[TMP1]], ptr addrspace(5) [[ALLOCA0]], align 4 ; CHECK-NEXT: store float [[TMP2]], ptr addrspace(5) [[ALLOCA1]], align 4 ; CHECK-NEXT: call void @use_stack_ptrs(ptr addrspace(5) [[ALLOCA0]], ptr addrspace(5) [[ALLOCA1]]) @@ -859,10 +909,10 @@ define float @sincos_f32_unused_result_cos(float %x) { ; CHECK-LABEL: define float @sincos_f32_unused_result_cos -; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_SIN:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_SIN]] to ptr +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr ; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) ; CHECK-NEXT: ret float [[TMP1]] ; @@ -876,12 +926,12 @@ define float @sincos_f32_unused_result_sin(float %x) { ; CHECK-LABEL: define float @sincos_f32_unused_result_sin -; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { +; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_DEAD:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_DEAD]] to ptr +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr ; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_DEAD]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 ; CHECK-NEXT: ret float [[TMP2]] ; entry: @@ -894,22 +944,15 @@ define void @sincos_f32_repeated_uses(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define void @sincos_f32_repeated_uses -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) [[SIN_OUT:%.*]], ptr addrspace(1) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) [[SIN_OUT:%.*]], ptr addrspace(1) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_SIN0:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[__SINCOS_SIN1:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_SIN0]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: store volatile float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_SIN1]] to ptr -; CHECK-NEXT: [[TMP3:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP2]]) -; CHECK-NEXT: store volatile float [[TMP3]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(5) [[__SINCOS_SIN1]], align 4 -; CHECK-NEXT: store volatile float [[TMP4]], ptr addrspace(1) [[COS_OUT]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(5) [[__SINCOS_SIN0]], align 4 -; CHECK-NEXT: store volatile float [[TMP5]], ptr addrspace(1) [[COS_OUT]], align 4 -; CHECK-NEXT: [[SIN2:%.*]] = tail call contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: store volatile float [[SIN2]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[SIN0:%.*]] = tail call contract float @_Z3sinf(float [[X]]) +; CHECK-NEXT: store volatile float [[SIN0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store volatile float [[SIN0]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: [[COS0:%.*]] = tail call contract float @_Z3cosf(float [[X]]) +; CHECK-NEXT: store volatile float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store volatile float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store volatile float [[SIN0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -969,12 +1012,14 @@ define void @sincos_f32_preserve_fpmath_0(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_preserve_fpmath_0 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]), !fpmath !5 -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]), !fpmath !5 -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -987,12 +1032,14 @@ define void @sincos_f32_preserve_fpmath_1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_preserve_fpmath_1 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]), !fpmath !5 -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]), !fpmath !6 -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -1006,12 +1053,14 @@ ; Should drop the metadata define void @sincos_f32_drop_fpmath(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) { ; CHECK-LABEL: define void @sincos_f32_drop_fpmath -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]), !fpmath !5 -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -1024,15 +1073,17 @@ define void @sincos_f32_debuginfo(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) !dbg !15 { ; CHECK-LABEL: define void @sincos_f32_debuginfo -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] !dbg [[DBG7:![0-9]+]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] !dbg [[DBG5:![0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]), !dbg [[DBG14:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[CALL]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14]] -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4, !dbg [[DBG15:![0-9]+]] -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]), !dbg [[DBG16:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[CALL1]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16]] -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4, !dbg [[DBG17:![0-9]+]] -; CHECK-NEXT: ret void, !dbg [[DBG18:![0-9]+]] +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5), !dbg [[DBG12:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr, !dbg [[DBG12]] +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]), !dbg [[DBG12]] +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4, !dbg [[DBG13:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[TMP1]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4, !dbg [[DBG15:![0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata float [[TMP2]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13]] +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4, !dbg [[DBG16:![0-9]+]] +; CHECK-NEXT: ret void, !dbg [[DBG17:![0-9]+]] ; entry: %call = tail call contract float @_Z3sinf(float %x), !dbg !19 @@ -1046,12 +1097,12 @@ define float @sin_sincos_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sin_sincos_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[SIN0:%.*]] = tail call nnan ninf nsz contract float @_Z3sinf(float [[X]]), !fpmath !5 +; CHECK-NEXT: [[SIN0:%.*]] = tail call nnan ninf nsz contract float @_Z3sinf(float [[X]]), !fpmath !18 ; CHECK-NEXT: store float [[SIN0]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[SIN1:%.*]] = call nnan contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP]]), !fpmath !6 +; CHECK-NEXT: [[SIN1:%.*]] = call nnan contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP]]), !fpmath !19 ; CHECK-NEXT: [[COS1:%.*]] = load float, ptr addrspace(5) [[COS_TMP]], align 4 ; CHECK-NEXT: store float [[COS1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret float [[SIN1]] @@ -1068,13 +1119,13 @@ define float @sin_sincos_generic_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sin_sincos_generic_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[SIN0:%.*]] = tail call nsz contract float @_Z3sinf(float [[X]]), !fpmath !5 +; CHECK-NEXT: [[SIN0:%.*]] = tail call nsz contract float @_Z3sinf(float [[X]]), !fpmath !18 ; CHECK-NEXT: store float [[SIN0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: [[COS_TMP_CAST:%.*]] = addrspacecast ptr addrspace(5) [[COS_TMP]] to ptr -; CHECK-NEXT: [[SIN1:%.*]] = call ninf nsz contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[COS_TMP_CAST]]), !fpmath !6 +; CHECK-NEXT: [[SIN1:%.*]] = call ninf nsz contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[COS_TMP_CAST]]), !fpmath !19 ; CHECK-NEXT: [[COS1:%.*]] = load float, ptr addrspace(5) [[COS_TMP]], align 4 ; CHECK-NEXT: store float [[COS1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret float [[SIN1]] @@ -1092,7 +1143,7 @@ define float @cos_sincos_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @cos_sincos_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS0:%.*]] = tail call contract float @_Z3cosf(float [[X]]) @@ -1114,7 +1165,7 @@ define float @cos_sincos_generic_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @cos_sincos_generic_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS0:%.*]] = tail call contract float @_Z3cosf(float [[X]]) @@ -1138,7 +1189,7 @@ define float @sincos_private_f32_x2(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_private_f32_x2 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5) @@ -1166,7 +1217,7 @@ define float @sincos_generic_f32_x2(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_generic_f32_x2 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5) @@ -1195,7 +1246,7 @@ define float @sincos_generic_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_generic_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5) @@ -1222,22 +1273,19 @@ define float @sincos_mixed_sin_cos_generic_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define float @sincos_mixed_sin_cos_generic_private_f32 -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR3]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_SIN1:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[SIN0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP0]]) -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_SIN1]] to ptr -; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) -; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[COS0:%.*]] = load float, ptr addrspace(5) [[COS_TMP0]], align 4 -; CHECK-NEXT: store float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[SIN1:%.*]] = call contract float @_Z3sinf(float [[X]]) +; CHECK-NEXT: store float [[SIN1]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: [[COS_TMP1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[COS_TMP1]] to ptr -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_SIN1]], align 4 -; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[COS1:%.*]] = call contract float @_Z3cosf(float [[X]]) +; CHECK-NEXT: store float [[COS1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: [[SIN2:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[COS_TMP1_CAST]]) -; CHECK-NEXT: store float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[COS2:%.*]] = load float, ptr addrspace(5) [[COS_TMP0]], align 4 +; CHECK-NEXT: store float [[COS2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret float [[SIN2]] ; entry: Index: llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll @@ -13,10 +13,12 @@ ; CHECK-LABEL: define void @sincos_f32_nobuiltin ; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float noundef [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float noundef [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -32,10 +34,12 @@ ; CHECK-LABEL: define void @sincos_v2f32_nobuiltin ; CHECK-SAME: (<2 x float> noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: @@ -50,10 +54,12 @@ ; CHECK-LABEL: define void @sincos_f32_no_builtins ; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract float @_Z3sinf(float noundef [[X]]) -; CHECK-NEXT: store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract float @_Z3cosf(float noundef [[X]]) -; CHECK-NEXT: store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4 +; CHECK-NEXT: store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -68,10 +74,12 @@ ; CHECK-LABEL: define void @sincos_v2f32_no_builtins ; CHECK-SAME: (<2 x float> noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8 -; CHECK-NEXT: [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef [[X]]) -; CHECK-NEXT: store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca <2 x float>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> [[X]], ptr [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr addrspace(5) [[__SINCOS_]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 8 ; CHECK-NEXT: ret void ; entry: