Index: lib/CodeGen/MachineScheduler.cpp =================================================================== --- lib/CodeGen/MachineScheduler.cpp +++ lib/CodeGen/MachineScheduler.cpp @@ -1054,7 +1054,10 @@ dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); ); - assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); + assert((BotRPTracker.getPos() == RegionEnd || + (RegionEnd->isDebugValue() && + BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) && + "Can't find the region bottom"); // Cache the list of excess pressure sets in this region. This will also track // the max pressure in the scheduled code for these sets. @@ -1460,7 +1463,8 @@ RegOpers.detectDeadDefs(*MI, *LIS); } - BotRPTracker.recedeSkipDebugValues(); + if (BotRPTracker.getPos() != CurrentBottom) + BotRPTracker.recedeSkipDebugValues(); SmallVector<RegisterMaskPair, 8> LiveUses; BotRPTracker.recede(RegOpers, &LiveUses); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); Index: lib/CodeGen/ScheduleDAGInstrs.cpp =================================================================== --- lib/CodeGen/ScheduleDAGInstrs.cpp +++ lib/CodeGen/ScheduleDAGInstrs.cpp @@ -776,7 +776,8 @@ if (PDiffs != nullptr) PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI); - RPTracker->recedeSkipDebugValues(); + if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI) + RPTracker->recedeSkipDebugValues(); assert(&*RPTracker->getPos() == &MI && "RPTracker in sync"); RPTracker->recede(RegOpers); } Index: lib/Target/AMDGPU/GCNSchedStrategy.cpp =================================================================== --- lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -390,7 +390,8 @@ if (MI->getIterator() != RegionEnd) { BB->remove(MI); BB->insert(RegionEnd, MI); - LIS->handleMove(*MI, true); + if (!MI->isDebugValue()) + LIS->handleMove(*MI, true); } // Reset read-undef flags and update them later. 
for (auto &Op : MI->operands()) @@ -398,13 +399,15 @@ Op.setIsUndef(false); RegisterOperands RegOpers; RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); - if (ShouldTrackLaneMasks) { - // Adjust liveness and add missing dead+read-undef flags. - SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); - RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); - } else { - // Adjust for missing dead-def flags. - RegOpers.detectDeadDefs(*MI, *LIS); + if (!MI->isDebugValue()) { + if (ShouldTrackLaneMasks) { + // Adjust liveness and add missing dead+read-undef flags. + SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); + RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); + } else { + // Adjust for missing dead-def flags. + RegOpers.detectDeadDefs(*MI, *LIS); + } } RegionEnd = MI->getIterator(); ++RegionEnd; Index: test/CodeGen/AMDGPU/debug-value.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/debug-value.ll @@ -0,0 +1,376 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -mcpu=fi < %s +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" +target triple = "amdgcn-amd-amdhsa-amdgizcl" + +define amdgpu_kernel void @binomial_options(i32 %numSteps, <4 x float> %inRand, <4 x float> addrspace(1)* nocapture readnone %output, <4 x float> addrspace(3)* nocapture %callA, <4 x float> addrspace(3)* nocapture %callB) local_unnamed_addr #0 !dbg !11 !kernel_arg_addr_space !47 !kernel_arg_access_qual !48 !kernel_arg_type !49 !kernel_arg_base_type !50 !kernel_arg_type_qual !51 { +entry: + call void @llvm.dbg.value(metadata i32 %numSteps, metadata !23, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !52 + call void @llvm.dbg.value(metadata <4 x float> %inRand, metadata !24, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !53 
+ call void @llvm.dbg.value(metadata <4 x float> addrspace(1)* %output, metadata !25, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !54 + call void @llvm.dbg.value(metadata <4 x float> addrspace(3)* %callA, metadata !26, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !55 + call void @llvm.dbg.value(metadata <4 x float> addrspace(3)* %callB, metadata !27, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !56 + %tmp = tail call i32 @llvm.amdgcn.workitem.id.x(), !dbg !57, !range !58 + call void @llvm.dbg.value(metadata i32 %tmp, metadata !28, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !59 + %sub = fsub <4 x float> , %inRand, !dbg !60 + %mul1 = fmul <4 x float> %inRand, , !dbg !61 + %tmp1 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %sub, <4 x float> , <4 x float> %mul1), !dbg !62 + call void @llvm.dbg.value(metadata <4 x float> %tmp1, metadata !30, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !63 + %mul3 = fmul <4 x float> %inRand, , !dbg !64 + %tmp2 = fadd <4 x float> %sub, %mul3, !dbg !65 + call void @llvm.dbg.value(metadata <4 x float> %tmp2, metadata !31, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !66 + %mul5 = fmul <4 x float> %inRand, , !dbg !67 + %tmp3 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %sub, <4 x float> , <4 x float> %mul5), !dbg !68 + call void @llvm.dbg.value(metadata <4 x float> %tmp3, metadata !32, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !69 + %conv = sitofp i32 %numSteps to float, !dbg !70 + %div = fdiv float 1.000000e+00, %conv, !dbg !71, !fpmath !72 + %splat.splatinsert = insertelement <4 x float> undef, float %div, i32 0, !dbg !73 + %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer, !dbg !73 + %mul = fmul <4 x float> %tmp3, %splat.splat, !dbg !74 + call void 
@llvm.dbg.value(metadata <4 x float> %mul, metadata !33, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !75 + call void @llvm.dbg.value(metadata <4 x float> %mul, metadata !76, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #3, !dbg !81 + %tmp4 = extractelement <4 x float> %mul, i64 0, !dbg !83 + call void @llvm.dbg.value(metadata float %tmp4, metadata !84, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #3, !dbg !89 + %tmp5 = fcmp olt float %tmp4, 0x39F0000000000000, !dbg !91 + %tmp6 = select i1 %tmp5, float 0x41F0000000000000, float 1.000000e+00, !dbg !91 + %tmp7 = fmul float %tmp4, %tmp6, !dbg !91 + %tmp8 = tail call float @llvm.amdgcn.rsq.f32(float %tmp7) #4, !dbg !91 + %tmp9 = fmul float %tmp7, %tmp8, !dbg !91 + %tmp10 = fmul float %tmp8, 5.000000e-01, !dbg !91 + %tmp11 = fsub float -0.000000e+00, %tmp10, !dbg !91 + %tmp12 = tail call float @llvm.fma.f32(float %tmp11, float %tmp9, float 5.000000e-01) #4, !dbg !91 + %tmp13 = tail call float @llvm.fma.f32(float %tmp10, float %tmp12, float %tmp10) #4, !dbg !91 + %tmp14 = tail call float @llvm.fma.f32(float %tmp9, float %tmp12, float %tmp9) #4, !dbg !91 + %tmp15 = fsub float -0.000000e+00, %tmp14, !dbg !91 + %tmp16 = tail call float @llvm.fma.f32(float %tmp15, float %tmp14, float %tmp7) #4, !dbg !91 + %tmp17 = tail call float @llvm.fma.f32(float %tmp16, float %tmp13, float %tmp14) #4, !dbg !91 + %tmp18 = select i1 %tmp5, float 0x3EF0000000000000, float 1.000000e+00, !dbg !91 + %tmp19 = fmul float %tmp18, %tmp17, !dbg !91 + %tmp20 = tail call zeroext i1 @llvm.amdgcn.class.f32(float %tmp7, i32 608) #4, !dbg !91 + %tmp21 = select i1 %tmp20, float %tmp7, float %tmp19, !dbg !91 + %vecinit.i = insertelement <4 x float> undef, float %tmp21, i32 0, !dbg !92 + %tmp22 = extractelement <4 x float> %mul, i64 1, !dbg !93 + call void @llvm.dbg.value(metadata float %tmp22, metadata !84, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #3, !dbg !94 
+ %tmp23 = fcmp olt float %tmp22, 0x39F0000000000000, !dbg !96 + %tmp24 = select i1 %tmp23, float 0x41F0000000000000, float 1.000000e+00, !dbg !96 + %tmp25 = fmul float %tmp22, %tmp24, !dbg !96 + %tmp26 = tail call float @llvm.amdgcn.rsq.f32(float %tmp25) #4, !dbg !96 + %tmp27 = fmul float %tmp25, %tmp26, !dbg !96 + %tmp28 = fmul float %tmp26, 5.000000e-01, !dbg !96 + %tmp29 = fsub float -0.000000e+00, %tmp28, !dbg !96 + %tmp30 = tail call float @llvm.fma.f32(float %tmp29, float %tmp27, float 5.000000e-01) #4, !dbg !96 + %tmp31 = tail call float @llvm.fma.f32(float %tmp28, float %tmp30, float %tmp28) #4, !dbg !96 + %tmp32 = tail call float @llvm.fma.f32(float %tmp27, float %tmp30, float %tmp27) #4, !dbg !96 + %tmp33 = fsub float -0.000000e+00, %tmp32, !dbg !96 + %tmp34 = tail call float @llvm.fma.f32(float %tmp33, float %tmp32, float %tmp25) #4, !dbg !96 + %tmp35 = tail call float @llvm.fma.f32(float %tmp34, float %tmp31, float %tmp32) #4, !dbg !96 + %tmp36 = select i1 %tmp23, float 0x3EF0000000000000, float 1.000000e+00, !dbg !96 + %tmp37 = fmul float %tmp36, %tmp35, !dbg !96 + %tmp38 = tail call zeroext i1 @llvm.amdgcn.class.f32(float %tmp25, i32 608) #4, !dbg !96 + %tmp39 = select i1 %tmp38, float %tmp25, float %tmp37, !dbg !96 + %vecinit2.i = insertelement <4 x float> %vecinit.i, float %tmp39, i32 1, !dbg !92 + %tmp40 = extractelement <4 x float> %mul, i64 2, !dbg !97 + call void @llvm.dbg.value(metadata float %tmp40, metadata !84, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #3, !dbg !98 + %tmp41 = fcmp olt float %tmp40, 0x39F0000000000000, !dbg !100 + %tmp42 = select i1 %tmp41, float 0x41F0000000000000, float 1.000000e+00, !dbg !100 + %tmp43 = fmul float %tmp40, %tmp42, !dbg !100 + %tmp44 = tail call float @llvm.amdgcn.rsq.f32(float %tmp43) #4, !dbg !100 + %tmp45 = fmul float %tmp43, %tmp44, !dbg !100 + %tmp46 = fmul float %tmp44, 5.000000e-01, !dbg !100 + %tmp47 = fsub float -0.000000e+00, %tmp46, !dbg !100 + %tmp48 = tail call float 
@llvm.fma.f32(float %tmp47, float %tmp45, float 5.000000e-01) #4, !dbg !100 + %tmp49 = tail call float @llvm.fma.f32(float %tmp46, float %tmp48, float %tmp46) #4, !dbg !100 + %tmp50 = tail call float @llvm.fma.f32(float %tmp45, float %tmp48, float %tmp45) #4, !dbg !100 + %tmp51 = fsub float -0.000000e+00, %tmp50, !dbg !100 + %tmp52 = tail call float @llvm.fma.f32(float %tmp51, float %tmp50, float %tmp43) #4, !dbg !100 + %tmp53 = tail call float @llvm.fma.f32(float %tmp52, float %tmp49, float %tmp50) #4, !dbg !100 + %tmp54 = select i1 %tmp41, float 0x3EF0000000000000, float 1.000000e+00, !dbg !100 + %tmp55 = fmul float %tmp54, %tmp53, !dbg !100 + %tmp56 = tail call zeroext i1 @llvm.amdgcn.class.f32(float %tmp43, i32 608) #4, !dbg !100 + %tmp57 = select i1 %tmp56, float %tmp43, float %tmp55, !dbg !100 + %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %tmp57, i32 2, !dbg !92 + %tmp58 = extractelement <4 x float> %mul, i64 3, !dbg !101 + call void @llvm.dbg.value(metadata float %tmp58, metadata !84, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #3, !dbg !102 + %tmp59 = fcmp olt float %tmp58, 0x39F0000000000000, !dbg !104 + %tmp60 = select i1 %tmp59, float 0x41F0000000000000, float 1.000000e+00, !dbg !104 + %tmp61 = fmul float %tmp58, %tmp60, !dbg !104 + %tmp62 = tail call float @llvm.amdgcn.rsq.f32(float %tmp61) #4, !dbg !104 + %tmp63 = fmul float %tmp61, %tmp62, !dbg !104 + %tmp64 = fmul float %tmp62, 5.000000e-01, !dbg !104 + %tmp65 = fsub float -0.000000e+00, %tmp64, !dbg !104 + %tmp66 = tail call float @llvm.fma.f32(float %tmp65, float %tmp63, float 5.000000e-01) #4, !dbg !104 + %tmp67 = tail call float @llvm.fma.f32(float %tmp64, float %tmp66, float %tmp64) #4, !dbg !104 + %tmp68 = tail call float @llvm.fma.f32(float %tmp63, float %tmp66, float %tmp63) #4, !dbg !104 + %tmp69 = fsub float -0.000000e+00, %tmp68, !dbg !104 + %tmp70 = tail call float @llvm.fma.f32(float %tmp69, float %tmp68, float %tmp61) #4, !dbg !104 + %tmp71 = tail 
call float @llvm.fma.f32(float %tmp70, float %tmp67, float %tmp68) #4, !dbg !104 + %tmp72 = select i1 %tmp59, float 0x3EF0000000000000, float 1.000000e+00, !dbg !104 + %tmp73 = fmul float %tmp72, %tmp71, !dbg !104 + %tmp74 = tail call zeroext i1 @llvm.amdgcn.class.f32(float %tmp61, i32 608) #4, !dbg !104 + %tmp75 = select i1 %tmp74, float %tmp61, float %tmp73, !dbg !104 + %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %tmp75, i32 3, !dbg !92 + %mul6 = fmul <4 x float> %vecinit6.i, , !dbg !105 + call void @llvm.dbg.value(metadata <4 x float> %mul6, metadata !34, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !106 + %mul7 = fmul <4 x float> %mul, , !dbg !107 + call void @llvm.dbg.value(metadata <4 x float> %mul7, metadata !35, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !108 + call void @llvm.dbg.value(metadata <4 x float> %mul7, metadata !36, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !109 + %div8 = fdiv <4 x float> , %mul7, !dbg !110, !fpmath !72 + call void @llvm.dbg.value(metadata <4 x float> %div8, metadata !37, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !111 + call void @llvm.dbg.value(metadata <4 x float> %mul6, metadata !38, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !112 + %div9 = fdiv <4 x float> , %mul6, !dbg !113, !fpmath !72 + call void @llvm.dbg.value(metadata <4 x float> %div9, metadata !39, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !114 + %sub10 = fsub <4 x float> %mul7, %div9, !dbg !115 + %sub11 = fsub <4 x float> %mul6, %div9, !dbg !116 + %div12 = fdiv <4 x float> %sub10, %sub11, !dbg !117, !fpmath !72 + call void @llvm.dbg.value(metadata <4 x float> %div12, metadata !40, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !118 + %mul14 = fmul <4 x float> %div8, %div12, !dbg !119 + call void @llvm.dbg.value(metadata <4 x float> %mul14, metadata 
!42, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !120 + %conv16 = uitofp i32 %tmp to float, !dbg !121 + %neg = fsub float -0.000000e+00, %conv, !dbg !122 + %tmp76 = tail call float @llvm.fmuladd.f32(float %conv16, float 2.000000e+00, float %neg), !dbg !122 + %splat.splatinsert19 = insertelement <4 x float> undef, float %tmp76, i32 0, !dbg !123 + %splat.splat20 = shufflevector <4 x float> %splat.splatinsert19, <4 x float> undef, <4 x i32> zeroinitializer, !dbg !123 + %mul21 = fmul <4 x float> %splat.splat20, %mul6, !dbg !124 + %neg23 = fsub <4 x float> , %tmp2, !dbg !125 + %tmp77 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %tmp1, <4 x float> %mul21, <4 x float> %neg23), !dbg !125 + call void @llvm.dbg.value(metadata <4 x float> %tmp77, metadata !44, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !126 + %tmp78 = extractelement <4 x float> %tmp77, i64 0, !dbg !127 + %cmp = fcmp ogt float %tmp78, 0.000000e+00, !dbg !128 + %cond = select i1 %cmp, float %tmp78, float 0.000000e+00, !dbg !127 + %arrayidx = getelementptr inbounds <4 x float>, <4 x float> addrspace(3)* %callA, i32 %tmp, !dbg !129 + %tmp79 = insertelement <4 x float> undef, float %cond, i64 0, !dbg !130 + %tmp80 = extractelement <4 x float> %tmp77, i64 1, !dbg !131 + %cmp25 = fcmp ogt float %tmp80, 0.000000e+00, !dbg !132 + %cond30 = select i1 %cmp25, float %tmp80, float 0.000000e+00, !dbg !131 + %tmp81 = insertelement <4 x float> %tmp79, float %cond30, i64 1, !dbg !133 + %tmp82 = extractelement <4 x float> %tmp77, i64 2, !dbg !134 + %cmp33 = fcmp ogt float %tmp82, 0.000000e+00, !dbg !135 + %cond38 = select i1 %cmp33, float %tmp82, float 0.000000e+00, !dbg !134 + %tmp83 = insertelement <4 x float> %tmp81, float %cond38, i64 2, !dbg !136 + %tmp84 = extractelement <4 x float> %tmp77, i64 3, !dbg !137 + %cmp41 = fcmp ogt float %tmp84, 0.000000e+00, !dbg !138 + %cond46 = select i1 %cmp41, float %tmp84, float 0.000000e+00, !dbg !137 + %tmp85 = 
insertelement <4 x float> %tmp83, float %cond46, i64 3, !dbg !139 + store <4 x float> %tmp85, <4 x float> addrspace(3)* %arrayidx, align 16, !dbg !139 + call void @llvm.dbg.value(metadata i32 %numSteps, metadata !45, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !141 + %cmp49110 = icmp sgt i32 %numSteps, 0, !dbg !142 + br i1 %cmp49110, label %for.body.lr.ph, label %for.cond.cleanup, !dbg !144 + +for.body.lr.ph: ; preds = %entry + %arrayidx57 = getelementptr inbounds <4 x float>, <4 x float> addrspace(3)* %callB, i32 %tmp + br label %for.body, !dbg !144 + +for.cond.cleanup: ; preds = %if.end, %entry + ret void, !dbg !145 + +for.body: ; preds = %if.end, %for.body.lr.ph + %j.0111 = phi i32 [ %numSteps, %for.body.lr.ph ], [ %sub58, %if.end ] + call void @llvm.dbg.value(metadata i32 %j.0111, metadata !45, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !141 + %cmp51 = icmp ult i32 %tmp, %j.0111, !dbg !146 + br i1 %cmp51, label %if.then, label %if.end, !dbg !149 + +if.then: ; preds = %for.body + %tmp86 = load <4 x float>, <4 x float> addrspace(3)* %arrayidx, align 16, !dbg !150, !tbaa !152 + %mul55 = fmul <4 x float> %mul14, %tmp86, !dbg !155 + store <4 x float> %mul55, <4 x float> addrspace(3)* %arrayidx57, align 16, !dbg !156, !tbaa !152 + br label %if.end, !dbg !157 + +if.end: ; preds = %if.then, %for.body + %sub58 = add nsw i32 %j.0111, -2, !dbg !159 + call void @llvm.dbg.value(metadata i32 %sub58, metadata !45, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !141 + %cmp49 = icmp sgt i32 %j.0111, 2, !dbg !142 + br i1 %cmp49, label %for.body, label %for.cond.cleanup, !dbg !144, !llvm.loop !160 +} + +; Function Attrs: nounwind readnone speculatable +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +; Function Attrs: nounwind readnone speculatable +declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1 + +; Function Attrs: nounwind readnone speculatable +declare float 
@llvm.fmuladd.f32(float, float, float) #1 + +; Function Attrs: convergent nounwind + +; Function Attrs: nounwind readnone speculatable +declare float @llvm.fma.f32(float, float, float) #1 + +; Function Attrs: nounwind readnone speculatable +declare float @llvm.amdgcn.rsq.f32(float) #1 + +; Function Attrs: nounwind readnone speculatable +declare i1 @llvm.amdgcn.class.f32(float, i32) #1 + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { convergent nounwind } +attributes #3 = { nounwind } +attributes #4 = { convergent nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!5, !6, !7} +!opencl.ocl.version = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 (https://github.com/llvm-mirror/clang.git c3671386f79520aa31e2ba9addcb2eb29733dec4) (https://github.com/llvm-mirror/llvm.git ff07d65649bda484686d12d9fc8df5738c659463)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3) +!1 = !DIFile(filename: "sched.cl", directory: "/tmp") +!2 = !{} +!3 = !{!4} +!4 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) +!5 = !{i32 2, !"Dwarf Version", i32 2} +!6 = !{i32 2, !"Debug Info Version", i32 3} +!7 = !{i32 1, !"wchar_size", i32 4} +!8 = !{i32 2, i32 0} +!11 = distinct 
!DISubprogram(name: "binomial_options", scope: !1, file: !1, line: 14, type: !12, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !22) +!12 = !DISubroutineType(types: !13) +!13 = !{null, !14, !15, !20, !21, !21} +!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "float4", file: !16, line: 127, baseType: !17) +!16 = !DIFile(filename: "/tmp/opencl-c.h", directory: "/tmp") +!17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !4, size: 128, flags: DIFlagVector, elements: !18) +!18 = !{!19} +!19 = !DISubrange(count: 4) +!20 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64) +!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 32, dwarfAddressSpace: 2) +!22 = !{!23, !24, !25, !26, !27, !28, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45} +!23 = !DILocalVariable(name: "numSteps", arg: 1, scope: !11, file: !1, line: 15, type: !14) +!24 = !DILocalVariable(name: "inRand", arg: 2, scope: !11, file: !1, line: 16, type: !15) +!25 = !DILocalVariable(name: "output", arg: 3, scope: !11, file: !1, line: 17, type: !20) +!26 = !DILocalVariable(name: "callA", arg: 4, scope: !11, file: !1, line: 18, type: !21) +!27 = !DILocalVariable(name: "callB", arg: 5, scope: !11, file: !1, line: 19, type: !21) +!28 = !DILocalVariable(name: "tid", scope: !11, file: !1, line: 22, type: !29) +!29 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!30 = !DILocalVariable(name: "s", scope: !11, file: !1, line: 24, type: !15) +!31 = !DILocalVariable(name: "x", scope: !11, file: !1, line: 25, type: !15) +!32 = !DILocalVariable(name: "optionYears", scope: !11, file: !1, line: 26, type: !15) +!33 = !DILocalVariable(name: "dt", scope: !11, file: !1, line: 27, type: !15) +!34 = !DILocalVariable(name: "vsdt", scope: !11, file: !1, line: 28, type: !15) +!35 = !DILocalVariable(name: 
"rdt", scope: !11, file: !1, line: 29, type: !15) +!36 = !DILocalVariable(name: "r", scope: !11, file: !1, line: 30, type: !15) +!37 = !DILocalVariable(name: "rInv", scope: !11, file: !1, line: 31, type: !15) +!38 = !DILocalVariable(name: "u", scope: !11, file: !1, line: 32, type: !15) +!39 = !DILocalVariable(name: "d", scope: !11, file: !1, line: 33, type: !15) +!40 = !DILocalVariable(name: "pu", scope: !11, file: !1, line: 34, type: !15) +!41 = !DILocalVariable(name: "pd", scope: !11, file: !1, line: 35, type: !15) +!42 = !DILocalVariable(name: "puByr", scope: !11, file: !1, line: 36, type: !15) +!43 = !DILocalVariable(name: "pdByr", scope: !11, file: !1, line: 37, type: !15) +!44 = !DILocalVariable(name: "profit", scope: !11, file: !1, line: 39, type: !15) +!45 = !DILocalVariable(name: "j", scope: !46, file: !1, line: 46, type: !14) +!46 = distinct !DILexicalBlock(scope: !11, file: !1, line: 46, column: 5) +!47 = !{i32 0, i32 0, i32 1, i32 3, i32 3} +!48 = !{!"none", !"none", !"none", !"none", !"none"} +!49 = !{!"int", !"float4", !"float4*", !"float4*", !"float4*"} +!50 = !{!"int", !"float __attribute__((ext_vector_type(4)))", !"float __attribute__((ext_vector_type(4)))*", !"float __attribute__((ext_vector_type(4)))*", !"float __attribute__((ext_vector_type(4)))*"} +!51 = !{!"", !"", !"", !"", !""} +!52 = !DILocation(line: 15, column: 9, scope: !11) +!53 = !DILocation(line: 16, column: 12, scope: !11) +!54 = !DILocation(line: 17, column: 22, scope: !11) +!55 = !DILocation(line: 18, column: 21, scope: !11) +!56 = !DILocation(line: 19, column: 21, scope: !11) +!57 = !DILocation(line: 22, column: 24, scope: !11) +!58 = !{i32 0, i32 1024} +!59 = !DILocation(line: 22, column: 18, scope: !11) +!60 = !DILocation(line: 24, column: 22, scope: !11) +!61 = !DILocation(line: 24, column: 48, scope: !11) +!62 = !DILocation(line: 24, column: 39, scope: !11) +!63 = !DILocation(line: 24, column: 12, scope: !11) +!64 = !DILocation(line: 25, column: 48, scope: !11) +!65 = 
!DILocation(line: 25, column: 39, scope: !11) +!66 = !DILocation(line: 25, column: 12, scope: !11) +!67 = !DILocation(line: 26, column: 59, scope: !11) +!68 = !DILocation(line: 26, column: 50, scope: !11) +!69 = !DILocation(line: 26, column: 12, scope: !11) +!70 = !DILocation(line: 27, column: 39, scope: !11) +!71 = !DILocation(line: 27, column: 37, scope: !11) +!72 = !{float 2.500000e+00} +!73 = !DILocation(line: 27, column: 31, scope: !11) +!74 = !DILocation(line: 27, column: 29, scope: !11) +!75 = !DILocation(line: 27, column: 12, scope: !11) +!76 = !DILocalVariable(name: "x", arg: 1, scope: !77, file: !1, line: 8, type: !15) +!77 = distinct !DISubprogram(name: "sqrt4", scope: !1, file: !1, line: 8, type: !78, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !80) +!78 = !DISubroutineType(types: !79) +!79 = !{!15, !15} +!80 = !{!76} +!81 = !DILocation(line: 8, column: 21, scope: !77, inlinedAt: !82) +!82 = distinct !DILocation(line: 28, column: 32, scope: !11) +!83 = !DILocation(line: 9, column: 26, scope: !77, inlinedAt: !82) +!84 = !DILocalVariable(name: "x", arg: 1, scope: !85, file: !1, line: 4, type: !4) +!85 = distinct !DISubprogram(name: "mysqrt", scope: !1, file: !1, line: 4, type: !86, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !88) +!86 = !DISubroutineType(types: !87) +!87 = !{!4, !4} +!88 = !{!84} +!89 = !DILocation(line: 4, column: 20, scope: !85, inlinedAt: !90) +!90 = distinct !DILocation(line: 9, column: 19, scope: !77, inlinedAt: !82) +!91 = !DILocation(line: 5, column: 10, scope: !85, inlinedAt: !90) +!92 = !DILocation(line: 9, column: 18, scope: !77, inlinedAt: !82) +!93 = !DILocation(line: 9, column: 39, scope: !77, inlinedAt: !82) +!94 = !DILocation(line: 4, column: 20, scope: !85, inlinedAt: !95) +!95 = distinct !DILocation(line: 9, column: 32, scope: !77, inlinedAt: !82) +!96 = !DILocation(line: 5, column: 
10, scope: !85, inlinedAt: !95) +!97 = !DILocation(line: 9, column: 52, scope: !77, inlinedAt: !82) +!98 = !DILocation(line: 4, column: 20, scope: !85, inlinedAt: !99) +!99 = distinct !DILocation(line: 9, column: 45, scope: !77, inlinedAt: !82) +!100 = !DILocation(line: 5, column: 10, scope: !85, inlinedAt: !99) +!101 = !DILocation(line: 9, column: 65, scope: !77, inlinedAt: !82) +!102 = !DILocation(line: 4, column: 20, scope: !85, inlinedAt: !103) +!103 = distinct !DILocation(line: 9, column: 58, scope: !77, inlinedAt: !82) +!104 = !DILocation(line: 5, column: 10, scope: !85, inlinedAt: !103) +!105 = !DILocation(line: 28, column: 30, scope: !11) +!106 = !DILocation(line: 28, column: 12, scope: !11) +!107 = !DILocation(line: 29, column: 27, scope: !11) +!108 = !DILocation(line: 29, column: 12, scope: !11) +!109 = !DILocation(line: 30, column: 12, scope: !11) +!110 = !DILocation(line: 31, column: 24, scope: !11) +!111 = !DILocation(line: 31, column: 12, scope: !11) +!112 = !DILocation(line: 32, column: 12, scope: !11) +!113 = !DILocation(line: 33, column: 21, scope: !11) +!114 = !DILocation(line: 33, column: 12, scope: !11) +!115 = !DILocation(line: 34, column: 20, scope: !11) +!116 = !DILocation(line: 34, column: 28, scope: !11) +!117 = !DILocation(line: 34, column: 24, scope: !11) +!118 = !DILocation(line: 34, column: 12, scope: !11) +!119 = !DILocation(line: 36, column: 23, scope: !11) +!120 = !DILocation(line: 36, column: 12, scope: !11) +!121 = !DILocation(line: 39, column: 41, scope: !11) +!122 = !DILocation(line: 39, column: 45, scope: !11) +!123 = !DILocation(line: 39, column: 33, scope: !11) +!124 = !DILocation(line: 39, column: 31, scope: !11) +!125 = !DILocation(line: 39, column: 65, scope: !11) +!126 = !DILocation(line: 39, column: 12, scope: !11) +!127 = !DILocation(line: 40, column: 20, scope: !11) +!128 = !DILocation(line: 40, column: 29, scope: !11) +!129 = !DILocation(line: 40, column: 5, scope: !11) +!130 = !DILocation(line: 40, column: 18, scope: 
!11) +!131 = !DILocation(line: 41, column: 20, scope: !11) +!132 = !DILocation(line: 41, column: 29, scope: !11) +!133 = !DILocation(line: 41, column: 18, scope: !11) +!134 = !DILocation(line: 42, column: 20, scope: !11) +!135 = !DILocation(line: 42, column: 29, scope: !11) +!136 = !DILocation(line: 42, column: 18, scope: !11) +!137 = !DILocation(line: 43, column: 20, scope: !11) +!138 = !DILocation(line: 43, column: 29, scope: !11) +!139 = !DILocation(line: 43, column: 18, scope: !11) +!140 = !DILocation(line: 45, column: 5, scope: !11) +!141 = !DILocation(line: 46, column: 13, scope: !46) +!142 = !DILocation(line: 46, column: 29, scope: !143) +!143 = distinct !DILexicalBlock(scope: !46, file: !1, line: 46, column: 5) +!144 = !DILocation(line: 46, column: 5, scope: !46) +!145 = !DILocation(line: 54, column: 1, scope: !11) +!146 = !DILocation(line: 48, column: 16, scope: !147) +!147 = distinct !DILexicalBlock(scope: !148, file: !1, line: 48, column: 12) +!148 = distinct !DILexicalBlock(scope: !143, file: !1, line: 47, column: 5) +!149 = !DILocation(line: 48, column: 12, scope: !148) +!150 = !DILocation(line: 50, column: 34, scope: !151) +!151 = distinct !DILexicalBlock(scope: !147, file: !1, line: 49, column: 9) +!152 = !{!153, !153, i64 0} +!153 = !{!"omnipotent char", !154, i64 0} +!154 = !{!"Simple C/C++ TBAA"} +!155 = !DILocation(line: 50, column: 32, scope: !151) +!156 = !DILocation(line: 50, column: 24, scope: !151) +!157 = !DILocation(line: 51, column: 9, scope: !151) +!158 = !DILocation(line: 52, column: 9, scope: !148) +!159 = !DILocation(line: 46, column: 36, scope: !143) +!160 = distinct !{!160, !144, !161} +!161 = !DILocation(line: 53, column: 5, scope: !46)