Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -1072,6 +1072,16 @@ assert(BValNo && "All sublanes should be live"); LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints); BValNo->markUnused(); + // If any endpoint is the copy itself, meaning it was dead in this lane, + // then remove it. + for (unsigned I = 0; I != EndPoints.size(); ) { + if (!EndPoints[I].getInstrDistance(CopyIdx)) { + EndPoints[I] = EndPoints.back(); + EndPoints.pop_back(); + continue; + } + ++I; + } LIS->extendToIndices(SR, EndPoints); } // If any dead defs were extended, truncate them. Index: test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/regcoalescing-remove-partial-redundancy-assert.mir @@ -0,0 +1,638 @@ +# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s +# +# This test gave "Use not jointly dominated by defs" when +# removePartialRedundancy attempted to prune and then re-extend a subrange. +# +# GCN: {{^body}} + +--- | + ; ModuleID = '' + source_filename = "llpcPipeline" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" + target triple = "amdgcn--amdpal" + + ; Function Attrs: nounwind readnone + declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i1) #0 + + ; Function Attrs: nounwind + define dllexport amdgpu_ps void @_amdgpu_ps_main() local_unnamed_addr #1 !spirv.ExecutionModel !1 { + .entry: + %tmp = call <2 x float> @llvm.trunc.v2f32(<2 x float> undef) + %tmp3 = fptoui <2 x float> %tmp to <2 x i32> + %tmp4 = lshr <2 x i32> %tmp3, + %tmp5 = shufflevector <2 x i32> %tmp4, <2 x i32> undef, <4 x i32> + %tmp6 = shufflevector <4 x i32> undef, <4 x i32> %tmp5, <4 x i32> + %__llpc_global_proxy_r2.0.vec.insert = insertelement <4 x i32> %tmp6, i32 undef, i32 0 + %__llpc_global_proxy_r2.8.vec.insert = insertelement <4 x i32> %__llpc_global_proxy_r2.0.vec.insert, i32 undef, i32 2 + %tmp8 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 3044, i1 false) #5 + %tmp9 = icmp eq i32 %tmp8, 0 + br i1 %tmp9, label %bb57, label %bb10, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb10: ; preds = %.entry + br i1 undef, label %bb12, label %bb11, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb11: ; preds = %bb10 + %__llpc_global_proxy_r2.12.vec.insert961 = insertelement <4 x i32> %__llpc_global_proxy_r2.8.vec.insert, i32 undef, i32 3 + br label %bb12, !structurizecfg.uniform !2 + + bb12: ; preds = %bb10, %bb11 + %__llpc_global_proxy_r2.0 = phi <4 x i32> [ %__llpc_global_proxy_r2.12.vec.insert961, %bb11 ], [ %__llpc_global_proxy_r2.8.vec.insert, %bb10 ] + %__llpc_global_proxy_r2.12.vec.insert951 = insertelement <4 x i32> %__llpc_global_proxy_r2.0, i32 0, i32 3 + %__llpc_global_proxy_r2.12.vec.insert953 = insertelement <4 x i32> %__llpc_global_proxy_r2.12.vec.insert951, i32 undef, i32 3 + br i1 undef, label %.lr.ph2435, label %._crit_edge2436, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph2435: ; preds = %bb12 + %tmp13 = shufflevector <4 x i32> %__llpc_global_proxy_r2.12.vec.insert953, <4 x i32> undef, <3 x i32> + %tmp14 = bitcast <3 x i32> %tmp13 to <3 x float> + %bc2311 = bitcast <4 x i32> 
%__llpc_global_proxy_r2.0 to <4 x float> + %tmp16 = extractelement <4 x float> %bc2311, i32 1 + %tmp17 = fmul reassoc nnan arcp contract <3 x float> zeroinitializer, %tmp14 + %tmp18 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 2708, i1 false) #5 + %tmp30 = fmul reassoc nnan arcp contract float %tmp16, 0.000000e+00 + %tmp31 = bitcast float %tmp30 to i32 + %__llpc_global_proxy_r14.0.vec.insert = insertelement <4 x i32> undef, i32 %tmp31, i32 0 + %tmp32 = shufflevector <4 x i32> %__llpc_global_proxy_r14.0.vec.insert, <4 x i32> undef, <3 x i32> zeroinitializer + %tmp33 = bitcast <3 x i32> %tmp32 to <3 x float> + %tmp34 = fadd reassoc nnan arcp contract <3 x float> zeroinitializer, %tmp33 + %tmp35 = fadd reassoc nnan arcp contract <3 x float> %tmp17, zeroinitializer + %y2.i463 = extractelement <3 x float> %tmp35, i32 2 + %tmp36 = fmul float 0.000000e+00, %y2.i463 + %tmp37 = call float @llvm.amdgcn.fmed3.f32(float %tmp36, float 0.000000e+00, float 1.000000e+00) #5 + %tmp38 = insertelement <3 x float> undef, float %tmp37, i32 2 + %tmp39 = fmul reassoc nnan arcp contract <3 x float> %tmp38, zeroinitializer + %tmp40 = fmul reassoc nnan arcp contract <3 x float> %tmp39, + %tmp41 = fmul reassoc nnan arcp contract <3 x float> %tmp40, zeroinitializer + %tmp42 = fmul reassoc nnan arcp contract <3 x float> zeroinitializer, %tmp34 + %tmp43 = fmul reassoc nnan arcp contract <3 x float> %tmp42, zeroinitializer + %tmp44 = fmul reassoc nnan arcp contract <3 x float> %tmp41, zeroinitializer + %tmp45 = fadd reassoc nnan arcp contract <3 x float> %tmp44, %tmp43 + %tmp52 = icmp ult i32 0, %tmp18 + br label %bb19, !structurizecfg.uniform !2 + + bb19: ; preds = %bb19, %.lr.ph2435 + %__llpc_global_proxy_r13.02429 = phi <4 x i32> [ undef, %.lr.ph2435 ], [ %tmp51, %bb19 ] + %__llpc_global_proxy_r13.12.vec.insert1895 = insertelement <4 x i32> %__llpc_global_proxy_r13.02429, i32 0, i32 3 + %tmp46 = shufflevector <4 x i32> %__llpc_global_proxy_r13.12.vec.insert1895, <4 x i32> undef, <3 x i32> + %tmp47 = bitcast <3 x i32> %tmp46 to <3 x float> + %tmp48 = fadd reassoc nnan arcp contract <3 x float> %tmp45, %tmp47 + %tmp49 = bitcast <3 x float> %tmp48 to <3 x i32> + %tmp50 = shufflevector <3 x i32> %tmp49, <3 x i32> undef, <4 x i32> + %tmp51 = shufflevector <4 x i32> %tmp50, <4 x i32> %__llpc_global_proxy_r13.12.vec.insert1895, <4 x i32> + br i1 %tmp52, label %bb19, label %._crit_edge2436, !llvm.loop !3, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + ._crit_edge2436: ; preds = %bb19, %bb12 + %tmp15 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 2704, i1 false) #5 + %__llpc_global_proxy_r10.12.vec.insert1492 = insertelement <4 x i32> undef, i32 %tmp15, i32 3 + %tmp53 = shufflevector <4 x i32> %__llpc_global_proxy_r2.12.vec.insert953, <4 x i32> undef, <3 x i32> + br label %bb57, !structurizecfg.uniform !2 + + bb57: ; preds = %._crit_edge2436, %.entry + %__llpc_global_proxy_r10.2 = phi <4 x i32> [ undef, %.entry ], [ %__llpc_global_proxy_r10.12.vec.insert1492, %._crit_edge2436 ] + %__llpc_global_proxy_r2.1 = phi <4 x i32> [ %__llpc_global_proxy_r2.8.vec.insert, %.entry ], [ %__llpc_global_proxy_r2.12.vec.insert953, %._crit_edge2436 ] + br i1 undef, label %bb72, label %bb58, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb58: ; preds = %bb57 + br i1 undef, label %bb64, label %bb59, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb59: ; preds = %bb58 + br label %bb64, !structurizecfg.uniform !2 + + bb64: ; preds = %bb59, %bb58 + br i1 undef, label %.lr.ph2410, label %bb72, 
!structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph2410: ; preds = %bb64 + br i1 undef, label %.lr.ph2381, label %._crit_edge2382, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph2381: ; preds = %.lr.ph2410 + br i1 undef, label %bb71, label %bb67, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb67: ; preds = %.lr.ph2381 + br label %bb71, !structurizecfg.uniform !2 + + bb71: ; preds = %bb67, %.lr.ph2381 + br label %._crit_edge2382, !structurizecfg.uniform !2 + + ._crit_edge2382: ; preds = %bb71, %.lr.ph2410 + br label %bb72, !structurizecfg.uniform !2 + + bb72: ; preds = %._crit_edge2382, %bb64, %bb57 + br i1 undef, label %bb84, label %bb73, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb73: ; preds = %bb72 + br i1 undef, label %bb84, label %.lr.ph2364, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph2364: ; preds = %bb73 + br i1 undef, label %._crit_edge2345, label %.lr.ph2344, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph2344: ; preds = %.lr.ph2364 + br i1 undef, label %bb77, label %bb83, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb77: ; preds = %.lr.ph2344 + br i1 undef, label %bb79, label %bb83, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb79: ; preds = %bb77 + br label %bb83, !structurizecfg.uniform !2 + + bb83: ; preds = %bb79, %bb77, %.lr.ph2344 + br label %._crit_edge2345, !structurizecfg.uniform !2 + + ._crit_edge2345: ; preds = %bb83, %.lr.ph2364 + br label %bb84, !structurizecfg.uniform !2 + + bb84: ; preds = %._crit_edge2345, %bb73, %bb72 + br i1 undef, label %bb126, label %bb85, !amdgpu.uniform !2 + + bb85: ; preds = %bb84 + br i1 undef, label %bb126, label %.lr.ph2332, !amdgpu.uniform !2 + + .lr.ph2332: ; preds = %bb85 + %__llpc_global_proxy_r2.0.vec.insert905 = insertelement <4 x i32> %__llpc_global_proxy_r2.1, i32 undef, i32 0 + %__llpc_global_proxy_r2.12.vec.insert943 = insertelement <4 x i32> %__llpc_global_proxy_r2.0.vec.insert905, i32 undef, i32 3 + %tmp89 = shufflevector <4 x i32> %__llpc_global_proxy_r2.12.vec.insert943, <4 x i32> undef, <3 x i32> + %tmp90 = bitcast <3 x i32> %tmp89 to <3 x float> + %tmp91 = fmul reassoc nnan arcp contract <3 x float> zeroinitializer, %tmp90 + %tmp92 = fadd reassoc nnan arcp contract <3 x float> %tmp91, + %tmp93 = shufflevector <4 x i32> , <4 x i32> %__llpc_global_proxy_r10.2, <4 x i32> + %tmp94 = fmul reassoc nnan arcp contract <3 x float> %tmp92, %tmp92 + %tmp95 = bitcast <3 x float> %tmp94 to <3 x i32> + %tmp96 = shufflevector <3 x i32> %tmp95, <3 x i32> undef, <4 x i32> + %tmp97 = shufflevector <4 x i32> %tmp96, <4 x i32> undef, <4 x i32> + br label %bb98 + + bb98: ; preds = %._crit_edge, %.lr.ph2332 + %__llpc_global_proxy_r9.62327 = phi <4 x i32> [ %tmp97, %.lr.ph2332 ], [ %__llpc_global_proxy_r9.12.vec.insert1453, %._crit_edge ] + %__llpc_global_proxy_r10.92326 = phi <4 x i32> [ %tmp93, %.lr.ph2332 ], [ %tmp124, %._crit_edge ] + %__llpc_global_proxy_r10.12.vec.insert1494 = insertelement <4 x i32> %__llpc_global_proxy_r10.92326, i32 undef, i32 3 + %__llpc_global_proxy_r11.12.vec.insert1563 = shufflevector <4 x i32> %__llpc_global_proxy_r10.12.vec.insert1494, <4 x i32> zeroinitializer, <4 x i32> + br i1 undef, label %._crit_edge, label %.lr.ph, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph: ; preds = %bb98 + br i1 true, label %._crit_edge, label %DummyReturnBlock, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + ._crit_edge: ; preds = %.lr.ph, %bb98 + %__llpc_global_proxy_r11.10.lcssa = phi <4 x i32> [ %__llpc_global_proxy_r11.12.vec.insert1563, %bb98 ], 
[ , %.lr.ph ] + %__llpc_global_proxy_r9.12.vec.insert1453 = shufflevector <4 x i32> %__llpc_global_proxy_r9.62327, <4 x i32> zeroinitializer, <4 x i32> + %tmp124 = shufflevector <4 x i32> %__llpc_global_proxy_r11.10.lcssa, <4 x i32> %__llpc_global_proxy_r10.12.vec.insert1494, <4 x i32> + br label %bb98, !structurizecfg.uniform !2 + + bb126: ; preds = %bb85, %bb84 + ret void + + DummyReturnBlock: ; preds = %.lr.ph + %tmp99 = shufflevector <4 x i32> %__llpc_global_proxy_r9.62327, <4 x i32> undef, <3 x i32> + %tmp100 = bitcast <3 x i32> %tmp99 to <3 x float> + ret void + } + + ; Function Attrs: nounwind readnone speculatable + declare float @llvm.amdgcn.fmed3.f32(float, float, float) #2 + + ; Function Attrs: nounwind readnone speculatable + declare <2 x float> @llvm.trunc.v2f32(<2 x float>) #2 + + ; Function Attrs: convergent nounwind + declare { i1, i64 } @llvm.amdgcn.if(i1) #3 + + ; Function Attrs: convergent nounwind + declare { i1, i64 } @llvm.amdgcn.else(i64) #3 + + ; Function Attrs: convergent nounwind readnone + declare i64 @llvm.amdgcn.break(i64) #4 + + ; Function Attrs: convergent nounwind readnone + declare i64 @llvm.amdgcn.if.break(i1, i64) #4 + + ; Function Attrs: convergent nounwind readnone + declare i64 @llvm.amdgcn.else.break(i64, i64) #4 + + ; Function Attrs: convergent nounwind + declare i1 @llvm.amdgcn.loop(i64) #3 + + ; Function Attrs: convergent nounwind + declare void @llvm.amdgcn.end.cf(i64) #3 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #5 + + attributes #0 = { nounwind readnone "target-cpu"="gfx803" } + attributes #1 = { nounwind "InitialPSInputAddr"="3841" "target-cpu"="gfx803" } + attributes #2 = { nounwind readnone speculatable "target-cpu"="gfx803" } + attributes #3 = { convergent nounwind } + attributes #4 = { convergent nounwind readnone } + attributes #5 = { nounwind } + + !amdgpu.pal.metadata = !{!0} + + !0 = !{i32 268435482, i32 7, i32 268435488, i32 -1, i32 268435480, i32 512185805, i32 268435481, i32 1750951361, i32 268435538, i32 4096, i32 268435539, i32 8192, i32 11338, i32 53215232, i32 11339, i32 20, i32 41411, i32 4, i32 41393, i32 6, i32 41479, i32 0, i32 41476, i32 17301504, i32 41478, i32 1087, i32 41721, i32 45, i32 41633, i32 0, i32 41645, i32 0, i32 41750, i32 14, i32 268435528, i32 0, i32 268435493, i32 0, i32 268435500, i32 0, i32 268435507, i32 256, i32 268435514, i32 104, i32 268435536, i32 0, i32 11274, i32 2883584, i32 11275, i32 6, i32 41412, i32 0, i32 41413, i32 4, i32 41400, i32 16908288, i32 41398, i32 4, i32 41395, i32 0, i32 41396, i32 0, i32 41397, i32 0, i32 41619, i32 100860300, i32 41475, i32 4112, i32 41103, i32 15, i32 268435485, i32 0, i32 268435529, i32 0, i32 268435494, i32 0, i32 268435501, i32 0, i32 268435508, i32 256, i32 268435515, i32 104, i32 41685, i32 0, i32 268435460, i32 1242857205, i32 268435461, i32 -892218289, i32 268435476, i32 264355721, i32 268435477, i32 2121436645, i32 268435532, i32 7, i32 41642, i32 127, i32 11348, i32 268435459, i32 11349, i32 268435460, i32 11340, i32 268435456, i32 11342, i32 0, i32 11343, i32 1, i32 11344, i32 2, i32 11345, i32 3, i32 11346, i32 4, i32 11347, i32 6, i32 41361, i32 0, i32 41362, i32 1, i32 41363, i32 2, i32 41364, i32 3, i32 11276, i32 268435456, i32 11278, i32 5} + !1 = !{i32 4} + !2 = !{} + !3 = distinct !{!3} + +... 
+--- +name: _amdgpu_ps_main +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_128, preferred-register: '' } + - { id: 1, class: sreg_128, preferred-register: '' } + - { id: 2, class: sreg_128, preferred-register: '' } + - { id: 3, class: sreg_128, preferred-register: '' } + - { id: 4, class: vreg_128, preferred-register: '' } + - { id: 5, class: vgpr_32, preferred-register: '' } + - { id: 6, class: sreg_128, preferred-register: '' } + - { id: 7, class: sreg_128, preferred-register: '' } + - { id: 8, class: sreg_128, preferred-register: '' } + - { id: 9, class: sreg_128, preferred-register: '' } + - { id: 10, class: sreg_128, preferred-register: '' } + - { id: 11, class: sreg_128, preferred-register: '' } + - { id: 12, class: sreg_128, preferred-register: '' } + - { id: 13, class: sreg_128, preferred-register: '' } + - { id: 14, class: sreg_128, preferred-register: '' } + - { id: 15, class: sreg_128, preferred-register: '' } + - { id: 16, class: sreg_128, preferred-register: '' } + - { id: 17, class: sreg_128, preferred-register: '' } + - { id: 18, class: sreg_128, preferred-register: '' } + - { id: 19, class: sreg_128, preferred-register: '' } + - { id: 20, class: sreg_128, preferred-register: '' } + - { id: 21, class: vgpr_32, preferred-register: '' } + - { id: 22, class: vgpr_32, preferred-register: '' } + - { id: 23, class: vgpr_32, preferred-register: '' } + - { id: 24, class: sreg_32_xm0, preferred-register: '' } + - { id: 25, class: sreg_32_xm0, preferred-register: '' } + - { id: 26, class: sreg_32, preferred-register: '' } + - { id: 27, class: sreg_128, preferred-register: '' } + - { id: 28, class: sreg_32_xm0_xexec, preferred-register: '' } + - { id: 29, class: sreg_128, preferred-register: '' } + - { id: 30, class: sreg_128, preferred-register: '' } + - { id: 31, class: sreg_32_xm0, preferred-register: '' } + - { id: 32, class: sreg_32_xm0, preferred-register: '' } + - { id: 33, class: sreg_128, preferred-register: '' } + - { id: 34, class: sreg_32_xm0, preferred-register: '' } + - { id: 35, class: vgpr_32, preferred-register: '' } + - { id: 36, class: sreg_32_xm0_xexec, preferred-register: '' } + - { id: 37, class: sreg_128, preferred-register: '' } + - { id: 38, class: vgpr_32, preferred-register: '' } + - { id: 39, class: vgpr_32, preferred-register: '' } + - { id: 40, class: vgpr_32, preferred-register: '' } + - { id: 41, class: vgpr_32, preferred-register: '' } + - { id: 42, class: vgpr_32, preferred-register: '' } + - { id: 43, class: vgpr_32, preferred-register: '' } + - { id: 44, class: vgpr_32, preferred-register: '' } + - { id: 45, class: sreg_32, preferred-register: '' } + - { id: 46, class: vgpr_32, preferred-register: '' } + - { id: 47, class: vgpr_32, preferred-register: '' } + - { id: 48, class: sreg_128, preferred-register: '' } + - { id: 49, class: vgpr_32, preferred-register: '' } + - { id: 50, class: sreg_64_xexec, preferred-register: '' } + - { id: 51, class: sreg_128, preferred-register: '' } + - { id: 52, class: sreg_32_xm0, preferred-register: '' } + - { id: 53, class: sreg_128, preferred-register: '' } + - { id: 54, class: sgpr_32, preferred-register: '' } + - { id: 55, class: vgpr_32, preferred-register: '' } + - { id: 56, class: vgpr_32, preferred-register: '' } + - { id: 57, class: sgpr_32, preferred-register: '' } + - { id: 58, class: vgpr_32, preferred-register: '' } + - { id: 59, class: vgpr_32, preferred-register: '' } + 
- { id: 60, class: sgpr_32, preferred-register: '' } + - { id: 61, class: vgpr_32, preferred-register: '' } + - { id: 62, class: vgpr_32, preferred-register: '' } + - { id: 63, class: sreg_128, preferred-register: '' } + - { id: 64, class: sreg_64, preferred-register: '' } + - { id: 65, class: sreg_64, preferred-register: '' } + - { id: 66, class: sreg_32_xm0_xexec, preferred-register: '' } + - { id: 67, class: sreg_128, preferred-register: '' } + - { id: 68, class: sreg_128, preferred-register: '' } + - { id: 69, class: sreg_32_xm0, preferred-register: '' } + - { id: 70, class: sreg_32_xm0, preferred-register: '' } + - { id: 71, class: sreg_32_xm0, preferred-register: '' } + - { id: 72, class: sreg_32_xm0, preferred-register: '' } + - { id: 73, class: sreg_32_xm0, preferred-register: '' } + - { id: 74, class: sreg_32_xm0, preferred-register: '' } + - { id: 75, class: sreg_32_xm0, preferred-register: '' } + - { id: 76, class: vgpr_32, preferred-register: '' } + - { id: 77, class: vgpr_32, preferred-register: '' } + - { id: 78, class: vgpr_32, preferred-register: '' } + - { id: 79, class: sreg_32_xm0, preferred-register: '' } + - { id: 80, class: sreg_32_xm0, preferred-register: '' } + - { id: 81, class: sreg_128, preferred-register: '' } + - { id: 82, class: vgpr_32, preferred-register: '' } + - { id: 83, class: sreg_32_xm0, preferred-register: '' } + - { id: 84, class: sreg_128, preferred-register: '' } + - { id: 85, class: sreg_32_xm0, preferred-register: '' } + - { id: 86, class: sreg_32_xm0, preferred-register: '' } + - { id: 87, class: sreg_32_xm0, preferred-register: '' } + - { id: 88, class: sreg_32_xm0, preferred-register: '' } + - { id: 89, class: sreg_128, preferred-register: '' } + - { id: 90, class: sreg_128, preferred-register: '' } + - { id: 91, class: sreg_32_xm0, preferred-register: '' } + - { id: 92, class: sreg_128, preferred-register: '' } + - { id: 93, class: sreg_32_xm0, preferred-register: '' } + - { id: 94, class: sreg_32_xm0, preferred-register: '' } + - { id: 95, class: sreg_64, preferred-register: '' } + - { id: 96, class: sreg_64, preferred-register: '' } + - { id: 97, class: sreg_32_xm0, preferred-register: '' } + - { id: 98, class: sreg_32_xm0, preferred-register: '' } + - { id: 99, class: sreg_32_xm0, preferred-register: '' } + - { id: 100, class: sreg_32_xm0, preferred-register: '' } + - { id: 101, class: sreg_128, preferred-register: '' } + - { id: 102, class: sreg_32_xm0, preferred-register: '' } + - { id: 103, class: sreg_32_xm0, preferred-register: '' } + - { id: 104, class: sreg_32_xm0, preferred-register: '' } + - { id: 105, class: sreg_32_xm0, preferred-register: '' } + - { id: 106, class: sreg_128, preferred-register: '' } + - { id: 107, class: sreg_64_xexec, preferred-register: '' } + - { id: 108, class: vgpr_32, preferred-register: '' } + - { id: 109, class: vreg_128, preferred-register: '' } + - { id: 110, class: vreg_128, preferred-register: '' } + - { id: 111, class: vreg_128, preferred-register: '' } + - { id: 112, class: vgpr_32, preferred-register: '' } + - { id: 113, class: vreg_128, preferred-register: '' } + - { id: 114, class: vgpr_32, preferred-register: '' } + - { id: 115, class: vreg_128, preferred-register: '' } + - { id: 116, class: vgpr_32, preferred-register: '' } + - { id: 117, class: vreg_128, preferred-register: '' } + - { id: 118, class: vgpr_32, preferred-register: '' } + - { id: 119, class: vreg_128, preferred-register: '' } + - { id: 120, class: vreg_128, preferred-register: '' } + - { id: 121, class: vreg_128, 
preferred-register: '' } + - { id: 122, class: vgpr_32, preferred-register: '' } + - { id: 123, class: vgpr_32, preferred-register: '' } + - { id: 124, class: vgpr_32, preferred-register: '' } + - { id: 125, class: vreg_128, preferred-register: '' } + - { id: 126, class: vgpr_32, preferred-register: '' } + - { id: 127, class: vreg_128, preferred-register: '' } + - { id: 128, class: vreg_128, preferred-register: '' } + - { id: 129, class: vgpr_32, preferred-register: '' } + - { id: 130, class: vreg_128, preferred-register: '' } + - { id: 131, class: vgpr_32, preferred-register: '' } + - { id: 132, class: vgpr_32, preferred-register: '' } + - { id: 133, class: vgpr_32, preferred-register: '' } + - { id: 134, class: vgpr_32, preferred-register: '' } + - { id: 135, class: vgpr_32, preferred-register: '' } + - { id: 136, class: vreg_128, preferred-register: '' } + - { id: 137, class: vreg_128, preferred-register: '' } + - { id: 138, class: vreg_128, preferred-register: '' } + - { id: 139, class: vreg_128, preferred-register: '' } +liveins: +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0..entry: + successors: %bb.1(0x30000000), %bb.2(0x50000000) + + %21:vgpr_32 = V_TRUNC_F32_e32 undef %22:vgpr_32, implicit $exec + %23:vgpr_32 = V_CVT_U32_F32_e32 killed %21, implicit $exec + %108:vgpr_32 = V_LSHRREV_B32_e32 4, killed %23, implicit $exec + undef %109.sub1:vreg_128 = COPY %108 + %28:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %29:sreg_128, 3044, 0 :: (dereferenceable invariant load 4) + S_CMP_EQ_U32 killed %28, 0, implicit-def $scc + S_CBRANCH_SCC0 %bb.2, implicit killed $scc + + bb.1: + successors: %bb.8(0x80000000) + + %138:vreg_128 = COPY killed %109 + S_BRANCH %bb.8 + + bb.2.bb10: + successors: %bb.34(0x40000000), %bb.3(0x40000000) + + S_CBRANCH_SCC0 %bb.3, implicit undef $scc + + bb.34: + successors: %bb.4(0x80000000) + + %136:vreg_128 = COPY killed %109 + S_BRANCH %bb.4 + + bb.3.bb11: + successors: %bb.4(0x80000000) + + %136:vreg_128 = COPY killed %109 + + bb.4.bb12: + successors: %bb.5(0x40000000), %bb.7(0x40000000) + + %110:vreg_128 = COPY killed %136 + dead %32:sreg_32_xm0 = S_MOV_B32 0 + %111:vreg_128 = COPY %110 + %111.sub3:vreg_128 = COPY undef %32 + S_CBRANCH_SCC1 %bb.7, implicit undef $scc + S_BRANCH %bb.5 + + bb.5..lr.ph2435: + successors: %bb.6(0x80000000) + + %36:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %37:sreg_128, 2708, 0 :: (dereferenceable invariant load 4) + %39:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %110.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec + %40:vgpr_32 = V_MAD_F32 0, %111.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 0, 0, 0, implicit $exec + %41:vgpr_32 = V_MUL_F32_e64 0, 0, 0, killed %40, 1, 0, implicit $exec + %43:vgpr_32 = V_MUL_F32_e32 0, %39, implicit $exec + %44:vgpr_32 = COPY killed %43 + %44:vgpr_32 = V_MAC_F32_e32 0, killed %41, %44, implicit $exec + %47:vgpr_32 = V_MOV_B32_e32 2143289344, implicit $exec + %46:vgpr_32 = COPY killed %47 + %46:vgpr_32 = V_MAC_F32_e32 0, killed %39, %46, implicit $exec + undef %115.sub0:vreg_128 = COPY %46 + %115.sub1:vreg_128 = COPY killed %46 + %115.sub2:vreg_128 = 
COPY killed %44 + %50:sreg_64_xexec = V_CMP_NE_U32_e64 0, killed %36, implicit $exec + dead %118:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %137:vreg_128 = IMPLICIT_DEF + + bb.6.bb19: + successors: %bb.6(0x7c000000), %bb.7(0x04000000) + + %119:vreg_128 = COPY killed %137 + %121:vreg_128 = COPY killed %119 + %121.sub3:vreg_128 = COPY undef %32 + %56:vgpr_32 = V_ADD_F32_e32 %115.sub2, %121.sub2, implicit $exec + %59:vgpr_32 = V_ADD_F32_e32 %115.sub1, %121.sub1, implicit $exec + %62:vgpr_32 = V_ADD_F32_e32 %115.sub0, killed %121.sub0, implicit $exec + undef %117.sub0:vreg_128 = COPY killed %62 + %117.sub1:vreg_128 = COPY killed %59 + %117.sub2:vreg_128 = COPY killed %56 + %64:sreg_64 = S_AND_B64 $exec, %50, implicit-def dead $scc + $vcc = COPY killed %64 + %137:vreg_128 = COPY killed %117 + S_CBRANCH_VCCNZ %bb.6, implicit killed $vcc + S_BRANCH %bb.7 + + bb.7.._crit_edge2436: + successors: %bb.8(0x80000000) + + dead %66:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %67:sreg_128, 2704, 0 :: (dereferenceable invariant load 4) + %138:vreg_128 = COPY killed %111 + + bb.8.bb57: + successors: %bb.17(0x40000000), %bb.9(0x40000000) + + %113:vreg_128 = COPY killed %138 + S_CBRANCH_SCC1 %bb.17, implicit undef $scc + S_BRANCH %bb.9 + + bb.9.bb58: + successors: %bb.11(0x40000000), %bb.10(0x40000000) + + S_CBRANCH_SCC1 %bb.11, implicit undef $scc + S_BRANCH %bb.10 + + bb.10.bb59: + successors: %bb.11(0x80000000) + + + bb.11.bb64: + successors: %bb.12(0x40000000), %bb.17(0x40000000) + + S_CBRANCH_SCC1 %bb.17, implicit undef $scc + S_BRANCH %bb.12 + + bb.12..lr.ph2410: + successors: %bb.13(0x40000000), %bb.16(0x40000000) + + S_CBRANCH_SCC1 %bb.16, implicit undef $scc + S_BRANCH %bb.13 + + bb.13..lr.ph2381: + successors: %bb.15(0x40000000), %bb.14(0x40000000) + + S_CBRANCH_SCC1 %bb.15, implicit undef $scc + S_BRANCH %bb.14 + + bb.14.bb67: + successors: %bb.15(0x80000000) + + + bb.15.bb71: + successors: %bb.16(0x80000000) + + + bb.16.._crit_edge2382: + successors: %bb.17(0x80000000) + + + bb.17.bb72: + successors: %bb.25(0x40000000), %bb.18(0x40000000) + + S_CBRANCH_SCC1 %bb.25, implicit undef $scc + S_BRANCH %bb.18 + + bb.18.bb73: + successors: %bb.25(0x40000000), %bb.19(0x40000000) + + S_CBRANCH_SCC1 %bb.25, implicit undef $scc + S_BRANCH %bb.19 + + bb.19..lr.ph2364: + successors: %bb.24(0x40000000), %bb.20(0x40000000) + + S_CBRANCH_SCC1 %bb.24, implicit undef $scc + S_BRANCH %bb.20 + + bb.20..lr.ph2344: + successors: %bb.21(0x40000000), %bb.23(0x40000000) + + S_CBRANCH_SCC1 %bb.23, implicit undef $scc + S_BRANCH %bb.21 + + bb.21.bb77: + successors: %bb.22(0x40000000), %bb.23(0x40000000) + + S_CBRANCH_SCC1 %bb.23, implicit undef $scc + S_BRANCH %bb.22 + + bb.22.bb79: + successors: %bb.23(0x80000000) + + + bb.23.bb83: + successors: %bb.24(0x80000000) + + + bb.24.._crit_edge2345: + successors: %bb.25(0x80000000) + + + bb.25.bb84: + successors: %bb.32(0x40000000), %bb.26(0x40000000) + + S_CBRANCH_SCC1 %bb.32, implicit undef $scc + S_BRANCH %bb.26 + + bb.26.bb85: + successors: %bb.32(0x40000000), %bb.27(0x40000000) + + S_CBRANCH_SCC1 %bb.32, implicit undef $scc + S_BRANCH %bb.27 + + bb.27..lr.ph2332: + successors: %bb.28(0x80000000) + + dead %77:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %78:vgpr_32 = nnan arcp contract reassoc V_MAD_F32 0, killed %113.sub1, 0, target-flags(amdgpu-gotprel32-lo) 0, 0, 1065353216, 0, 0, implicit $exec + dead %80:sreg_32_xm0 = S_MOV_B32 0 + dead %82:vgpr_32 = V_MUL_F32_e32 killed %78, %78, implicit $exec + dead %126:vgpr_32 = V_MOV_B32_e32 2143289344, implicit $exec + dead 
%125:vreg_128 = IMPLICIT_DEF + dead %91:sreg_32_xm0 = S_MOV_B32 2143289344 + %96:sreg_64 = S_AND_B64 $exec, 0, implicit-def dead $scc + %139:vreg_128 = IMPLICIT_DEF + + bb.28.bb98: + successors: %bb.29(0x40000000), %bb.30(0x40000000) + + dead %127:vreg_128 = COPY killed %139 + S_CBRANCH_SCC0 %bb.30, implicit undef $scc + + bb.29: + successors: %bb.31(0x80000000) + + S_BRANCH %bb.31 + + bb.30..lr.ph: + successors: %bb.31(0x7c000000), %bb.33(0x04000000) + + $vcc = COPY %96 + S_CBRANCH_VCCNZ %bb.33, implicit killed $vcc + S_BRANCH %bb.31 + + bb.31.._crit_edge: + successors: %bb.28(0x80000000) + + dead %130:vreg_128 = IMPLICIT_DEF + dead %128:vreg_128 = COPY undef %130 + %139:vreg_128 = IMPLICIT_DEF + S_BRANCH %bb.28 + + bb.32.bb126: + S_ENDPGM + + bb.33.DummyReturnBlock: + S_ENDPGM + +...
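
Note (not part of the patch): below is a minimal standalone sketch of the endpoint-filtering idiom the RegisterCoalescer hunk adds. After LIS->pruneValue() the EndPoints list can still contain the index of the copy being removed (the lane was dead there), and those entries are dropped with a swap-with-back erase before LIS->extendToIndices() is called. The function name removeEndpointsAt and the plain std::vector<int> standing in for the real SlotIndex endpoint list are hypothetical, illustrative choices only.

// Illustrative sketch, not the patch itself: the same unordered-erase loop,
// applied to a plain vector of integer "indices". Entries equal to copyIdx
// stand in for endpoints that land on the copy instruction; each is removed
// by overwriting it with the last element and popping the back, so the loop
// stays linear and never advances past an unexamined element.
#include <cassert>
#include <vector>

static void removeEndpointsAt(std::vector<int> &endPoints, int copyIdx) {
  for (unsigned I = 0; I != endPoints.size();) {
    if (endPoints[I] == copyIdx) {
      endPoints[I] = endPoints.back(); // overwrite with the last element
      endPoints.pop_back();            // shrink; re-examine slot I next pass
      continue;
    }
    ++I;
  }
}

int main() {
  std::vector<int> endPoints = {16, 48, 16, 96};
  removeEndpointsAt(endPoints, 16);
  assert(endPoints.size() == 2); // both entries at the "copy" index removed
  return 0;
}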