Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -2143,7 +2143,8 @@ /// Find the ultimate value that VNI was copied from. std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; - bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const; + bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other, + bool AllowOneUndef) const; /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. /// Return a conflict resolution when possible, but leave the hard cases as @@ -2313,7 +2314,8 @@ } bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, - const JoinVals &Other) const { + const JoinVals &Other, + bool AllowOneUndef) const { const VNInfo *Orig0; unsigned Reg0; std::tie(Orig0, Reg0) = followCopyChain(Value0); @@ -2325,9 +2327,11 @@ std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); // If both values are undefined, and the source registers are the same // register, the values are identical. Filter out cases where only one - // value is defined. + // value is defined, unless AllowOneUndef is set (as it is when analyzing + // a subrange, where the whole register may be traceable through a copy + // chain while one of its subregs is undefined). if (Orig0 == nullptr || Orig1 == nullptr) - return Orig0 == Orig1 && Reg0 == Reg1; + return (AllowOneUndef || Orig0 == Orig1) && Reg0 == Reg1; // The values are equal if they are defined at the same place and use the // same register. 
Note that we cannot compare VNInfos directly as some of @@ -2504,7 +2508,7 @@ // %this = COPY %ext <-- Erase this copy // if (DefMI->isFullCopy() && !CP.isPartial() && - valuesIdentical(VNI, V.OtherVNI, Other)) { + valuesIdentical(VNI, V.OtherVNI, Other, SubRangeJoin)) { V.Identical = true; return CR_Erase; } Index: test/CodeGen/AMDGPU/coalescing-subreg-removed-undef-copy.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/coalescing-subreg-removed-undef-copy.mir @@ -0,0 +1,450 @@ +# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s +# +# This is another example of a test giving "Couldn't join subrange!" +# +# This shows a case where a subreg was defined by a copy from an undef register, +# and that copy got removed and the subreg became undef at a later whole reg use. +# +# GCN: {{^body}} + +--- | + ; ModuleID = '' + source_filename = "llpcPipeline" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" + target triple = "amdgcn--amdpal" + + ; Function Attrs: nounwind + declare dllexport amdgpu_vs void @_amdgpu_vs_main(i32 inreg, i32 inreg, <4 x i32> inreg, i32 inreg, i32 inreg, i32 inreg, i32) local_unnamed_addr #0 + + ; Function Attrs: nounwind readnone speculatable + declare i64 @llvm.amdgcn.s.getpc() #1 + + ; Function Attrs: nounwind readonly + declare <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #2 + + ; Function Attrs: nounwind + declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 + + ; Function Attrs: nounwind + define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, <2 x float> %arg4, <2 x float> %arg5, <2 x float> %arg6, <3 x float> %arg7, <2 
x float> %arg8, <2 x float> %arg9, <2 x float> %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19) local_unnamed_addr #3 !spirv.ExecutionModel !1 { + .entry: + br i1 undef, label %bb47, label %bb46, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb46: ; preds = %.entry + br label %bb47, !structurizecfg.uniform !2 + + bb47: ; preds = %.entry, %bb46 + %__llpc_global_proxy_r3.5 = phi <4 x i32> [ , %bb46 ], [ zeroinitializer, %.entry ] + %tmp = insertelement <2 x float> undef, float %arg13, i32 1 + %tmp20 = fptoui <2 x float> %tmp to <2 x i32> + %tmp21 = uitofp <2 x i32> %tmp20 to <2 x float> + %tmp22 = bitcast <2 x float> %tmp21 to <2 x i32> + %tmp23 = shufflevector <2 x i32> %tmp22, <2 x i32> undef, <4 x i32> + %tmp24 = shufflevector <4 x i32> %tmp23, <4 x i32> , <4 x i32> + %tmp25 = bitcast <4 x i32> %tmp24 to <4 x float> + %tmp26 = fptosi <4 x float> %tmp25 to <4 x i32> + %__llpc_global_proxy_r0.12.vec.extract474 = extractelement <4 x i32> %tmp26, i32 3 + %tmp52 = extractelement <4 x i32> %__llpc_global_proxy_r3.5, i32 1 + %tmp53 = insertelement <4 x i32> undef, i32 %tmp52, i32 1 + %__llpc_global_proxy_r3.8.vec.insert879 = insertelement <4 x i32> %tmp53, i32 undef, i32 2 + %__llpc_global_proxy_r3.12.vec.insert1176 = insertelement <4 x i32> %__llpc_global_proxy_r3.8.vec.insert879, i32 undef, i32 3 + br i1 undef, label %bb54, label %bb58, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb54: ; preds = %bb47 + %tmp48 = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 undef, i32 undef, i32 %__llpc_global_proxy_r0.12.vec.extract474, <8 x i32> undef, i32 0, i32 0) #7 + %tmp49 = bitcast <4 x float> %tmp48 to <4 x i32> + %tmp50 = shufflevector <4 x i32> %tmp49, <4 x i32> undef, <4 x i32> zeroinitializer + %tmp51 = and <4 x i32> %tmp50, + %__llpc_global_proxy_r4.8.vec.extract1310 = extractelement <4 x i32> %tmp51, i32 2 + %tmp55 = icmp ne i32 
%__llpc_global_proxy_r4.8.vec.extract1310, 0 + %tmp56 = zext i1 %tmp55 to i32 + %tmp57 = insertelement <4 x i32> %__llpc_global_proxy_r3.12.vec.insert1176, i32 1, i32 2 + %__llpc_global_proxy_r3.8.vec.insert933 = insertelement <4 x i32> %tmp57, i32 %tmp56, i32 3 + br label %bb59, !structurizecfg.uniform !2 + + bb58: ; preds = %bb47 + %__llpc_global_proxy_r3.8.vec.insert935 = insertelement <4 x i32> %__llpc_global_proxy_r3.12.vec.insert1176, i32 0, i32 2 + br label %bb59, !structurizecfg.uniform !2 + + bb59: ; preds = %bb58, %bb54 + %__llpc_global_proxy_r1.7 = phi <4 x i32> [ , %bb54 ], [ zeroinitializer, %bb58 ] + %__llpc_global_proxy_r3.6 = phi <4 x i32> [ %__llpc_global_proxy_r3.8.vec.insert933, %bb54 ], [ %__llpc_global_proxy_r3.8.vec.insert935, %bb58 ] + %tmp60 = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 undef, i32 undef, i32 %__llpc_global_proxy_r0.12.vec.extract474, <8 x i32> undef, i32 0, i32 0) #7 + %tmp61 = bitcast <4 x float> %tmp60 to <4 x i32> + %tmp62 = shufflevector <4 x i32> %tmp61, <4 x i32> undef, <4 x i32> zeroinitializer + %tmp63 = and <4 x i32> %tmp62, + %tmp64 = shufflevector <4 x i32> %tmp63, <4 x i32> undef, <2 x i32> + %tmp65 = icmp eq <2 x i32> %tmp64, + %tmp66 = sext <2 x i1> %tmp65 to <2 x i32> + %tmp67 = shufflevector <4 x i32> undef, <4 x i32> %__llpc_global_proxy_r3.6, <4 x i32> + %tmp68 = extractelement <2 x i32> %tmp66, i32 1 + %tmp70 = icmp eq i32 %tmp68, 0 + %0 = xor i1 %tmp70, true + %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0) + %2 = extractvalue { i1, i64 } %1, 0 + %3 = extractvalue { i1, i64 } %1, 1 + br i1 %2, label %bb73, label %Flow + + Flow: ; preds = %bb73, %bb59 + %4 = phi <4 x i32> [ %__llpc_global_proxy_r3.8.vec.insert939, %bb73 ], [ undef, %bb59 ] + %5 = call { i1, i64 } @llvm.amdgcn.else(i64 %3) + %6 = extractvalue { i1, i64 } %5, 0 + %7 = extractvalue { i1, i64 } %5, 1 + br i1 %6, label %bb71, label %bb75 + + bb71: ; preds = %Flow + %tmp72 = shufflevector <4 x i32> %tmp67, <4 x i32> 
zeroinitializer, <4 x i32> + br label %bb75 + + bb73: ; preds = %bb59 + %tmp74 = insertelement <4 x i32> %tmp67, i32 %tmp68, i32 3 + %__llpc_global_proxy_r3.8.vec.insert939 = insertelement <4 x i32> %tmp74, i32 %tmp68, i32 2 + br label %Flow + + bb75: ; preds = %bb71, %Flow + %__llpc_global_proxy_r3.7 = phi <4 x i32> [ %4, %Flow ], [ %tmp72, %bb71 ] + call void @llvm.amdgcn.end.cf(i64 %7) + %tmp78 = shufflevector <4 x i32> %__llpc_global_proxy_r3.7, <4 x i32> undef, <2 x i32> + %tmp79 = icmp eq <2 x i32> %tmp78, zeroinitializer + %tmp80 = select <2 x i1> %tmp79, <2 x i32> , <2 x i32> + %tmp81 = extractelement <2 x i32> %tmp80, i32 0 + %tmp84 = bitcast i32 %tmp81 to float + %tmp85 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp84 + %tmp86 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp85) #8 + call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> undef, <2 x half> %tmp86, i1 true, i1 true) #7 + ret void + } + + ; Function Attrs: nounwind readonly + declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #2 + + ; Function Attrs: nounwind readonly + declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #2 + + ; Function Attrs: nounwind readnone + declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i1) #4 + + ; Function Attrs: nounwind readnone speculatable + declare float @llvm.minnum.f32(float, float) #1 + + ; Function Attrs: nounwind readnone speculatable + declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1 + + ; Function Attrs: nounwind + declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 + + ; Function Attrs: convergent nounwind + declare { i1, i64 } @llvm.amdgcn.if(i1) #5 + + ; Function Attrs: convergent nounwind + declare { i1, i64 } @llvm.amdgcn.else(i64) #5 + + ; Function Attrs: convergent nounwind readnone + declare i64 @llvm.amdgcn.break(i64) #6 + + ; Function Attrs: convergent nounwind 
readnone + declare i64 @llvm.amdgcn.if.break(i1, i64) #6 + + ; Function Attrs: convergent nounwind readnone + declare i64 @llvm.amdgcn.else.break(i64, i64) #6 + + ; Function Attrs: convergent nounwind + declare i1 @llvm.amdgcn.loop(i64) #5 + + ; Function Attrs: convergent nounwind + declare void @llvm.amdgcn.end.cf(i64) #5 + + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #7 + + attributes #0 = { nounwind "target-cpu"="gfx803" } + attributes #1 = { nounwind readnone speculatable "target-cpu"="gfx803" } + attributes #2 = { nounwind readonly "target-cpu"="gfx803" } + attributes #3 = { nounwind "InitialPSInputAddr"="3840" "target-cpu"="gfx803" } + attributes #4 = { nounwind readnone "target-cpu"="gfx803" } + attributes #5 = { convergent nounwind } + attributes #6 = { convergent nounwind readnone } + attributes #7 = { nounwind } + attributes #8 = { nounwind readnone speculatable } + + !amdgpu.pal.metadata = !{!0} + + !0 = !{i32 268435482, i32 6, i32 268435488, i32 -1, i32 268435480, i32 1543035635, i32 268435481, i32 -1223759529, i32 268435538, i32 4096, i32 268435539, i32 8192, i32 11338, i32 2883584, i32 11339, i32 18, i32 41411, i32 4, i32 41393, i32 0, i32 41479, i32 0, i32 41476, i32 17301504, i32 41478, i32 1087, i32 41721, i32 45, i32 41633, i32 0, i32 41645, i32 0, i32 41750, i32 14, i32 268435528, i32 0, i32 268435493, i32 0, i32 268435500, i32 0, i32 268435507, i32 256, i32 268435514, i32 104, i32 268435536, i32 0, i32 11274, i32 2883584, i32 11275, i32 6, i32 41412, i32 0, i32 41413, i32 4, i32 41400, i32 16777216, i32 41398, i32 0, i32 41395, i32 0, i32 41396, i32 0, i32 41397, i32 0, i32 41619, i32 100794764, i32 41475, i32 16, i32 41103, i32 15, i32 268435485, i32 0, i32 268435529, i32 0, i32 268435494, i32 0, i32 268435501, i32 0, i32 268435508, i32 256, i32 268435515, i32 104, i32 41685, i32 0, i32 268435460, i32 -492026296, i32 268435461, i32 897356955, i32 268435476, i32 585988482, i32 268435477, i32 -883997925, i32 
268435532, i32 6, i32 41642, i32 127, i32 11347, i32 268435459, i32 11348, i32 268435460, i32 11340, i32 268435456, i32 11342, i32 0, i32 11343, i32 1, i32 11344, i32 2, i32 11345, i32 3, i32 11346, i32 5, i32 11276, i32 268435456, i32 11278, i32 4} + !1 = !{i32 4} + !2 = !{} + +... +--- +name: _amdgpu_ps_main +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_128, preferred-register: '' } + - { id: 1, class: sreg_32_xm0, preferred-register: '' } + - { id: 2, class: sreg_128, preferred-register: '' } + - { id: 3, class: sreg_128, preferred-register: '' } + - { id: 4, class: sreg_128, preferred-register: '' } + - { id: 5, class: sreg_128, preferred-register: '' } + - { id: 6, class: sreg_128, preferred-register: '' } + - { id: 7, class: sreg_32_xm0, preferred-register: '' } + - { id: 8, class: sreg_64, preferred-register: '' } + - { id: 9, class: sreg_128, preferred-register: '' } + - { id: 10, class: sreg_64, preferred-register: '' } + - { id: 11, class: sreg_128, preferred-register: '' } + - { id: 12, class: sreg_128, preferred-register: '' } + - { id: 13, class: sreg_128, preferred-register: '' } + - { id: 14, class: sgpr_32, preferred-register: '' } + - { id: 15, class: sgpr_32, preferred-register: '' } + - { id: 16, class: sgpr_32, preferred-register: '' } + - { id: 17, class: sgpr_32, preferred-register: '' } + - { id: 18, class: vgpr_32, preferred-register: '' } + - { id: 19, class: vgpr_32, preferred-register: '' } + - { id: 20, class: vgpr_32, preferred-register: '' } + - { id: 21, class: vgpr_32, preferred-register: '' } + - { id: 22, class: sreg_128, preferred-register: '' } + - { id: 23, class: sreg_32_xm0, preferred-register: '' } + - { id: 24, class: sreg_128, preferred-register: '' } + - { id: 25, class: sreg_128, preferred-register: '' } + - { id: 26, class: sreg_32_xm0, preferred-register: '' } + - { id: 27, class: 
sreg_128, preferred-register: '' } + - { id: 28, class: sreg_32_xm0, preferred-register: '' } + - { id: 29, class: sreg_32_xm0, preferred-register: '' } + - { id: 30, class: sreg_32_xm0, preferred-register: '' } + - { id: 31, class: sreg_32_xm0, preferred-register: '' } + - { id: 32, class: sreg_32_xm0, preferred-register: '' } + - { id: 33, class: sreg_32_xm0, preferred-register: '' } + - { id: 34, class: sreg_32_xm0, preferred-register: '' } + - { id: 35, class: sreg_32, preferred-register: '' } + - { id: 36, class: sreg_128, preferred-register: '' } + - { id: 37, class: vgpr_32, preferred-register: '' } + - { id: 38, class: vgpr_32, preferred-register: '' } + - { id: 39, class: vgpr_32, preferred-register: '' } + - { id: 40, class: vgpr_32, preferred-register: '' } + - { id: 41, class: vgpr_32, preferred-register: '' } + - { id: 42, class: vgpr_32, preferred-register: '' } + - { id: 43, class: sreg_32_xm0, preferred-register: '' } + - { id: 44, class: sreg_256, preferred-register: '' } + - { id: 45, class: vgpr_32, preferred-register: '' } + - { id: 46, class: vreg_128, preferred-register: '' } + - { id: 47, class: sreg_32_xm0, preferred-register: '' } + - { id: 48, class: sreg_32_xm0, preferred-register: '' } + - { id: 49, class: sreg_32_xm0, preferred-register: '' } + - { id: 50, class: sreg_128, preferred-register: '' } + - { id: 51, class: sreg_128, preferred-register: '' } + - { id: 52, class: sreg_32, preferred-register: '' } + - { id: 53, class: sreg_128, preferred-register: '' } + - { id: 54, class: vgpr_32, preferred-register: '' } + - { id: 55, class: vgpr_32, preferred-register: '' } + - { id: 56, class: vgpr_32, preferred-register: '' } + - { id: 57, class: vgpr_32, preferred-register: '' } + - { id: 58, class: vgpr_32, preferred-register: '' } + - { id: 59, class: vgpr_32, preferred-register: '' } + - { id: 60, class: sgpr_32, preferred-register: '' } + - { id: 61, class: sreg_256, preferred-register: '' } + - { id: 62, class: vgpr_32, 
preferred-register: '' } + - { id: 63, class: vreg_128, preferred-register: '' } + - { id: 64, class: sreg_32_xm0, preferred-register: '' } + - { id: 65, class: sreg_32_xm0, preferred-register: '' } + - { id: 66, class: sreg_32_xm0, preferred-register: '' } + - { id: 67, class: sreg_64_xexec, preferred-register: '$vcc' } + - { id: 68, class: sgpr_32, preferred-register: '' } + - { id: 69, class: vgpr_32, preferred-register: '' } + - { id: 70, class: sreg_128, preferred-register: '' } + - { id: 71, class: sreg_32_xm0, preferred-register: '' } + - { id: 72, class: sreg_32_xm0, preferred-register: '' } + - { id: 73, class: sreg_32_xm0, preferred-register: '' } + - { id: 74, class: sreg_128, preferred-register: '' } + - { id: 75, class: sreg_32_xm0, preferred-register: '' } + - { id: 76, class: vgpr_32, preferred-register: '' } + - { id: 77, class: sreg_64_xexec, preferred-register: '$vcc' } + - { id: 78, class: vgpr_32, preferred-register: '' } + - { id: 79, class: vgpr_32, preferred-register: '' } + - { id: 80, class: vgpr_32, preferred-register: '' } + - { id: 81, class: vgpr_32, preferred-register: '' } + - { id: 82, class: vgpr_32, preferred-register: '' } + - { id: 83, class: vgpr_32, preferred-register: '' } + - { id: 84, class: vgpr_32, preferred-register: '' } + - { id: 85, class: vgpr_32, preferred-register: '' } + - { id: 86, class: vgpr_32, preferred-register: '' } + - { id: 87, class: vreg_128, preferred-register: '' } + - { id: 88, class: vgpr_32, preferred-register: '' } + - { id: 89, class: vgpr_32, preferred-register: '' } + - { id: 90, class: vreg_128, preferred-register: '' } + - { id: 91, class: vreg_128, preferred-register: '' } + - { id: 92, class: vreg_128, preferred-register: '' } + - { id: 93, class: vreg_128, preferred-register: '' } + - { id: 94, class: vgpr_32, preferred-register: '' } + - { id: 95, class: vreg_128, preferred-register: '' } + - { id: 96, class: vgpr_32, preferred-register: '' } + - { id: 97, class: vgpr_32, 
preferred-register: '' } + - { id: 98, class: vgpr_32, preferred-register: '' } + - { id: 99, class: vreg_128, preferred-register: '' } + - { id: 100, class: vreg_128, preferred-register: '' } + - { id: 101, class: vgpr_32, preferred-register: '' } + - { id: 102, class: vgpr_32, preferred-register: '' } + - { id: 103, class: vreg_128, preferred-register: '' } + - { id: 104, class: vreg_128, preferred-register: '' } + - { id: 105, class: vreg_128, preferred-register: '' } + - { id: 106, class: vreg_128, preferred-register: '' } + - { id: 107, class: vreg_128, preferred-register: '' } + - { id: 108, class: vgpr_32, preferred-register: '' } + - { id: 109, class: vgpr_32, preferred-register: '' } + - { id: 110, class: sreg_128, preferred-register: '' } + - { id: 111, class: vreg_128, preferred-register: '' } + - { id: 112, class: vreg_128, preferred-register: '' } + - { id: 113, class: vreg_128, preferred-register: '' } + - { id: 114, class: sreg_64, preferred-register: '' } + - { id: 115, class: sreg_64, preferred-register: '' } + - { id: 116, class: sreg_64, preferred-register: '' } +liveins: +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0..entry: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + + %23:sreg_32_xm0 = S_MOV_B32 0 + undef %24.sub0:sreg_128 = COPY %23 + %24.sub1:sreg_128 = COPY %23 + %24.sub3:sreg_128 = COPY %23 + %110:sreg_128 = COPY killed %24 + S_CBRANCH_SCC1 %bb.2, implicit undef $scc + S_BRANCH %bb.1 + + bb.1.bb46: + successors: %bb.2(0x80000000) + + %110:sreg_128 = IMPLICIT_DEF + + bb.2.bb47: + successors: %bb.3(0x40000000), %bb.4(0x40000000) + + 
%0:sreg_128 = COPY killed %110 + S_CBRANCH_SCC1 %bb.4, implicit undef $scc + S_BRANCH %bb.3 + + bb.3.bb54: + successors: %bb.5(0x80000000) + + %88:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + undef %87.sub2:vreg_128 = COPY killed %88 + %43:sreg_32_xm0 = S_MOV_B32 0 + undef %44.sub0:sreg_256 = COPY %43 + %44.sub1:sreg_256 = COPY %43 + %44.sub2:sreg_256 = COPY %43 + %44.sub3:sreg_256 = COPY %43 + %44.sub4:sreg_256 = COPY %43 + %44.sub5:sreg_256 = COPY %43 + %44.sub6:sreg_256 = COPY %43 + %44.sub7:sreg_256 = COPY killed %43 + %45:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 killed %87, killed %44, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + dead %89:vgpr_32 = V_BFE_U32 killed %45, 7, 1, implicit $exec + %49:sreg_32_xm0 = S_MOV_B32 1 + %50:sreg_128 = COPY killed %0 + %50.sub2:sreg_128 = COPY killed %49 + %91:vreg_128 = COPY killed %50 + %90:vreg_128 = COPY killed %91 + %90.sub3:vreg_128 = COPY undef %89 + %111:vreg_128 = COPY killed %90 + S_BRANCH %bb.5 + + bb.4.bb58: + successors: %bb.5(0x80000000) + + %34:sreg_32_xm0 = S_MOV_B32 0 + %4:sreg_128 = COPY killed %0 + %4.sub2:sreg_128 = COPY killed %34 + %93:vreg_128 = COPY killed %4 + %111:vreg_128 = COPY killed %93 + + bb.5.bb59: + successors: %bb.8(0x40000000), %bb.6(0x40000000) + + %92:vreg_128 = COPY killed %111 + %108:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + undef %107.sub2:vreg_128 = COPY killed %108 + %60:sgpr_32 = S_MOV_B32 0 + undef %61.sub0:sreg_256 = COPY %60 + %61.sub1:sreg_256 = COPY %60 + %61.sub2:sreg_256 = COPY %60 + %61.sub3:sreg_256 = COPY %60 + %61.sub4:sreg_256 = COPY %60 + %61.sub5:sreg_256 = COPY %60 + %61.sub6:sreg_256 = COPY %60 + %61.sub7:sreg_256 = COPY killed %60 + %62:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 killed %107, killed %61, 1, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4) + %109:vgpr_32 = V_AND_B32_e32 24, killed %62, implicit $exec + %67:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed %109, 
implicit $exec + dead %69:vgpr_32 = V_CNDMASK_B32_e64 0, -1, %67, implicit $exec + %112:vreg_128 = IMPLICIT_DEF + %114:sreg_64 = COPY $exec, implicit-def $exec + %115:sreg_64 = S_AND_B64 %114, %67, implicit-def dead $scc + %8:sreg_64 = S_XOR_B64 %115, %114, implicit-def dead $scc + $exec = S_MOV_B64_term killed %115 + SI_MASK_BRANCH %bb.6, implicit $exec + S_BRANCH %bb.8 + + bb.6.Flow: + successors: %bb.7(0x40000000), %bb.9(0x40000000) + + %116:sreg_64 = COPY killed %8 + %10:sreg_64 = S_OR_SAVEEXEC_B64 %116, implicit-def $exec, implicit-def $scc, implicit $exec + %105:vreg_128 = COPY killed %112 + %113:vreg_128 = COPY killed %105 + $exec = S_XOR_B64_term $exec, %10, implicit-def $scc + SI_MASK_BRANCH %bb.9, implicit $exec + S_BRANCH %bb.7 + + bb.7.bb71: + successors: %bb.9(0x80000000) + + dead %97:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + undef %95.sub1:vreg_128 = COPY killed %92.sub1 + %113:vreg_128 = COPY killed %95 + S_BRANCH %bb.9 + + bb.8.bb73: + successors: %bb.6(0x80000000) + + %103:vreg_128 = COPY %92 + %103.sub2:vreg_128 = COPY undef %69 + %104:vreg_128 = COPY killed %103 + %104.sub3:vreg_128 = COPY undef %69 + %112:vreg_128 = COPY killed %104 + S_BRANCH %bb.6 + + bb.9.bb75: + $exec = S_OR_B64 $exec, killed %10, implicit-def $scc + %99:vreg_128 = COPY killed %113 + %77:sreg_64_xexec = V_CMP_EQ_U32_e64 target-flags(amdgpu-gotprel32-lo) 0, killed %99.sub1, implicit $exec + %79:vgpr_32 = V_CNDMASK_B32_e64 0, 1065353216, killed %77, implicit $exec + %81:vgpr_32 = V_MUL_F32_e32 0, killed %79, implicit $exec + %82:vgpr_32 = V_CVT_PKRTZ_F16_F32_e64 0, undef %83:vgpr_32, 0, killed %81, 0, implicit $exec + EXP_DONE 0, undef %84:vgpr_32, killed %82, undef %85:vgpr_32, undef %86:vgpr_32, -1, -1, 15, implicit $exec + S_ENDPGM + +... 
Index: test/CodeGen/AMDGPU/coalescing-subreg-was-undef-but-became-def.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/coalescing-subreg-was-undef-but-became-def.mir @@ -0,0 +1,448 @@ +# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s +# +# This is another example of a test giving "Couldn't join subrange!" +# +# This shows a case where, at a point where the whole register is used, one +# subreg was undefined; after coalescing that subreg became defined, but the +# subrange was not updated to reflect that. It is arguable that this is correct, +# in that the subreg is defined but the use does not care what the value is +# there, so the use does not need to be in the subrange. +# +# GCN: {{^body}} + +--- | + ; ModuleID = 'cutdown.ll' + source_filename = "cutdown.ll" + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" + target triple = "amdgcn--amdpal" + + ; Function Attrs: nounwind + define dllexport amdgpu_cs void @_amdgpu_cs_main() local_unnamed_addr #0 !spirv.ExecutionModel !1 { + bb1: + br i1 undef, label %"myprint.exit", label %bb2, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb2: ; preds = %bb1 + br i1 undef, label %bb3, label %.lr.ph, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb3: ; preds = %bb2 + br label %.loopexit, !structurizecfg.uniform !2 + + .lr.ph: ; preds = %bb2 + br i1 undef, label %bb7, label %bb6, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb6: ; preds = %.lr.ph + br label %bb7, !structurizecfg.uniform !2 + + bb7: ; preds = %bb6, %.lr.ph + br i1 undef, label %bb11, label %bb8, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb8: ; preds = %bb7 + br label %bb11, !structurizecfg.uniform !2 + + bb11: ; preds = %bb7, %bb8 + br i1 undef, label 
%bb14, label %bb13, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb13: ; preds = %bb11 + br i1 undef, label %bb135, label %.lr.ph.1, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb14: ; preds = %bb163, %bb11, %bb135, %bb142, %bb149, %bb156 + br i1 undef, label %.lr.ph3897.preheader, label %bb19, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph3897.preheader: ; preds = %bb14 + br i1 undef, label %._crit_edge3898.unr-lcssa, label %.lr.ph3897, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph3897: ; preds = %.lr.ph3897.preheader + br label %._crit_edge3898.unr-lcssa, !structurizecfg.uniform !2 + + ._crit_edge3898.unr-lcssa: ; preds = %.lr.ph3897, %.lr.ph3897.preheader + br i1 undef, label %bb19, label %.lr.ph3897.epil, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph3897.epil: ; preds = %._crit_edge3898.unr-lcssa + br label %bb19, !structurizecfg.uniform !2 + + bb19: ; preds = %.lr.ph3897.epil, %._crit_edge3898.unr-lcssa, %bb14 + %__llpc_global_proxy_r0.4.vec.extract969 = extractelement <4 x i32> zeroinitializer, i32 1 + %tmp21 = and i32 1, %__llpc_global_proxy_r0.4.vec.extract969 + %__llpc_global_proxy_r1.0.vec.insert1262 = insertelement <4 x i32> undef, i32 %tmp21, i32 0 + br i1 undef, label %bb24, label %bb22, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb22: ; preds = %bb19 + br label %bb24, !structurizecfg.uniform !2 + + bb24: ; preds = %bb22, %bb19 + %__llpc_global_proxy_r0.6 = phi <4 x i32> [ zeroinitializer, %bb22 ], [ zeroinitializer, %bb19 ] + %__llpc_global_proxy_r1.1 = phi <4 x i32> [ , %bb22 ], [ %__llpc_global_proxy_r1.0.vec.insert1262, %bb19 ] + %__llpc_global_proxy_r0.8.vec.extract1082 = extractelement <4 x i32> %__llpc_global_proxy_r0.6, i32 2 + %tmp25 = add i32 %__llpc_global_proxy_r0.8.vec.extract1082, 1 + %tmp26 = icmp sgt i32 %tmp25, 7 + %__llpc_global_proxy_r1.0.vec.insert = insertelement <4 x i32> %__llpc_global_proxy_r1.1, i32 undef, i32 0 + br i1 %tmp26, label %.loopexit, label %bb39, !llvm.loop !3, 
!structurizecfg.uniform !2, !amdgpu.uniform !2 + + .loopexit: ; preds = %bb39, %bb3, %bb24 + %__llpc_global_proxy_r1.2 = phi <4 x i32> [ undef, %bb3 ], [ %__llpc_global_proxy_r1.0.vec.insert, %bb24 ], [ %__llpc_global_proxy_r1.0.vec.insert, %bb39 ] + %__llpc_global_proxy_r1.12.vec.extract1295 = extractelement <4 x i32> %__llpc_global_proxy_r1.2, i32 3 + %tmp27 = icmp ne i32 %__llpc_global_proxy_r1.12.vec.extract1295, 0 + %tmp28 = sext i1 %tmp27 to i32 + %tmp30 = icmp eq i32 %tmp28, 0 + br i1 %tmp30, label %bb32, label %bb36, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb32: ; preds = %.loopexit + br label %bb36, !structurizecfg.uniform !2 + + bb36: ; preds = %bb32, %.loopexit + br label %"myprint.exit", !structurizecfg.uniform !2 + + "myprint.exit": ; preds = %bb36, %bb1 + ret void + + bb39: ; preds = %bb24 + br label %.loopexit, !structurizecfg.uniform !2 + + .lr.ph.1: ; preds = %bb13 + br i1 undef, label %bb131, label %bb130, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb130: ; preds = %.lr.ph.1 + br label %bb131, !structurizecfg.uniform !2 + + bb131: ; preds = %bb130, %.lr.ph.1 + br i1 undef, label %bb132, label %bb134, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb132: ; preds = %bb131 + br label %bb134, !structurizecfg.uniform !2 + + bb134: ; preds = %bb132, %bb131 + br label %bb135, !structurizecfg.uniform !2 + + bb135: ; preds = %bb134, %bb13 + br i1 undef, label %bb14, label %bb136, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb136: ; preds = %bb135 + br i1 undef, label %bb142, label %.lr.ph.2, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph.2: ; preds = %bb136 + br i1 undef, label %bb138, label %bb137, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb137: ; preds = %.lr.ph.2 + br label %bb138, !structurizecfg.uniform !2 + + bb138: ; preds = %bb137, %.lr.ph.2 + br i1 undef, label %bb139, label %bb141, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb139: ; preds = %bb138 + br label %bb141, 
!structurizecfg.uniform !2 + + bb141: ; preds = %bb139, %bb138 + br label %bb142, !structurizecfg.uniform !2 + + bb142: ; preds = %bb141, %bb136 + br i1 undef, label %bb14, label %bb143, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb143: ; preds = %bb142 + br i1 undef, label %bb149, label %.lr.ph.3, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph.3: ; preds = %bb143 + br i1 undef, label %bb145, label %bb144, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb144: ; preds = %.lr.ph.3 + br label %bb145, !structurizecfg.uniform !2 + + bb145: ; preds = %bb144, %.lr.ph.3 + br i1 undef, label %bb146, label %bb148, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb146: ; preds = %bb145 + br label %bb148, !structurizecfg.uniform !2 + + bb148: ; preds = %bb146, %bb145 + br label %bb149, !structurizecfg.uniform !2 + + bb149: ; preds = %bb148, %bb143 + br i1 undef, label %bb14, label %bb150, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb150: ; preds = %bb149 + br i1 undef, label %bb156, label %.lr.ph.4, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph.4: ; preds = %bb150 + br i1 undef, label %bb152, label %bb151, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb151: ; preds = %.lr.ph.4 + br label %bb152, !structurizecfg.uniform !2 + + bb152: ; preds = %bb151, %.lr.ph.4 + br i1 undef, label %bb153, label %bb155, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb153: ; preds = %bb152 + br label %bb155, !structurizecfg.uniform !2 + + bb155: ; preds = %bb153, %bb152 + br label %bb156, !structurizecfg.uniform !2 + + bb156: ; preds = %bb155, %bb150 + br i1 undef, label %bb14, label %bb157, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb157: ; preds = %bb156 + br i1 undef, label %bb163, label %.lr.ph.5, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + .lr.ph.5: ; preds = %bb157 + br i1 undef, label %bb159, label %bb158, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb158: ; preds = %.lr.ph.5 + br label %bb159, 
!structurizecfg.uniform !2 + + bb159: ; preds = %bb158, %.lr.ph.5 + br i1 undef, label %bb160, label %bb162, !structurizecfg.uniform !2, !amdgpu.uniform !2 + + bb160: ; preds = %bb159 + br label %bb162, !structurizecfg.uniform !2 + + bb162: ; preds = %bb160, %bb159 + br label %bb163, !structurizecfg.uniform !2 + + bb163: ; preds = %bb162, %bb157 + br label %bb14, !structurizecfg.uniform !2 + } + + ; Function Attrs: convergent nounwind + declare { i1, i64 } @llvm.amdgcn.if(i1) #1 + + ; Function Attrs: convergent nounwind + declare { i1, i64 } @llvm.amdgcn.else(i64) #1 + + ; Function Attrs: convergent nounwind readnone + declare i64 @llvm.amdgcn.break(i64) #2 + + ; Function Attrs: convergent nounwind readnone + declare i64 @llvm.amdgcn.if.break(i1, i64) #2 + + ; Function Attrs: convergent nounwind readnone + declare i64 @llvm.amdgcn.else.break(i64, i64) #2 + + ; Function Attrs: convergent nounwind + declare i1 @llvm.amdgcn.loop(i64) #1 + + ; Function Attrs: convergent nounwind + declare void @llvm.amdgcn.end.cf(i64) #1 + + attributes #0 = { nounwind "target-cpu"="gfx803" } + attributes #1 = { convergent nounwind } + attributes #2 = { convergent nounwind readnone } + + !amdgpu.pal.metadata = !{!0} + + !0 = !{i32 268435482, i32 1, i32 268435488, i32 -1, i32 268435480, i32 1319776600, i32 268435481, i32 1655589334, i32 268435538, i32 64, i32 268435539, i32 0, i32 11794, i32 2883584, i32 11795, i32 6022, i32 11783, i32 64, i32 11784, i32 1, i32 11785, i32 1, i32 268435530, i32 0, i32 268435495, i32 0, i32 268435502, i32 0, i32 268435509, i32 256, i32 268435516, i32 104, i32 268435456, i32 -1737113002, i32 268435457, i32 -1389682907, i32 11840, i32 268435456, i32 11842, i32 0} + !1 = !{i32 5} + !2 = !{} + !3 = distinct !{!3, !4} + !4 = !{!"llvm.loop.unroll.count", i32 32} + +... 
+--- # Machine function; the register traffic of interest is in bb.14-bb.18 and bb.22 of the body.
+name: _amdgpu_cs_main
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+registers: # %1 and %24 are the only entries with a non-empty preferred-register, and they name each other.
+  - { id: 0, class: sreg_128, preferred-register: '' }
+  - { id: 1, class: sreg_128, preferred-register: '%24' }
+  - { id: 2, class: sreg_128, preferred-register: '' }
+  - { id: 3, class: sreg_128, preferred-register: '' }
+  - { id: 4, class: sreg_128, preferred-register: '' }
+  - { id: 5, class: sreg_128, preferred-register: '' }
+  - { id: 6, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 7, class: sreg_128, preferred-register: '' }
+  - { id: 8, class: sreg_128, preferred-register: '' }
+  - { id: 9, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 10, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 11, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 12, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 13, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 14, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 15, class: sreg_128, preferred-register: '' }
+  - { id: 16, class: sreg_128, preferred-register: '' }
+  - { id: 17, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 18, class: sreg_128, preferred-register: '' }
+  - { id: 19, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 20, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 21, class: sreg_128, preferred-register: '' }
+  - { id: 22, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 23, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 24, class: sreg_32_xm0, preferred-register: '%1' }
+  - { id: 25, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 26, class: sreg_128, preferred-register: '' }
+  - { id: 27, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 28, class: sreg_32_xm0, preferred-register: '' }
+  - { id: 29, class: vreg_128, preferred-register: '' }
+  - { id: 30, class: vreg_128, preferred-register: '' }
+  - { id: 31, class: vreg_128, preferred-register: '' }
+  - { id: 32, class: vreg_128, preferred-register: '' }
+  - { id: 33, class: vgpr_32, preferred-register: '' }
+  - { id: 34, class: sreg_128, preferred-register: '' }
+  - { id: 35, class: sreg_128, preferred-register: '' }
+  - { id: 36, class: vreg_128, preferred-register: '' }
+liveins:
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 0
+  adjustsStack: false
+  hasCalls: false
+  stackProtector: ''
+  maxCallFrameSize: 4294967295
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack:
+stack:
+constants:
+body: |
+  bb.0.bb1:
+    successors: %bb.21(0x40000000), %bb.1(0x40000000)
+    ; bb.0-bb.13 contain only branches on an undef $scc (or are empty and fall through); the sole vreg def among them is %36 in bb.2.
+    S_CBRANCH_SCC1 %bb.21, implicit undef $scc
+    S_BRANCH %bb.1
+
+  bb.1.bb2:
+    successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.3, implicit undef $scc
+    S_BRANCH %bb.2
+
+  bb.2.bb3:
+    successors: %bb.18(0x80000000)
+
+    %36:vreg_128 = IMPLICIT_DEF ; on this path into bb.18 the whole of %36 is an IMPLICIT_DEF
+    S_BRANCH %bb.18
+
+  bb.3..lr.ph:
+    successors: %bb.5(0x40000000), %bb.4(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.5, implicit undef $scc
+    S_BRANCH %bb.4
+
+  bb.4.bb6:
+    successors: %bb.5(0x80000000)
+
+
+  bb.5.bb7:
+    successors: %bb.7(0x40000000), %bb.6(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.7, implicit undef $scc
+    S_BRANCH %bb.6
+
+  bb.6.bb8:
+    successors: %bb.7(0x80000000)
+
+
+  bb.7.bb11:
+    successors: %bb.9(0x40000000), %bb.8(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.9, implicit undef $scc
+    S_BRANCH %bb.8
+
+  bb.8.bb13:
+    successors: %bb.9(0x80000000)
+
+
+  bb.9.bb14:
+    successors: %bb.10(0x40000000), %bb.14(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.14, implicit undef $scc
+    S_BRANCH %bb.10
+
+  bb.10..lr.ph3897.preheader:
+    successors: %bb.12(0x40000000), %bb.11(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.12, implicit undef $scc
+    S_BRANCH %bb.11
+
+  bb.11..lr.ph3897:
+    successors: %bb.12(0x80000000)
+
+
+  bb.12.._crit_edge3898.unr-lcssa:
+    successors: %bb.14(0x40000000), %bb.13(0x40000000)
+
+    S_CBRANCH_SCC1 %bb.14, implicit undef $scc
+    S_BRANCH %bb.13
+
+  bb.13..lr.ph3897.epil:
+    successors: %bb.14(0x80000000)
+
+
+  bb.14.bb19:
+    successors: %bb.15(0x40000000), %bb.16(0x40000000)
+    ; %6 is set to the constant 0; only the sub2 lane of %7 is written from it in this block.
+    %6:sreg_32_xm0 = S_MOV_B32 0
+    undef %7.sub2:sreg_128 = COPY %6
+    S_CBRANCH_SCC0 %bb.16, implicit undef $scc
+
+  bb.15:
+    successors: %bb.17(0x80000000)
+    ; Only sub0 of %8 is written. Full COPY chains in this block: %7 -> %5 -> %34 and %8 -> %35.
+    undef %8.sub0:sreg_128 = COPY killed %6
+    %5:sreg_128 = COPY killed %7
+    %34:sreg_128 = COPY killed %5
+    %35:sreg_128 = COPY killed %8
+    S_BRANCH %bb.17
+
+  bb.16.bb22:
+    successors: %bb.17(0x80000000)
+    ; sub0, sub1 and sub2 of %18 are written, sub3 is not. Full COPY chains: %18 -> %16 -> %35 and %7 -> %15 -> %34.
+    undef %18.sub0:sreg_128 = COPY %6
+    %18.sub1:sreg_128 = COPY %6
+    %18.sub2:sreg_128 = COPY killed %6
+    %16:sreg_128 = COPY killed %18
+    %15:sreg_128 = COPY killed %7
+    %34:sreg_128 = COPY killed %15
+    %35:sreg_128 = COPY killed %16 ; no terminator follows: control falls through to bb.17
+
+  bb.17.bb24:
+    successors: %bb.18(0x40000000), %bb.22(0x40000000)
+    ; %35 and %34 are copied on into %2 and %1. Only %1.sub2 is read here; %2 is copied whole into vreg_128 %30 and then %36.
+    %2:sreg_128 = COPY killed %35
+    %1:sreg_128 = COPY killed %34
+    %24:sreg_32_xm0 = S_ADD_I32 killed %1.sub2, target-flags(amdgpu-gotprel32-hi) 1, implicit-def dead $scc
+    S_CMP_LT_I32 killed %24, 8, implicit-def $scc
+    %30:vreg_128 = COPY %2
+    %36:vreg_128 = COPY killed %30
+    S_CBRANCH_SCC1 %bb.22, implicit killed $scc
+    S_BRANCH %bb.18
+
+  bb.18..loopexit:
+    successors: %bb.19(0x30000000), %bb.20(0x50000000)
+    ; %36 arrives from bb.2 (IMPLICIT_DEF) or from bb.17/bb.22 (COPY chains from %2). No instruction in this function writes a sub3 lane explicitly.
+    %29:vreg_128 = COPY killed %36
+    V_CMP_NE_U32_e32 0, killed %29.sub3, implicit-def $vcc, implicit $exec ; the only read of a sub3 lane
+    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
+    S_CBRANCH_VCCNZ %bb.20, implicit killed $vcc
+    S_BRANCH %bb.19
+
+  bb.19.bb32:
+    successors: %bb.20(0x80000000)
+
+
+  bb.20.bb36:
+    successors: %bb.21(0x80000000)
+
+
+  bb.21.myprint.exit:
+    S_ENDPGM
+
+  bb.22.bb39:
+    successors: %bb.18(0x80000000)
+    ; Second route for %2 into %36, this time through %31, before rejoining bb.18.
+    %31:vreg_128 = COPY killed %2
+    %36:vreg_128 = COPY killed %31
+    S_BRANCH %bb.18
+
+...