diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -857,6 +857,9 @@ LiveRegs.clear(); LiveRegs.addLiveOuts(*Pred); MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator(); + // TODO: Optimize out redundant IMPLICIT_DEFs of subregs. NewLiveIns has + // not been through the "Skip the register if we are about to add one of + // its super registers" optimization from addLiveIns() yet. for (Register Reg : NewLiveIns) { if (!LiveRegs.available(*MRI, Reg)) continue; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp @@ -483,13 +483,6 @@ IsSGPR = false; IsAGPR = true; Width = 32; - } else { - assert((AMDGPU::TTMP_32RegClass.contains(Reg) || - AMDGPU::TTMP_64RegClass.contains(Reg) || - AMDGPU::TTMP_128RegClass.contains(Reg) || - AMDGPU::TTMP_256RegClass.contains(Reg) || - AMDGPU::TTMP_512RegClass.contains(Reg)) && - "Unknown register class"); } unsigned HWReg = TRI.getHWRegIndex(Reg); int MaxUsed = HWReg + Width - 1; diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-agpr-hi16.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-agpr-hi16.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/resource-usage-agpr-hi16.ll @@ -0,0 +1,308 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -O3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s -check-prefix=GFX90A + +; Check that implicit defs of AGPR hi16 regs do not cause +; AMDGPUResourceUsageAnalysis to crash, even though there is no AGPR_HI16 +; register class. +define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 %arg2, i1 %arg3, i1 %arg4, i1 %arg5, i1 %arg6, ptr addrspace(3) %arg7, ptr addrspace(3) %arg8, ptr addrspace(3) %arg9, ptr addrspace(3) %arg10) { +; GFX90A: implicit-def: $agpr{{[0-9]+}}_hi16 +bb: + %i = tail call i32 @llvm.amdgcn.workitem.id.x() + %i11 = icmp eq i32 %i, 0 + %i12 = load i32, ptr addrspace(3) null, align 8 + %i13 = zext i32 %i12 to i64 + %i14 = getelementptr i32, ptr addrspace(1) %arg, i64 %i13 + br i1 %arg3, label %bb15, label %bb103 + +bb15: + %i16 = zext i32 %i to i64 + %i17 = getelementptr i32, ptr addrspace(1) %i14, i64 %i16 + %i18 = ptrtoint ptr addrspace(1) %i17 to i64 + br i1 %arg4, label %bb19, label %bb20 + +bb19: + store i64 %i18, ptr addrspace(5) null, align 8 + unreachable + +bb20: + %i21 = getelementptr i32, ptr addrspace(1) %i17, i64 256 + %i22 = ptrtoint ptr addrspace(1) %i21 to i64 + %i23 = inttoptr i64 %i22 to ptr + %i24 = load i8, ptr %i23, align 1 + %i25 = icmp sge i8 0, %i24 + br i1 %i25, label %bb26, label %bb27 + +bb26: + store i64 %i22, ptr addrspace(5) null, align 8 + unreachable + +bb27: + %i28 = getelementptr i32, ptr addrspace(1) %i17, i64 512 + %i29 = ptrtoint ptr addrspace(1) %i28 to i64 + %i30 = inttoptr i64 %i29 to ptr + %i31 = load i8, ptr %i30, align 1 + %i32 = icmp ne i8 %i31, 0 + br i1 %i32, label %bb33, label %bb34 + +bb33: + store i64 %i29, ptr addrspace(5) null, align 8 + unreachable + +bb34: + %i35 = getelementptr i32, ptr addrspace(1) %i17, i64 768 + %i36 = ptrtoint ptr addrspace(1) %i35 to i64 + %i37 = inttoptr i64 %i36 to ptr + %i38 = load i8, ptr %i37, align 1 + %i39 = icmp ne i8 %i38, 0 + br i1 %i39, label %bb40, label %bb41 + +bb40: + store i64 %i36, ptr addrspace(5) null, align 8 + unreachable + +bb41: + %i42 = getelementptr i32, ptr addrspace(1) %i17, i64 1024 + %i43 = ptrtoint ptr addrspace(1) %i42 to i64 + %i44 = inttoptr i64 %i43 to ptr + %i45 = load i8, ptr %i44, align 1 + %i46 = icmp ne i8 %i45, 0 + br i1 %i46, label %bb47, label %bb48 + +bb47: + store i64 %i43, ptr addrspace(5) null, align 8 + unreachable + +bb48: + %i49 = getelementptr i32, ptr addrspace(1) %i17, i64 1280 + %i50 = ptrtoint ptr addrspace(1) %i49 to i64 + %i51 = inttoptr i64 %i50 to ptr + %i52 = load i8, ptr %i51, align 1 + %i53 = icmp ne i8 %i52, 0 + br i1 %i53, label %bb54, label %bb55 + +bb54: + store i64 %i50, ptr addrspace(5) null, align 8 + unreachable + +bb55: + %i56 = getelementptr i32, ptr addrspace(1) %i17, i64 1536 + %i57 = ptrtoint ptr addrspace(1) %i56 to i64 + %i58 = or i64 %i57, 1 + %i59 = inttoptr i64 %i58 to ptr + br i1 true, label %bb61, label %bb60 + +bb60: + br label %bb63 + +bb61: + br i1 %arg5, label %bb62, label %bb63 + +bb62: + store i64 %i57, ptr addrspace(5) null, align 8 + unreachable + +bb63: + %i64 = ptrtoint ptr addrspace(1) %i14 to i64 + br i1 true, label %bb66, label %bb65 + +bb65: + br label %bb68 + +bb66: + br i1 %arg5, label %bb67, label %bb68 + +bb67: + store i64 %i64, ptr addrspace(5) null, align 8 + unreachable + +bb68: + %i69 = zext i1 %arg5 to i8 + %i70 = getelementptr [2 x i32], ptr addrspace(1) null, i64 %i16 + %i71 = ptrtoint ptr addrspace(1) %i70 to i64 + br i1 %arg5, label %bb72, label %bb73 + +bb72: + call void @f2(i64 %i71) + unreachable + +bb73: + %i74 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 256 + %i75 = ptrtoint ptr addrspace(1) %i74 to i64 + %i76 = inttoptr i64 %i75 to ptr + %i77 = load i8, ptr %i76, align 1 + %i78 = icmp ne i8 %i77, 0 + br i1 %i78, label %bb79, label %bb80 + +bb79: + store i64 %i75, ptr addrspace(5) null, align 8 + unreachable + +bb80: + %i81 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 512 + %i82 = ptrtoint ptr addrspace(1) %i81 to i64 + %i83 = or i64 %i82, 1 + br i1 %arg6, label %bb84, label %bb85 + +bb84: + store i64 %i82, ptr addrspace(5) null, align 8 + unreachable + +bb85: + %i86 = inttoptr i64 %i83 to ptr + %i87 = load i8, ptr %i86, align 1 + %i88 = icmp ne i8 %i87, 0 + br i1 %i88, label %bb89, label %bb90 + +bb89: + store i64 %i83, ptr addrspace(5) null, align 8 + unreachable + +bb90: + %i91 = load i64, ptr addrspace(3) null, align 8 + %i92 = load i64, ptr addrspace(3) %arg8, align 8 + %i93 = load i64, ptr addrspace(3) %arg7, align 8 + %i94 = trunc i64 %i91 to i32 + %i95 = lshr i64 %arg2, 1 + %i96 = trunc i64 %i95 to i32 + %i97 = trunc i64 %i92 to i32 + %i98 = lshr i64 %i92, 32 + %i99 = trunc i64 %i98 to i32 + %i100 = trunc i64 %i93 to i32 + %i101 = lshr i64 %i93, 1 + %i102 = trunc i64 %i101 to i32 + br label %bb127 + +bb103: + br i1 %arg4, label %bb104, label %bb105 + +bb104: + ret void + +bb105: + %i106 = load i64, ptr addrspace(3) null, align 8 + %i107 = load i64, ptr addrspace(3) %arg9, align 8 + %i108 = load i64, ptr addrspace(3) %arg7, align 8 + %i109 = load i64, ptr addrspace(3) %arg10, align 8 + %i110 = load i64, ptr addrspace(3) %arg8, align 8 + %i111 = trunc i64 %i110 to i32 + %i112 = lshr i64 %i110, 32 + %i113 = trunc i64 %i112 to i32 + %i114 = trunc i64 %i106 to i32 + %i115 = lshr i64 %i106, 32 + %i116 = trunc i64 %i115 to i32 + %i117 = trunc i64 %i107 to i32 + %i118 = lshr i64 %i107, 32 + %i119 = trunc i64 %i118 to i32 + %i120 = trunc i64 %i108 to i32 + %i121 = lshr i64 %i108, 32 + %i122 = trunc i64 %i121 to i32 + %i123 = trunc i64 %i109 to i32 + %i124 = lshr i64 %i109, 32 + %i125 = trunc i64 %i124 to i32 + br i1 false, label %bb105.bb127_crit_edge, label %bb140 + +bb105.bb127_crit_edge: + br label %bb127 + +bb127: + %i128 = phi i32 [ %i94, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i129 = phi i32 [ %i96, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i130 = phi i32 [ %i97, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i131 = phi i32 [ %i99, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i132 = phi i8 [ %i69, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i133 = phi i32 [ %i100, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i134 = phi i32 [ %i102, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i135 = phi i64 [ %i91, %bb90 ], [ 0, %bb105.bb127_crit_edge ] + %i136 = zext i1 %arg3 to i8 + %i137 = trunc i64 %i135 to i32 + %i138 = lshr i64 %i135, 1 + %i139 = trunc i64 %i138 to i32 + br label %bb140 + +bb140: + %i141 = phi i32 [ 0, %bb127 ], [ %i111, %bb105 ] + %i142 = phi i32 [ 0, %bb127 ], [ %i113, %bb105 ] + %i143 = phi i32 [ 0, %bb127 ], [ %i114, %bb105 ] + %i144 = phi i32 [ 0, %bb127 ], [ %i116, %bb105 ] + %i145 = phi i32 [ 0, %bb127 ], [ %i117, %bb105 ] + %i146 = phi i32 [ 0, %bb127 ], [ %i119, %bb105 ] + %i147 = phi i32 [ 0, %bb127 ], [ %i120, %bb105 ] + %i148 = phi i32 [ 0, %bb127 ], [ %i122, %bb105 ] + %i149 = phi i32 [ %i128, %bb127 ], [ %i123, %bb105 ] + %i150 = phi i32 [ %i129, %bb127 ], [ %i125, %bb105 ] + %i151 = phi i32 [ %i130, %bb127 ], [ 0, %bb105 ] + %i152 = phi i32 [ %i131, %bb127 ], [ 0, %bb105 ] + %i153 = phi i8 [ %i132, %bb127 ], [ 0, %bb105 ] + %i154 = phi i32 [ %i133, %bb127 ], [ 0, %bb105 ] + %i155 = phi i32 [ %i134, %bb127 ], [ 0, %bb105 ] + %i156 = phi i32 [ %i137, %bb127 ], [ 0, %bb105 ] + %i157 = phi i32 [ %i139, %bb127 ], [ 0, %bb105 ] + %i158 = phi i8 [ %i136, %bb127 ], [ 0, %bb105 ] + br i1 %arg4, label %bb159, label %bb174 + +bb159: + br i1 %i11, label %bb160, label %bb161 + +bb160: + unreachable + +bb161: + %i162 = or i32 %i146, %i144 + %i163 = or i32 %i162, %i142 + %i164 = or i32 %i150, %i148 + %i165 = or i32 %i164, %i163 + %i166 = icmp ne i8 %i153, 0 + %i167 = select i1 %i166, i32 0, i32 %i165 + %i168 = or i32 %i155, %i152 + %i169 = or i32 %i168, %i167 + %i170 = icmp ne i8 %i158, 0 + %i171 = select i1 %i170, i32 0, i32 %i169 + %i172 = or i32 %i171, %i157 + %i173 = zext i32 %i172 to i64 + store i64 %i173, ptr addrspace(3) null, align 4 + unreachable + +bb174: + %i175 = or i32 1, %i141 + %i176 = or i32 %i175, %i143 + %i177 = or i32 %i176, %i145 + %i178 = select i1 %arg3, i32 0, i32 %i177 + %i179 = or i32 %i178, %i147 + %i180 = or i32 %i179, %i149 + %i181 = or i32 %i180, %i151 + %i182 = select i1 %arg3, i32 %i181, i32 0 + %i183 = or i32 %i182, %i154 + %i184 = or i32 %i183, %i156 + %i185 = getelementptr [2 x i32], ptr addrspace(1) %arg1, i64 %i13 + br i1 %arg3, label %bb186, label %bb196 + +bb186: + %i187 = zext i32 %i175 to i64 + %i188 = zext i32 %i176 to i64 + %i189 = zext i32 %i177 to i64 + %i190 = zext i32 %i179 to i64 + %i191 = zext i32 %i180 to i64 + %i192 = zext i32 %i178 to i64 + %i193 = zext i32 %i182 to i64 + %i194 = zext i32 %i181 to i64 + store i64 %i187, ptr addrspace(3) null, align 8 + store i64 %i188, ptr addrspace(3) %arg7, align 8 + store i64 %i189, ptr addrspace(3) %arg8, align 8 + store i64 %i190, ptr addrspace(3) null, align 8 + store i64 %i191, ptr addrspace(3) %arg7, align 8 + store i64 %i192, ptr addrspace(3) null, align 8 + store i64 %i193, ptr addrspace(3) %arg7, align 8 + store i64 %i194, ptr addrspace(3) null, align 8 + %i195 = ptrtoint ptr addrspace(1) %i185 to i64 + store i64 %i195, ptr addrspace(5) null, align 8 + unreachable + +bb196: + %i197 = zext i32 %i184 to i64 + store i64 %i197, ptr addrspace(3) null, align 8 + unreachable +} + +declare void @f2(i64) + +declare i32 @llvm.amdgcn.workitem.id.x()