Index: llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp +++ llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp @@ -1571,11 +1571,17 @@ // Deny any overlapping intervals. This depends on all the reserved // register live ranges to look like dead defs. if (!MRI->isConstantPhysReg(DstReg)) { - for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) + for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { + // Abort if not all the regunits are reserved. + for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { + if (!MRI->isReserved(*RI)) + return false; + } if (RHS.overlaps(LIS->getRegUnit(*UI))) { DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); return false; } + } } // Skip any value computations, we are not adding new values to the Index: llvm/trunk/test/CodeGen/AArch64/regcoal-constreg.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/regcoal-constreg.mir +++ llvm/trunk/test/CodeGen/AArch64/regcoal-constreg.mir @@ -1,31 +0,0 @@ -# RUN: llc -mtriple=aarch64-- -run-pass=simple-register-coalescing %s -o - | FileCheck %s ---- | - define void @func() { ret void } -... ---- -# Check that we eliminate copies to/from constant physregs regardless of -# "interfering" reads/writes. -# CHECK: name: func -# CHECK-NOT: COPY -# CHECK: STRWui %wzr, %x1 -# CHECK-NOT: COPY -# CHECK: STRXui %xzr, %x1 -# CHECK: %wzr = SUBSWri %w1, 0, 0 -name: func -registers: - - { id: 0, class: gpr32 } - - { id: 1, class: gpr64 } - - { id: 2, class: gpr32 } -body: | - bb.0: - %0 = COPY %wzr - dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv - STRWui %0, %x1, 0 - - %1 = COPY %xzr - dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv - STRXui %1, %x1, 0 - - %2 = SUBSWri %w1, 0, 0, implicit-def %nzcv - %wzr = COPY %2 -... 
Index: llvm/trunk/test/CodeGen/AArch64/regcoal-physreg.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/regcoal-physreg.mir +++ llvm/trunk/test/CodeGen/AArch64/regcoal-physreg.mir @@ -0,0 +1,67 @@ +# RUN: llc -mtriple=aarch64-apple-ios -run-pass=simple-register-coalescing %s -o - | FileCheck %s +--- | + define void @func() { ret void } +... +--- +# Check coalescing of COPYs from reserved physregs. +# CHECK-LABEL: name: func +name: func +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr64 } + - { id: 2, class: gpr64 } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr64 } + - { id: 5, class: gpr32 } + - { id: 6, class: xseqpairsclass } +body: | + bb.0: + ; We usually should not coalesce copies from allocatable physregs. + ; CHECK: %0 = COPY %w7 + ; CHECK: STRWui %0, %x1, 0 + %0 = COPY %w7 + STRWui %0, %x1, 0 + + ; It is fine to coalesce copies from reserved physregs + ; CHECK-NOT: COPY + ; CHECK: STRXui %fp, %x1, 0 + %1 = COPY %fp + STRXui %1, %x1, 0 + + ; It is not fine to coalesce copies from reserved physregs when they are + ; clobbered. + ; CHECK: %2 = COPY %fp + ; CHECK: STRXui %2, %x1, 0 + %2 = COPY %fp + %fp = SUBXri %fp, 4, 0 + STRXui %2, %x1, 0 + + ; It is fine to coalesce copies from constant physregs even when they are + ; clobbered. + ; CHECK-NOT: COPY + ; CHECK: STRWui %wzr, %x1 + %3 = COPY %wzr + dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv + STRWui %3, %x1, 0 + + ; It is fine to coalesce copies from constant physregs even when they are + ; clobbered. + ; CHECK-NOT: COPY + ; CHECK: STRXui %xzr, %x1 + %4 = COPY %xzr + dead %wzr = SUBSWri %w1, 0, 0, implicit-def %nzcv + STRXui %4, %x1, 0 + + ; Coalescing COPYs into constant physregs. 
+ ; CHECK: %wzr = SUBSWri %w1, 0, 0 + %5 = SUBSWri %w1, 0, 0, implicit-def %nzcv + %wzr = COPY %5 + + ; Only coalesce when the source register is reserved as a whole (this is + ; a limitation of the current code which cannot update liveness information + ; of the non-reserved part). + ; CHECK: %6 = COPY %xzr_x0 + ; CHECK: HINT 0, implicit %6 + %6 = COPY %xzr_x0 + HINT 0, implicit %6 +... Index: llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll +++ llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll @@ -78,40 +78,41 @@ ret void } -; ALL-LABEL: max_12_sgprs_12_input_sgprs{{$}} +; The following test is commented out for now; http://llvm.org/PR31230 +; XALL-LABEL: max_12_sgprs_12_input_sgprs{{$}} ; ; Make sure copies for input buffer are not clobbered. This requires ; ; swapping the order the registers are copied from what normally ; ; happens. -; TOSMEM: s_mov_b32 s5, s11 -; TOSMEM: s_add_u32 m0, s5, -; TOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0 - -; ALL: SGPRBlocks: 2 -; ALL: NumSGPRsForWavesPerEU: 18 -define void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1, - i32 addrspace(1)* %out2, - i32 addrspace(1)* %out3, - i32 addrspace(1)* %out4, - i32 %one, i32 %two, i32 %three, i32 %four) #2 { - store volatile i32 0, i32* undef - %x.0 = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %x.0, i32 addrspace(1)* undef - %x.1 = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %x.0, i32 addrspace(1)* undef - %x.2 = call i32 @llvm.amdgcn.workgroup.id.z() - store volatile i32 %x.0, i32 addrspace(1)* undef - %x.3 = call i64 @llvm.amdgcn.dispatch.id() - store volatile i64 %x.3, i64 addrspace(1)* undef - %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() - store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef - - store i32 %one, i32 addrspace(1)* %out1 - store i32 %two, i32 addrspace(1)* 
%out2 - store i32 %three, i32 addrspace(1)* %out3 - store i32 %four, i32 addrspace(1)* %out4 - ret void -} +; XTOSMEM: s_mov_b32 s5, s11 +; XTOSMEM: s_add_u32 m0, s5, +; XTOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0 + +; XALL: SGPRBlocks: 2 +; XALL: NumSGPRsForWavesPerEU: 18 +;define void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1, +; i32 addrspace(1)* %out2, +; i32 addrspace(1)* %out3, +; i32 addrspace(1)* %out4, +; i32 %one, i32 %two, i32 %three, i32 %four) #2 { +; store volatile i32 0, i32* undef +; %x.0 = call i32 @llvm.amdgcn.workgroup.id.x() +; store volatile i32 %x.0, i32 addrspace(1)* undef +; %x.1 = call i32 @llvm.amdgcn.workgroup.id.y() +; store volatile i32 %x.0, i32 addrspace(1)* undef +; %x.2 = call i32 @llvm.amdgcn.workgroup.id.z() +; store volatile i32 %x.0, i32 addrspace(1)* undef +; %x.3 = call i64 @llvm.amdgcn.dispatch.id() +; store volatile i64 %x.3, i64 addrspace(1)* undef +; %x.4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() +; store volatile i8 addrspace(2)* %x.4, i8 addrspace(2)* addrspace(1)* undef +; +; store i32 %one, i32 addrspace(1)* %out1 +; store i32 %two, i32 addrspace(1)* %out2 +; store i32 %three, i32 addrspace(1)* %out3 +; store i32 %four, i32 addrspace(1)* %out4 +; ret void +;} declare i32 @llvm.amdgcn.workgroup.id.x() #1 declare i32 @llvm.amdgcn.workgroup.id.y() #1