diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -86,8 +86,13 @@ LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent(); dbgs() << "Checking use: " << MIUse << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); - if (isLocalUse(MOUse, MI, InsertMBB)) + if (isLocalUse(MOUse, MI, InsertMBB)) { + // Even if we're in the same block, if the block is very large we could + // still have many long live ranges. Try to do intra-block localization + // too. + LocalizedInstrs.insert(&MI); continue; + } LLVM_DEBUG(dbgs() << "Fixing non-local use\n"); Changed = true; auto MBBAndReg = std::make_pair(InsertMBB, Reg); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll @@ -19,14 +19,14 @@ ; CHECK: bb.1.entry: ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; CHECK: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1 - ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 - ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2 + ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2 ; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2 - ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 + ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 ; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3 - ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 + ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (dereferenceable load 4 from @var1) - ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[LOAD]](s32), [[C]] + ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 + ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.3 ; CHECK: bb.2.if.then: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer.mir @@ -39,6 +39,7 @@ } define void @test_inttoptr() { ret void } + define void @many_local_use_intra_block() { ret void } ... @@ -335,15 +336,15 @@ ; CHECK-LABEL: name: intrablock_with_globalvalue ; CHECK: bb.0.entry: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1 - ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 - ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2 - ; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2 - ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 - ; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3 - ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV]](p0) :: (load 4 from @var1) - ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C]] + ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2 + ; CHECK: [[GV:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var2 + ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3 + ; CHECK: [[GV1:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var3 + ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 + ; CHECK: [[GV2:%[0-9]+]]:gpr(p0) = G_GLOBAL_VALUE @var1 + ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[GV2]](p0) :: (load 4 from @var1) + ; CHECK: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 + ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]] ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1 ; CHECK: G_BR %bb.2 @@ -403,12 +404,12 @@ ; CHECK: liveins: $w0, $x1 ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; CHECK: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $x1 - ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 128 - ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0 - ; CHECK: [[INTTOPTR:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C2]](s64) - ; CHECK: [[INTTOPTR1:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C]](s64) - ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]] + ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0 + ; CHECK: [[INTTOPTR:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C]](s64) + ; CHECK: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 128 + ; CHECK: [[INTTOPTR1:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[C1]](s64) + ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C2]] ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1 ; CHECK: G_BR %bb.2 @@ -450,3 +451,28 @@ RET_ReallyLR implicit $x0 ... + +--- +name: many_local_use_intra_block +legalized: true +regBankSelected: true +body: | + bb.0: + ; CHECK-LABEL: name: many_local_use_intra_block + ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 + ; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]] + ; CHECK: [[ADD1:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]] + ; CHECK: [[ADD2:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]] + ; CHECK: [[ADD3:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]] + ; CHECK: [[ADD4:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 2 + ; CHECK: [[ADD5:%[0-9]+]]:gpr(s32) = G_ADD [[C1]], [[C1]] + %0:gpr(s32) = G_CONSTANT i32 1 + %1:gpr(s32) = G_CONSTANT i32 2 + %2:gpr(s32) = G_ADD %0, %0 + %3:gpr(s32) = G_ADD %0, %0 + %4:gpr(s32) = G_ADD %0, %0 + %5:gpr(s32) = G_ADD %0, %0 + %6:gpr(s32) = G_ADD %0, %0 + %7:gpr(s32) = G_ADD %1, %1 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll @@ -8,9 +8,9 @@ ; that takes a swifterror parameter and "caller" is the caller of "foo". define float @foo(%swift_error** swifterror %error_ptr_ref) { ; CHECK-LABEL: foo: -; CHECK: mov [[ID:w[0-9]+]], #1 ; CHECK: mov w0, #16 ; CHECK: malloc +; CHECK: mov [[ID:w[0-9]+]], #1 ; CHECK: strb [[ID]], [x0, #8] ; CHECK: mov x21, x0 ; CHECK-NOT: x21 @@ -164,9 +164,9 @@ define void @foo_sret(%struct.S* sret %agg.result, i32 %val1, %swift_error** swifterror %error_ptr_ref) { ; CHECK-LABEL: foo_sret: ; CHECK: mov [[SRET:x[0-9]+]], x8 -; CHECK: mov [[ID:w[0-9]+]], #1 ; CHECK: mov w0, #16 ; CHECK: malloc +; CHECK: mov [[ID:w[0-9]+]], #1 ; CHECK: strb [[ID]], [x0, #8] ; CHECK: str w{{.*}}, [{{.*}}[[SRET]], #4] ; CHECK: mov x21, x0 @@ -220,9 +220,9 @@ declare void @llvm.va_start(i8*) nounwind define float @foo_vararg(%swift_error** swifterror %error_ptr_ref, ...) { ; CHECK-LABEL: foo_vararg: -; CHECK-DAG: mov [[ID:w[0-9]+]], #1 ; CHECK: mov w0, #16 ; CHECK: malloc +; CHECK-DAG: mov [[ID:w[0-9]+]], #1 ; CHECK-DAG: strb [[ID]], [x0, #8] ; First vararg diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll b/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/translate-constant-dag.ll @@ -9,12 +9,12 @@ ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 10 - ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 20 - ; CHECK: [[MOVi32imm2:%[0-9]+]]:gpr32 = MOVi32imm 50 ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 0 :: (store 1 into %ir.dst) + ; CHECK: [[MOVi32imm1:%[0-9]+]]:gpr32 = MOVi32imm 20 ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 1 :: (store 1 into %ir.dst + 1) ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 2 :: (store 1 into %ir.dst + 2) ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 3 :: (store 1 into %ir.dst + 3) + ; CHECK: [[MOVi32imm2:%[0-9]+]]:gpr32 = MOVi32imm 50 ; CHECK: STRBBui [[MOVi32imm2]], [[COPY]], 4 :: (store 1 into %ir.dst + 4) ; CHECK: STRBBui [[MOVi32imm]], [[COPY]], 5 :: (store 1 into %ir.dst + 5) ; CHECK: STRBBui [[MOVi32imm1]], [[COPY]], 6 :: (store 1 into %ir.dst + 6) diff --git a/llvm/test/CodeGen/AArch64/tail-call.ll b/llvm/test/CodeGen/AArch64/tail-call.ll --- a/llvm/test/CodeGen/AArch64/tail-call.ll +++ b/llvm/test/CodeGen/AArch64/tail-call.ll @@ -121,11 +121,10 @@ ; SDAG: fmov s2, #1.0 ; SDAG: ret ; GISEL-LABEL: test_add_elem: -; GISEL: fmov s8, #1.00000000 +; GISEL: str x30, [sp, #-16]! ; GISEL: bl get_vec2 -; GISEL: ldr x30, [sp, #8] -; GISEL: mov v2.16b, v8.16b -; GISEL: ldr d8, [sp], #16 +; GISEL: fmov s2, #1.0 +; GISEL: ldr x30, [sp], #16 ; GISEL: ret %call = tail call { [2 x float] } @get_vec2() diff --git a/llvm/test/CodeGen/AArch64/tiny_model.ll b/llvm/test/CodeGen/AArch64/tiny_model.ll --- a/llvm/test/CodeGen/AArch64/tiny_model.ll +++ b/llvm/test/CodeGen/AArch64/tiny_model.ll @@ -60,9 +60,9 @@ ; ; CHECK-GLOBISEL-LABEL: foo2: ; CHECK-GLOBISEL: // %bb.0: // %entry -; CHECK-GLOBISEL-NEXT: adr x8, dst -; CHECK-GLOBISEL-NEXT: adr x9, ptr -; CHECK-GLOBISEL-NEXT: str x8, [x9] +; CHECK-GLOBISEL-NEXT: adr x8, ptr +; CHECK-GLOBISEL-NEXT: adr x9, dst +; CHECK-GLOBISEL-NEXT: str x9, [x8] ; CHECK-GLOBISEL-NEXT: ret ; ; CHECK-PIC-LABEL: foo2: @@ -74,9 +74,9 @@ ; ; CHECK-PIC-GLOBISEL-LABEL: foo2: ; CHECK-PIC-GLOBISEL: // %bb.0: // %entry -; CHECK-PIC-GLOBISEL-NEXT: ldr x8, :got:dst -; CHECK-PIC-GLOBISEL-NEXT: ldr x9, :got:ptr -; CHECK-PIC-GLOBISEL-NEXT: str x8, [x9] +; CHECK-PIC-GLOBISEL-NEXT: ldr x8, :got:ptr +; CHECK-PIC-GLOBISEL-NEXT: ldr x9, :got:dst +; CHECK-PIC-GLOBISEL-NEXT: str x9, [x8] ; CHECK-PIC-GLOBISEL-NEXT: ret entry: store i8* getelementptr inbounds ([65536 x i8], [65536 x i8]* @dst, i64 0, i64 0), i8** @ptr, align 8 @@ -179,9 +179,9 @@ ; ; CHECK-GLOBISEL-LABEL: bar2: ; CHECK-GLOBISEL: // %bb.0: // %entry -; CHECK-GLOBISEL-NEXT: adr x8, ldst -; CHECK-GLOBISEL-NEXT: adr x9, lptr -; CHECK-GLOBISEL-NEXT: str x8, [x9] +; CHECK-GLOBISEL-NEXT: adr x8, lptr +; CHECK-GLOBISEL-NEXT: adr x9, ldst +; CHECK-GLOBISEL-NEXT: str x9, [x8] ; CHECK-GLOBISEL-NEXT: ret ; ; CHECK-PIC-LABEL: bar2: @@ -193,9 +193,9 @@ ; ; CHECK-PIC-GLOBISEL-LABEL: bar2: ; CHECK-PIC-GLOBISEL: // %bb.0: // %entry -; CHECK-PIC-GLOBISEL-NEXT: adr x8, ldst -; CHECK-PIC-GLOBISEL-NEXT: adr x9, lptr -; CHECK-PIC-GLOBISEL-NEXT: str x8, [x9] +; CHECK-PIC-GLOBISEL-NEXT: adr x8, lptr +; CHECK-PIC-GLOBISEL-NEXT: adr x9, ldst +; CHECK-PIC-GLOBISEL-NEXT: str x9, [x8] ; CHECK-PIC-GLOBISEL-NEXT: ret entry: store i8* @ldst, i8** @lptr, align 8 @@ -297,9 +297,9 @@ ; ; CHECK-GLOBISEL-LABEL: baz2: ; CHECK-GLOBISEL: // %bb.0: // %entry -; CHECK-GLOBISEL-NEXT: adr x8, lbdst -; CHECK-GLOBISEL-NEXT: adr x9, lptr -; CHECK-GLOBISEL-NEXT: str x8, [x9] +; CHECK-GLOBISEL-NEXT: adr x8, lptr +; CHECK-GLOBISEL-NEXT: adr x9, lbdst +; CHECK-GLOBISEL-NEXT: str x9, [x8] ; CHECK-GLOBISEL-NEXT: ret ; ; CHECK-PIC-LABEL: baz2: @@ -311,9 +311,9 @@ ; ; CHECK-PIC-GLOBISEL-LABEL: baz2: ; CHECK-PIC-GLOBISEL: // %bb.0: // %entry -; CHECK-PIC-GLOBISEL-NEXT: adr x8, lbdst -; CHECK-PIC-GLOBISEL-NEXT: adr x9, lptr -; CHECK-PIC-GLOBISEL-NEXT: str x8, [x9] +; CHECK-PIC-GLOBISEL-NEXT: adr x8, lptr +; CHECK-PIC-GLOBISEL-NEXT: adr x9, lbdst +; CHECK-PIC-GLOBISEL-NEXT: str x9, [x8] ; CHECK-PIC-GLOBISEL-NEXT: ret entry: store i8* getelementptr inbounds ([65536 x i8], [65536 x i8]* @lbdst, i64 0, i64 0), i8** @lptr, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll @@ -200,12 +200,12 @@ ; GFX9: liveins: $sgpr2, $sgpr3 ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]] - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]] ; GFX9: $vgpr0 = COPY [[SELECT]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -214,12 +214,12 @@ ; GFX10: liveins: $sgpr2, $sgpr3 ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]] - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]] ; GFX10: $vgpr0 = COPY [[SELECT]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -336,13 +336,13 @@ ; GFX9: liveins: $sgpr2, $sgpr3 ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; GFX9: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]] ; GFX9: $vgpr0 = COPY [[SELECT]](s32) ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 @@ -351,13 +351,13 @@ ; GFX10: liveins: $sgpr2, $sgpr3 ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; GFX10: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]] ; GFX10: $vgpr0 = COPY [[SELECT]](s32) ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -6,14 +6,14 @@ ; GPRIDX-LABEL: dyn_extract_v8f32_const_s_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GPRIDX-NEXT: s_mov_b32 s4, 1.0 -; GPRIDX-NEXT: s_mov_b32 s5, 2.0 -; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 -; GPRIDX-NEXT: s_mov_b32 s7, 4.0 -; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 -; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 -; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 ; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 +; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 +; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 +; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 +; GPRIDX-NEXT: s_mov_b32 s7, 4.0 +; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 +; GPRIDX-NEXT: s_mov_b32 s5, 2.0 +; GPRIDX-NEXT: s_mov_b32 s4, 1.0 ; GPRIDX-NEXT: s_mov_b64 s[12:13], exec ; GPRIDX-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s14, v0 @@ -32,14 +32,14 @@ ; MOVREL-LABEL: dyn_extract_v8f32_const_s_v: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MOVREL-NEXT: s_mov_b32 s4, 1.0 -; MOVREL-NEXT: s_mov_b32 s5, 2.0 -; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 -; MOVREL-NEXT: s_mov_b32 s7, 4.0 -; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 -; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 -; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 ; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 +; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 +; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 +; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 +; MOVREL-NEXT: s_mov_b32 s7, 4.0 +; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 +; MOVREL-NEXT: s_mov_b32 s5, 2.0 +; MOVREL-NEXT: s_mov_b32 s4, 1.0 ; MOVREL-NEXT: s_mov_b64 s[12:13], exec ; MOVREL-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s14, v0 @@ -64,13 +64,13 @@ ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s4, 1.0 ; GPRIDX-NEXT: s_mov_b32 m0, s2 -; GPRIDX-NEXT: s_mov_b32 s5, 2.0 -; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 -; GPRIDX-NEXT: s_mov_b32 s7, 4.0 -; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 -; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 -; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 ; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 +; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 +; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 +; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 +; GPRIDX-NEXT: s_mov_b32 s7, 4.0 +; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 +; GPRIDX-NEXT: s_mov_b32 s5, 2.0 ; GPRIDX-NEXT: s_movrels_b32 s0, s4 ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: ; return to shader part epilog @@ -79,13 +79,13 @@ ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b32 s4, 1.0 ; MOVREL-NEXT: s_mov_b32 m0, s2 -; MOVREL-NEXT: s_mov_b32 s5, 2.0 -; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 -; MOVREL-NEXT: s_mov_b32 s7, 4.0 -; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 -; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 -; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 ; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 +; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 +; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 +; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 +; MOVREL-NEXT: s_mov_b32 s7, 4.0 +; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 +; MOVREL-NEXT: s_mov_b32 s5, 2.0 ; MOVREL-NEXT: s_movrels_b32 s0, s4 ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: ; return to shader part epilog @@ -246,14 +246,14 @@ ; GPRIDX-LABEL: dyn_extract_v8i64_const_s_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 -; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 -; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 -; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 -; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 -; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 -; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 ; GPRIDX-NEXT: s_mov_b64 s[18:19], 8 +; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 +; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 +; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 +; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 +; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 +; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 +; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 ; GPRIDX-NEXT: s_mov_b64 s[20:21], exec ; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 ; GPRIDX-NEXT: v_readfirstlane_b32 s22, v0 @@ -273,14 +273,14 @@ ; MOVREL-LABEL: dyn_extract_v8i64_const_s_v: ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MOVREL-NEXT: s_mov_b64 s[4:5], 1 -; MOVREL-NEXT: s_mov_b64 s[6:7], 2 -; MOVREL-NEXT: s_mov_b64 s[8:9], 3 -; MOVREL-NEXT: s_mov_b64 s[10:11], 4 -; MOVREL-NEXT: s_mov_b64 s[12:13], 5 -; MOVREL-NEXT: s_mov_b64 s[14:15], 6 -; MOVREL-NEXT: s_mov_b64 s[16:17], 7 ; MOVREL-NEXT: s_mov_b64 s[18:19], 8 +; MOVREL-NEXT: s_mov_b64 s[16:17], 7 +; MOVREL-NEXT: s_mov_b64 s[14:15], 6 +; MOVREL-NEXT: s_mov_b64 s[12:13], 5 +; MOVREL-NEXT: s_mov_b64 s[10:11], 4 +; MOVREL-NEXT: s_mov_b64 s[8:9], 3 +; MOVREL-NEXT: s_mov_b64 s[6:7], 2 +; MOVREL-NEXT: s_mov_b64 s[4:5], 1 ; MOVREL-NEXT: s_mov_b64 s[20:21], exec ; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 ; MOVREL-NEXT: v_readfirstlane_b32 s22, v0 @@ -306,13 +306,13 @@ ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 ; GPRIDX-NEXT: s_mov_b32 m0, s2 -; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 -; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 -; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 -; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 -; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 -; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 ; GPRIDX-NEXT: s_mov_b64 s[18:19], 8 +; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 +; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 +; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 +; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 +; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 +; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5] ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 @@ -323,13 +323,13 @@ ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_mov_b64 s[4:5], 1 ; MOVREL-NEXT: s_mov_b32 m0, s2 -; MOVREL-NEXT: s_mov_b64 s[6:7], 2 -; MOVREL-NEXT: s_mov_b64 s[8:9], 3 -; MOVREL-NEXT: s_mov_b64 s[10:11], 4 -; MOVREL-NEXT: s_mov_b64 s[12:13], 5 -; MOVREL-NEXT: s_mov_b64 s[14:15], 6 -; MOVREL-NEXT: s_mov_b64 s[16:17], 7 ; MOVREL-NEXT: s_mov_b64 s[18:19], 8 +; MOVREL-NEXT: s_mov_b64 s[16:17], 7 +; MOVREL-NEXT: s_mov_b64 s[14:15], 6 +; MOVREL-NEXT: s_mov_b64 s[12:13], 5 +; MOVREL-NEXT: s_mov_b64 s[10:11], 4 +; MOVREL-NEXT: s_mov_b64 s[8:9], 3 +; MOVREL-NEXT: s_mov_b64 s[6:7], 2 ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5] ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -81,13 +81,13 @@ ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 -; GPRIDX-NEXT: s_mov_b32 s4, 1.0 -; GPRIDX-NEXT: s_mov_b32 s5, 2.0 -; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 -; GPRIDX-NEXT: s_mov_b32 s7, 4.0 -; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 -; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 ; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 +; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 +; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 +; GPRIDX-NEXT: s_mov_b32 s7, 4.0 +; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 +; GPRIDX-NEXT: s_mov_b32 s5, 2.0 +; GPRIDX-NEXT: s_mov_b32 s4, 1.0 ; GPRIDX-NEXT: v_mov_b32_e32 v17, s11 ; GPRIDX-NEXT: v_mov_b32_e32 v16, s10 ; GPRIDX-NEXT: v_mov_b32_e32 v15, s9 @@ -131,18 +131,18 @@ ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0 ; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 -; MOVREL-NEXT: s_mov_b32 s4, 1.0 -; MOVREL-NEXT: s_mov_b32 s5, 2.0 -; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 -; MOVREL-NEXT: s_mov_b32 s7, 4.0 -; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 -; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 ; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 +; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 +; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 +; MOVREL-NEXT: s_mov_b32 s7, 4.0 +; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 +; MOVREL-NEXT: s_mov_b32 s5, 2.0 +; MOVREL-NEXT: s_mov_b32 s4, 1.0 ; MOVREL-NEXT: v_mov_b32_e32 v17, s11 -; MOVREL-NEXT: v_mov_b32_e32 v13, s7 -; MOVREL-NEXT: v_mov_b32_e32 v14, s8 -; MOVREL-NEXT: v_mov_b32_e32 v15, s9 ; MOVREL-NEXT: v_mov_b32_e32 v16, s10 +; MOVREL-NEXT: v_mov_b32_e32 v15, s9 +; MOVREL-NEXT: v_mov_b32_e32 v14, s8 +; MOVREL-NEXT: v_mov_b32_e32 v13, s7 ; MOVREL-NEXT: v_mov_b32_e32 v12, s6 ; MOVREL-NEXT: v_mov_b32_e32 v11, s5 ; MOVREL-NEXT: v_mov_b32_e32 v10, s4 @@ -725,22 +725,22 @@ ; GPRIDX-LABEL: dyn_insertelement_v8f64_const_s_v_v: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GPRIDX-NEXT: s_mov_b32 s8, 0 +; GPRIDX-NEXT: s_mov_b32 s18, 0 ; GPRIDX-NEXT: s_mov_b32 s19, 0x40200000 ; GPRIDX-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GPRIDX-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GPRIDX-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill -; GPRIDX-NEXT: s_mov_b64 s[4:5], 1.0 -; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0 -; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000 -; GPRIDX-NEXT: s_mov_b64 s[10:11], 4.0 -; GPRIDX-NEXT: s_mov_b32 s13, 0x40140000 -; GPRIDX-NEXT: s_mov_b32 s12, s8 -; GPRIDX-NEXT: s_mov_b32 s15, 0x40180000 -; GPRIDX-NEXT: s_mov_b32 s14, s8 ; GPRIDX-NEXT: s_mov_b32 s17, 0x401c0000 -; GPRIDX-NEXT: s_mov_b32 s16, s8 -; GPRIDX-NEXT: s_mov_b32 s18, s8 +; GPRIDX-NEXT: s_mov_b32 s16, s18 +; GPRIDX-NEXT: s_mov_b32 s15, 0x40180000 +; GPRIDX-NEXT: s_mov_b32 s14, s18 +; GPRIDX-NEXT: s_mov_b32 s13, 0x40140000 +; GPRIDX-NEXT: s_mov_b32 s12, s18 +; GPRIDX-NEXT: s_mov_b64 s[10:11], 4.0 +; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000 +; GPRIDX-NEXT: s_mov_b32 s8, s18 +; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0 +; GPRIDX-NEXT: s_mov_b64 s[4:5], 1.0 ; GPRIDX-NEXT: v_mov_b32_e32 v34, s19 ; GPRIDX-NEXT: v_mov_b32_e32 v33, s18 ; GPRIDX-NEXT: v_mov_b32_e32 v32, s17 @@ -803,26 +803,26 @@ ; MOVREL: ; %bb.0: ; %entry ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; MOVREL-NEXT: s_waitcnt_vscnt null, 0x0 -; MOVREL-NEXT: s_mov_b32 s8, 0 +; MOVREL-NEXT: s_mov_b32 s18, 0 ; MOVREL-NEXT: s_mov_b32 s19, 0x40200000 -; MOVREL-NEXT: s_mov_b64 s[4:5], 1.0 -; MOVREL-NEXT: s_mov_b64 s[6:7], 2.0 -; MOVREL-NEXT: s_mov_b32 s9, 0x40080000 -; MOVREL-NEXT: s_mov_b64 s[10:11], 4.0 -; MOVREL-NEXT: s_mov_b32 s13, 0x40140000 -; MOVREL-NEXT: s_mov_b32 s12, s8 -; MOVREL-NEXT: s_mov_b32 s15, 0x40180000 -; MOVREL-NEXT: s_mov_b32 s14, s8 ; MOVREL-NEXT: s_mov_b32 s17, 0x401c0000 -; MOVREL-NEXT: s_mov_b32 s16, s8 -; MOVREL-NEXT: s_mov_b32 s18, s8 +; MOVREL-NEXT: s_mov_b32 s15, 0x40180000 +; MOVREL-NEXT: s_mov_b32 s13, 0x40140000 +; MOVREL-NEXT: s_mov_b32 s16, s18 +; MOVREL-NEXT: s_mov_b32 s14, s18 +; MOVREL-NEXT: s_mov_b32 s12, s18 +; MOVREL-NEXT: s_mov_b64 s[10:11], 4.0 +; MOVREL-NEXT: s_mov_b32 s9, 0x40080000 +; MOVREL-NEXT: s_mov_b32 s8, s18 +; MOVREL-NEXT: s_mov_b64 s[6:7], 2.0 +; MOVREL-NEXT: s_mov_b64 s[4:5], 1.0 ; MOVREL-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; MOVREL-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; MOVREL-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill ; MOVREL-NEXT: v_mov_b32_e32 v34, s19 +; MOVREL-NEXT: v_mov_b32_e32 v33, s18 ; MOVREL-NEXT: v_mov_b32_e32 v32, s17 ; MOVREL-NEXT: v_mov_b32_e32 v31, s16 -; MOVREL-NEXT: v_mov_b32_e32 v33, s18 ; MOVREL-NEXT: v_mov_b32_e32 v30, s15 ; MOVREL-NEXT: v_mov_b32_e32 v29, s14 ; MOVREL-NEXT: v_mov_b32_e32 v28, s13 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll @@ -203,8 +203,8 @@ ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 4 -; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: s_mov_b32 s1, s0 +; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 @@ -213,8 +213,8 @@ ; GFX7-LABEL: mubuf_store_vgpr_ptr_offset4294967297: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_mov_b32 s0, 4 -; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s1, s0 +; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 @@ -252,11 +252,11 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x200000 -; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 -; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_mov_b32 s0, s2 ; GFX6-NEXT: s_mov_b32 s1, s3 +; GFX6-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x200000 +; GFX6-NEXT: s_lshl_b64 s[4:5], s[2:3], 2 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 @@ -266,11 +266,11 @@ ; ; GFX7-LABEL: mubuf_store_sgpr_ptr_sgpr_offset: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x200000 -; GFX7-NEXT: s_lshl_b64 s[4:5], s[4:5], 2 -; GFX7-NEXT: v_mov_b32_e32 v0, s4 ; GFX7-NEXT: s_mov_b32 s0, s2 ; GFX7-NEXT: s_mov_b32 s1, s3 +; GFX7-NEXT: s_bfe_i64 s[2:3], s[4:5], 0x200000 +; GFX7-NEXT: s_lshl_b64 s[4:5], s[2:3], 2 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 ; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 @@ -286,21 +286,21 @@ ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 -; GFX6-NEXT: s_mov_b32 s6, 0 -; GFX6-NEXT: s_lshl_b64 s[4:5], s[0:1], 2 +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 +; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 -; GFX7-NEXT: s_mov_b32 s6, 0 -; GFX7-NEXT: s_lshl_b64 s[4:5], s[0:1], 2 +; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 +; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset store i32 0, i32 addrspace(1)* %gep @@ -311,21 +311,21 @@ ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 -; GFX6-NEXT: s_mov_b32 s6, 0 -; GFX6-NEXT: s_lshl_b64 s[4:5], s[0:1], 2 +; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 +; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 offset:1024 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x200000 -; GFX7-NEXT: s_mov_b32 s6, 0 -; GFX7-NEXT: s_lshl_b64 s[4:5], s[0:1], 2 +; GFX7-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 +; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 offset:1024 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024 ; GFX7-NEXT: s_endpgm %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 256 @@ -435,25 +435,25 @@ ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX6-NEXT: s_add_u32 s4, s2, 0x3ffc +; GFX6-NEXT: s_add_u32 s0, s2, 0x3ffc ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 -; GFX6-NEXT: s_mov_b32 s6, 0 -; GFX6-NEXT: s_addc_u32 s5, s3, 0 +; GFX6-NEXT: s_addc_u32 s1, s3, 0 +; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: v_mov_b32_e32 v2, 0 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 +; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX7-NEXT: s_add_u32 s4, s2, 0x3ffc +; GFX7-NEXT: s_add_u32 s0, s2, 0x3ffc ; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 -; GFX7-NEXT: s_mov_b32 s6, 0 -; GFX7-NEXT: s_addc_u32 s5, s3, 0 +; GFX7-NEXT: s_addc_u32 s1, s3, 0 +; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: s_mov_b32 s7, 0xf000 -; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095 %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -1466,9 +1466,9 @@ ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 @@ -1493,9 +1493,9 @@ ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0