Index: llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -627,7 +627,9 @@
       Register ArgReg = Args[i].Regs[Part];
       // There should be Regs.size() ArgLocs per argument.
       VA = ArgLocs[j + Part];
-      if (VA.isMemLoc()) {
+      const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
+
+      if (VA.isMemLoc() && !Flags.isByVal()) {
         // Individual pieces may have been spilled to the stack and others
         // passed in registers.
@@ -643,7 +645,22 @@
         continue;
       }
 
-      assert(VA.isRegLoc() && "custom loc should have been handled already");
+      if (VA.isMemLoc() && Flags.isByVal()) {
+        // FIXME: We should be inserting a memcpy from the source pointer to the
+        // result for outgoing byval parameters.
+        if (!Handler.isIncomingArgumentHandler())
+          continue;
+
+        MachinePointerInfo MPO;
+        Register StackAddr = Handler.getStackAddress(Flags.getByValSize(),
+                                                     VA.getLocMemOffset(), MPO);
+        assert(Args[i].Regs.size() == 1 &&
+               "didn't expect split byval pointer");
+        MIRBuilder.buildCopy(Args[i].Regs[0], StackAddr);
+        continue;
+      }
+
+      assert(!VA.needsCustom() && "custom loc should have been handled already");
 
       if (i == 0 && ThisReturnReg.isValid() &&
           Handler.isIncomingArgumentHandler() &&
Index: llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
+++ llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
@@ -309,7 +309,7 @@
 ; DARWIN-LABEL: name: test_byval
 ; DARWIN: bb.1 (%ir-block.0):
 ; DARWIN:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-; DARWIN:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 16)
+; DARWIN:   [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
 ; DARWIN:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
 ; DARWIN:   BL @simple_fn, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp
 ; DARWIN:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
@@ -317,7 +317,7 @@
 ; WINDOWS-LABEL: name: test_byval
 ; WINDOWS: bb.1 (%ir-block.0):
 ; WINDOWS:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
-; WINDOWS:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 16)
+; WINDOWS:   [[COPY:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
 ; WINDOWS:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
 ; WINDOWS:   BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
 ; WINDOWS:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll
@@ -49,12 +49,8 @@
   ; GCN:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32)
   ; GCN:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
   ; GCN:   $vgpr0 = COPY [[FRAME_INDEX1]](p5)
-  ; GCN:   [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; GCN:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GCN:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32)
-  ; GCN:   G_STORE [[FRAME_INDEX]](p5), [[PTR_ADD2]](p5) :: (store 4 into stack, align 16, addrspace 5)
-  ; GCN:   [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>)
+  ; GCN:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
   ; GCN:   $sgpr4_sgpr5 = COPY [[COPY11]](p4)
   ; GCN:   $sgpr6_sgpr7 = COPY [[COPY12]](p4)
   ; GCN:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
@@ -65,9 +61,9 @@
   ; GCN:   $vgpr31 = COPY [[OR1]](s32)
   ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
   ; GCN:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
-  ; GCN:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
+  ; GCN:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
   ; GCN:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load 1 from %ir.out.gep02, addrspace 5)
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load 4 from %ir.out.gep1, addrspace 5)
+  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (dereferenceable load 4 from %ir.out.gep1, addrspace 5)
   ; GCN:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1)
   ; GCN:   G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
   ; GCN:   S_ENDPGM 0
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -3912,12 +3912,8 @@
   ; CHECK:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
   ; CHECK:   [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32)
   ; CHECK:   [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
-  ; CHECK:   [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg
-  ; CHECK:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C6]](s32)
-  ; CHECK:   G_STORE [[FRAME_INDEX]](p5), [[PTR_ADD2]](p5) :: (store 4 into stack, align 16, addrspace 5)
-  ; CHECK:   [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
-  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>)
+  ; CHECK:   [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
+  ; CHECK:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>)
   ; CHECK:   $sgpr4_sgpr5 = COPY [[COPY10]](p4)
   ; CHECK:   $sgpr6_sgpr7 = COPY [[COPY11]](p4)
   ; CHECK:   $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -1727,19 +1727,19 @@
   ; CHECK: bb.1 (%ir-block.0):
   ; CHECK:   liveins: $sgpr30_sgpr31
   ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
+  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 1 from %ir.arg0, align 4, addrspace 5)
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load 1 from %ir.arg0, align 4, addrspace 5)
   ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD]], [[C]](s32)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD1]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
   ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
-  ; CHECK:   G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
-  ; CHECK:   [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK:   S_SETPC_B64_return [[COPY1]]
+  ; CHECK:   G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK:   S_SETPC_B64_return [[COPY2]]
   %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
   store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
   ret void
@@ -1750,30 +1750,30 @@
   ; CHECK: bb.1 (%ir-block.0):
   ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
   ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
   ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5)
-  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
   ; CHECK:   [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (volatile dereferenceable load 1 from %ir.arg0, align 4, addrspace 5)
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load 1 from %ir.arg0, align 4, addrspace 5)
   ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD]], [[C]](s32)
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
-  ; CHECK:   [[LOAD4:%[0-9]+]]:_(s8) = G_LOAD [[LOAD1]](p5) :: (volatile dereferenceable load 1 from %ir.arg1, align 4, addrspace 5)
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD1]], [[C]](s32)
-  ; CHECK:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load 4 from %ir.arg1 + 4, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
+  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (volatile dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
+  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load 1 from %ir.arg1, align 4, addrspace 5)
+  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32)
+  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load 4 from %ir.arg1 + 4, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
   ; CHECK:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
-  ; CHECK:   G_STORE [[LOAD3]](s32), [[PTR_ADD2]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD4]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+  ; CHECK:   G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+  ; CHECK:   G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
   ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64)
-  ; CHECK:   G_STORE [[LOAD5]](s32), [[PTR_ADD3]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
-  ; CHECK:   G_STORE [[COPY]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK:   G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+  ; CHECK:   G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store 4 into `i32 addrspace(3)* undef`, addrspace 3)
+  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK:   S_SETPC_B64_return [[COPY4]]
   %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0
   %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1
   store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef
@@ -1787,18 +1787,18 @@
   ; CHECK: bb.1 (%ir-block.0):
   ; CHECK:   liveins: $sgpr30_sgpr31
   ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
   ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5)
-  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 4 from %ir.arg0, addrspace 5)
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p5) :: (dereferenceable load 8 from %ir.arg1, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD2]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[DEF]](p1)
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load 4 from %ir.arg0, addrspace 5)
+  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 8 from %ir.arg1, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   G_STORE [[LOAD1]](s64), [[COPY3]](p1) :: (store 8 into `i64 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY4]]
   %arg0.load = load i32, i32 addrspace(5)* %arg0
   %arg1.load = load i64, i64 addrspace(5)* %arg1
   store i32 %arg0.load, i32 addrspace(1)* undef
@@ -1818,18 +1818,18 @@
   ; CHECK: bb.1 (%ir-block.0):
   ; CHECK:   liveins: $sgpr30_sgpr31
   ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
   ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 1 from %ir.arg0, addrspace 5)
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[LOAD1]](p5) :: (dereferenceable load 2 from %ir.arg1, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD2]](s8), [[C]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD3]](s16), [[COPY1]](p1) :: (store 2 into `i16 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load 1 from %ir.arg0, addrspace 5)
+  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 2 from %ir.arg1, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD]](s8), [[C]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
+  ; CHECK:   G_STORE [[LOAD1]](s16), [[COPY3]](p1) :: (store 2 into `i16 addrspace(1)* null`, addrspace 1)
+  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY4]]
   %arg0.load = load i8, i8 addrspace(5)* %arg0
   %arg1.load = load i16, i16 addrspace(5)* %arg1
   store i8 %arg0.load, i8 addrspace(1)* null
@@ -1850,30 +1850,30 @@
   ; CHECK: bb.1 (%ir-block.0):
   ; CHECK:   liveins: $sgpr30_sgpr31
   ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
   ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 16, addrspace 5)
-  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+  ; CHECK:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[COPY1:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 4 from %ir.arg0, addrspace 5)
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load 4 from %ir.arg0, addrspace 5)
   ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD]], [[C1]](s32)
-  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
+  ; CHECK:   [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32)
+  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load 4 from %ir.arg0 + 4, addrspace 5)
   ; CHECK:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[LOAD]], [[C2]](s32)
-  ; CHECK:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load 4 from %ir.arg0 + 8, addrspace 5)
-  ; CHECK:   [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[LOAD1]](p5) :: (dereferenceable load 2 from %ir.arg1, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD2]](s32), [[C]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null`, addrspace 1)
+  ; CHECK:   [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32)
+  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load 4 from %ir.arg0 + 8, addrspace 5)
+  ; CHECK:   [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load 2 from %ir.arg1, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD]](s32), [[C]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null`, addrspace 1)
   ; CHECK:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
   ; CHECK:   [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C3]](s64)
-  ; CHECK:   G_STORE [[LOAD3]](s32), [[PTR_ADD2]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 4, addrspace 1)
+  ; CHECK:   G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 4, addrspace 1)
   ; CHECK:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
   ; CHECK:   [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64)
-  ; CHECK:   G_STORE [[LOAD4]](s32), [[PTR_ADD3]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 8, addrspace 1)
-  ; CHECK:   G_STORE [[LOAD5]](s16), [[COPY1]](p1) :: (store 2 into `i16 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
-  ; CHECK:   S_SETPC_B64_return [[COPY2]]
+  ; CHECK:   G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store 4 into `[3 x i32] addrspace(1)* null` + 8, addrspace 1)
+  ; CHECK:   G_STORE [[LOAD3]](s16), [[COPY3]](p1) :: (store 2 into `i16 addrspace(1)* null`, addrspace 1)
+  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK:   S_SETPC_B64_return [[COPY4]]
   %arg0.load = load [3 x i32], [3 x i32] addrspace(5)* %arg0
   %arg1.load = load i16, i16 addrspace(5)* %arg1
   store [3 x i32] %arg0.load, [3 x i32] addrspace(1)* null
@@ -1929,15 +1929,15 @@
   ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
   ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
   ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, align 8, addrspace 5)
-  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[COPY32:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5)
+  ; CHECK:   [[COPY33:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+  ; CHECK:   [[COPY34:%[0-9]+]]:_(p1) = COPY [[C]](p1)
   ; CHECK:   G_STORE [[LOAD]](s32), [[C]](p1) :: (store 4 into `i32 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD1]](p5) :: (dereferenceable load 1 from %ir.arg2, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load 1 from %ir.arg2, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
+  ; CHECK:   [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY33]]
+  ; CHECK:   S_SETPC_B64_return [[COPY35]]
   store i32 %arg1, i32 addrspace(1)* null
   %arg2.load = load i8, i8 addrspace(5)* %arg2
   store i8 %arg2.load, i8 addrspace(1)* null
@@ -1990,17 +1990,17 @@
   ; CHECK:   [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31
   ; CHECK:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32)
   ; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1
-  ; CHECK:   [[LOAD:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load 4 from %fixed-stack.1, align 16, addrspace 5)
+  ; CHECK:   [[COPY32:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5)
   ; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
-  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5)
-  ; CHECK:   [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load 4 from %fixed-stack.0, addrspace 5)
+  ; CHECK:   [[COPY33:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
   ; CHECK:   [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
-  ; CHECK:   [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1)
-  ; CHECK:   G_STORE [[LOAD1]](s32), [[C]](p1) :: (store 4 into `i32 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p5) :: (dereferenceable load 1 from %ir.arg1, addrspace 5)
-  ; CHECK:   G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
-  ; CHECK:   [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]]
-  ; CHECK:   S_SETPC_B64_return [[COPY34]]
+  ; CHECK:   [[COPY34:%[0-9]+]]:_(p1) = COPY [[C]](p1)
+  ; CHECK:   G_STORE [[LOAD]](s32), [[C]](p1) :: (store 4 into `i32 addrspace(1)* null`, addrspace 1)
+  ; CHECK:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY32]](p5) :: (dereferenceable load 1 from %ir.arg1, addrspace 5)
+  ; CHECK:   G_STORE [[LOAD1]](s8), [[COPY34]](p1) :: (store 1 into `i8 addrspace(1)* null`, addrspace 1)
+  ; CHECK:   [[COPY35:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY33]]
+  ; CHECK:   S_SETPC_B64_return [[COPY35]]
   store i32 %arg2, i32 addrspace(1)* null
  %arg1.load = load i8, i8 addrspace(5)* %arg1
  store i8 %arg1.load, i8 addrspace(1)* null
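
Note on the FIXME in the CallLowering.cpp hunk: this patch only handles the incoming side of byval. The callee receives a pointer to the stack object the caller created, so a plain COPY of the fixed stack object's address replaces the old (incorrect) pointer load, which is exactly what the test updates above check for. The outgoing side is still skipped. Below is a minimal sketch of how that gap might later be filled with a G_MEMCPY, assuming MachineIRBuilder::buildMemCpy is available and that MF/MRI are in scope as elsewhere in handleAssignments; the empty source MachinePointerInfo and Align(1) are illustrative placeholders, not part of this patch:

      if (VA.isMemLoc() && Flags.isByVal() &&
          !Handler.isIncomingArgumentHandler()) {
        // Outgoing byval: emit the implicit copy byval implies, so writes in
        // the callee cannot be observed through the caller's object.
        uint64_t MemSize = Flags.getByValSize();
        MachinePointerInfo DstMPO;
        Register StackAddr =
            Handler.getStackAddress(MemSize, VA.getLocMemOffset(), DstMPO);

        // Conservative memory operands; real code would derive the source
        // pointer info from the original IR value and use the byval
        // alignment from Flags rather than Align(1).
        MachinePointerInfo SrcMPO;
        MachineMemOperand *DstMMO = MF.getMachineMemOperand(
            DstMPO, MachineMemOperand::MOStore, MemSize, Align(1));
        MachineMemOperand *SrcMMO = MF.getMachineMemOperand(
            SrcMPO, MachineMemOperand::MOLoad, MemSize, Align(1));

        // G_MEMCPY takes the size as a pointer-width scalar; the source is
        // the pointer the caller passed in Args[i].Regs[0].
        const LLT PtrTy = MRI.getType(StackAddr);
        auto SizeConst = MIRBuilder.buildConstant(
            LLT::scalar(PtrTy.getSizeInBits()), MemSize);
        MIRBuilder.buildMemCpy(StackAddr, Args[i].Regs[0], SizeConst, *DstMMO,
                               *SrcMMO);
        continue;
      }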