Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -700,9 +700,12 @@ if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) { EVT ScalarVT = VT.getScalarType(); unsigned Size = ScalarVT.getSizeInBits(); - if (Size == 32 || Size == 64) + if (Size == 32) return ScalarVT.getSimpleVT(); + if (Size == 64) + return MVT::i32; + if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(VT.getVectorNumElements())) @@ -720,9 +723,12 @@ EVT ScalarVT = VT.getScalarType(); unsigned Size = ScalarVT.getSizeInBits(); - if (Size == 32 || Size == 64) + if (Size == 32) return NumElts; + if (Size == 64) + return 2 * NumElts; + // FIXME: Fails to break down as we want with v3. if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts)) return VT.getVectorNumElements() / 2; @@ -739,13 +745,20 @@ unsigned NumElts = VT.getVectorNumElements(); EVT ScalarVT = VT.getScalarType(); unsigned Size = ScalarVT.getSizeInBits(); - if (Size == 32 || Size == 64) { + if (Size == 32) { RegisterVT = ScalarVT.getSimpleVT(); IntermediateVT = RegisterVT; NumIntermediates = NumElts; return NumIntermediates; } + if (Size == 64) { + RegisterVT = MVT::i32; + IntermediateVT = RegisterVT; + NumIntermediates = 2 * NumElts; + return NumIntermediates; + } + // FIXME: We should fix the ABI to be the same on targets without 16-bit // support, but unless we can properly handle 3-vectors, it will be still be // inconsistent. 
Index: test/CodeGen/AMDGPU/call-argument-types.ll =================================================================== --- test/CodeGen/AMDGPU/call-argument-types.ll +++ test/CodeGen/AMDGPU/call-argument-types.ll @@ -25,6 +25,8 @@ declare void @external_void_func_f32(float) #0 declare void @external_void_func_f64(double) #0 declare void @external_void_func_v2f32(<2 x float>) #0 +declare void @external_void_func_v2f64(<2 x double>) #0 +declare void @external_void_func_v3f64(<3 x double>) #0 declare void @external_void_func_v2i16(<2 x i16>) #0 declare void @external_void_func_v2f16(<2 x half>) #0 @@ -274,10 +276,21 @@ ret void } +; GCN-LABEL: {{^}}test_call_external_void_func_v2i64_imm: +; GCN-DAG: v_mov_b32_e32 v0, 1 +; GCN-DAG: v_mov_b32_e32 v1, 2 +; GCN-DAG: v_mov_b32_e32 v2, 3 +; GCN-DAG: v_mov_b32_e32 v3, 4 +; GCN: s_swappc_b64 +define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { + call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>) + ret void +} + ; GCN-LABEL: {{^}}test_call_external_void_func_v3i64: ; GCN: buffer_load_dwordx4 v[0:3] -; GCN: v_mov_b32_e32 v4, s -; GCN: v_mov_b32_e32 v5, s +; GCN: v_mov_b32_e32 v4, 1 +; GCN: v_mov_b32_e32 v5, 2 ; GCN: s_waitcnt ; GCN-NEXT: s_swappc_b64 define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { @@ -288,13 +301,12 @@ ret void } -; FIXME: Immedites should fold directly into v_mov_b32s ; GCN-LABEL: {{^}}test_call_external_void_func_v4i64: ; GCN: buffer_load_dwordx4 v[0:3] -; GCN-DAG: v_mov_b32_e32 v4, s -; GCN-DAG: v_mov_b32_e32 v5, s -; GCN-DAG: v_mov_b32_e32 v6, s -; GCN-DAG: v_mov_b32_e32 v7, s +; GCN-DAG: v_mov_b32_e32 v4, 1 +; GCN-DAG: v_mov_b32_e32 v5, 2 +; GCN-DAG: v_mov_b32_e32 v6, 3 +; GCN-DAG: v_mov_b32_e32 v7, 4 ; GCN: s_waitcnt ; GCN-NEXT: s_swappc_b64 @@ -342,6 +354,30 @@ ret void } +; GCN-LABEL: {{^}}test_call_external_void_func_v2f64_imm: +; GCN: v_mov_b32_e32 v0, 0{{$}} +; GCN: v_mov_b32_e32 v1, 2.0 +; GCN: v_mov_b32_e32 v2, 0{{$}} +; GCN: v_mov_b32_e32 v3, 0x40100000 +; 
GCN: s_swappc_b64 +define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { + call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>) + ret void +} + +; GCN-LABEL: {{^}}test_call_external_void_func_v3f64_imm: +; GCN-DAG: v_mov_b32_e32 v0, 0{{$}} +; GCN-DAG: v_mov_b32_e32 v1, 2.0 +; GCN-DAG: v_mov_b32_e32 v2, 0{{$}} +; GCN-DAG: v_mov_b32_e32 v3, 0x40100000 +; GCN-DAG: v_mov_b32_e32 v4, 0{{$}} +; GCN-DAG: v_mov_b32_e32 v5, 0x40200000 +; GCN-DAG: s_swappc_b64 +define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { + call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>) + ret void +} + ; GCN-LABEL: {{^}}test_call_external_void_func_v2i16: ; GFX9: buffer_load_dword v0 ; GFX9-NOT: v0