NOTE(review): line structure below was rebuilt from the hunk headers
(-1449,6 +1449,14 and -0,0 +1,33); text ahead of the first "diff --git"
line is preamble and is skipped by patch tools.
NOTE(review): every `def : BitConvert ;` line appears to have lost its
argument list (presumably of the form `BitConvert <SrcVT, DstVT, RegClass>;`
-- confirm against the original change). The first hunk will not apply, and
the TableGen will not parse, until those arguments are restored.
NOTE(review): the test's `; preds =` comments were corrected to name the
blocks that actually branch to each label (%entry and %if.end.3).

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1449,6 +1449,14 @@
 def : BitConvert ;
 def : BitConvert ;
 def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
+def : BitConvert ;
 
 // 512-bit bitcast
 def : BitConvert ;
diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-v4i64-v16i16.ll b/llvm/test/CodeGen/AMDGPU/bitcast-v4i64-v16i16.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bitcast-v4i64-v16i16.ll
@@ -0,0 +1,33 @@
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}bitcast_4x64_16i16:
+; CHECK: global_store_dwordx4
+; CHECK: global_store_dwordx4
+
+define amdgpu_kernel void @bitcast_4x64_16i16(<16 x i16> addrspace(1)* %out) {
+entry:
+  br i1 undef, label %for.body29, label %for.cond60.preheader
+
+for.cond60.preheader:                             ; preds = %if.end.3, %entry
+  %tmp.sroa.0.1.lcssa = phi <4 x i64> [ zeroinitializer, %entry ], [ %i7, %if.end.3 ]
+  %i5 = bitcast <4 x i64> %tmp.sroa.0.1.lcssa to <16 x i16>
+  store <16 x i16> %i5, <16 x i16> addrspace(1)* %out, align 32
+  br label %if.end76.2
+
+for.body29:                                       ; preds = %if.end.3, %entry
+  %tmp.sroa.0.153 = phi <4 x i64> [ %i7, %if.end.3 ], [ zeroinitializer, %entry ]
+  %i7 = insertelement <4 x i64> %tmp.sroa.0.153, i64 0, i64 0
+  br label %if.end.3
+
+if.end.3:                                         ; preds = %for.body29
+  br i1 undef, label %for.body29, label %for.cond60.preheader
+
+if.end76.2:                                       ; preds = %for.cond60.preheader
+  br label %if.then69.3
+
+if.then69.3:                                      ; preds = %if.end76.2
+  %i11 = bitcast <4 x i64> %tmp.sroa.0.1.lcssa to <16 x i16>
+  store <16 x i16> %i11, <16 x i16> addrspace(1)* %out, align 32
+  unreachable
+}