diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -348,6 +348,14 @@
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i64, Custom);
 
   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
   setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
@@ -1,26 +1,26 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs -o - %s | FileCheck %s
-
-; CHECK-LABEL: foo
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: v_bfe_i32
-; CHECK: v_bfe_i32
-
-define <2 x i16> @foo(<8 x i16> addrspace(1) * %p0, <8 x i16> addrspace(1) * %p1) {
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: extract_2xi16
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: v_bfe_i32
+; GCN: v_bfe_i32
+
+define <2 x i16> @extract_2xi16(<8 x i16> addrspace(1) * %p0, <8 x i16> addrspace(1) * %p1) {
   br i1 undef, label %T, label %F
 
 T:
@@ -38,3 +38,129 @@
   %r2 = select <2 x i1> %b2, <2 x i16> , <2 x i16>
   ret <2 x i16> %r2
 }
+
+; GCN-LABEL: extract_2xi64
+; GCN-COUNT-2: v_cndmask_b32
+define <2 x i64> @extract_2xi64(<8 x i64> addrspace(1) * %p0, <8 x i64> addrspace(1) * %p1) { ; <8 x i64> from %p0 or %p1 -> <2 x i64> via shufflevector, then icmp/select
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x i64>, <8 x i64> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x i64>, <8 x i64> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x i64> [ %t, %T ], [ %f, %F ] ; merge the vector loaded on whichever path ran
+  %v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <2 x i32> ; NOTE(review): mask value not visible in this copy; same for the icmp/select constants below - confirm
+  %b2 = icmp sgt <2 x i64> %v2,
+  %r2 = select <2 x i1> %b2, <2 x i64> , <2 x i64>
+  ret <2 x i64> %r2
+}
+
+; GCN-LABEL: extract_4xi64
+; GCN-COUNT-4: v_cndmask_b32
+define <4 x i64> @extract_4xi64(<8 x i64> addrspace(1) * %p0, <8 x i64> addrspace(1) * %p1) { ; <8 x i64> from %p0 or %p1 -> <4 x i64> via shufflevector, then icmp/select
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x i64>, <8 x i64> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x i64>, <8 x i64> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x i64> [ %t, %T ], [ %f, %F ] ; merge the vector loaded on whichever path ran
+  %v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <4 x i32> ; NOTE(review): mask value not visible in this copy; same for the icmp/select constants below - confirm
+  %b2 = icmp sgt <4 x i64> %v2,
+  %r2 = select <4 x i1> %b2, <4 x i64> , <4 x i64>
+  ret <4 x i64> %r2
+}
+
+; GCN-LABEL: extract_8xi64
+; GCN-COUNT-8: v_cndmask_b32
+define <8 x i64> @extract_8xi64(<16 x i64> addrspace(1) * %p0, <16 x i64> addrspace(1) * %p1) { ; <16 x i64> from %p0 or %p1 -> <8 x i64> via shufflevector, then icmp/select
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <16 x i64>, <16 x i64> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <16 x i64>, <16 x i64> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <16 x i64> [ %t, %T ], [ %f, %F ] ; merge the vector loaded on whichever path ran
+  %v2 = shufflevector <16 x i64> %m, <16 x i64> undef, <8 x i32> ; NOTE(review): mask value not visible in this copy; same for the icmp/select constants below - confirm
+  %b2 = icmp sgt <8 x i64> %v2,
+  %r2 = select <8 x i1> %b2, <8 x i64> , <8 x i64>
+  ret <8 x i64> %r2
+}
+
+; GCN-LABEL: extract_2xf64
+; GCN-COUNT-2: v_cndmask_b32
+define <2 x double> @extract_2xf64(<8 x double> addrspace(1) * %p0, <8 x double> addrspace(1) * %p1) { ; <8 x double> from %p0 or %p1 -> <2 x double> via shufflevector, then fcmp/select
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x double>, <8 x double> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x double>, <8 x double> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x double> [ %t, %T ], [ %f, %F ] ; merge the vector loaded on whichever path ran
+  %v2 = shufflevector <8 x double> %m, <8 x double> undef, <2 x i32> ; NOTE(review): mask value not visible in this copy; same for the fcmp/select constants below - confirm
+  %b2 = fcmp ogt <2 x double> %v2,
+  %r2 = select <2 x i1> %b2, <2 x double> , <2 x double>
+  ret <2 x double> %r2
+}
+
+; GCN-LABEL: extract_4xf64
+; GCN-COUNT-4: v_cndmask_b32
+define <4 x double> @extract_4xf64(<8 x double> addrspace(1) * %p0, <8 x double> addrspace(1) * %p1) { ; <8 x double> from %p0 or %p1 -> <4 x double> via shufflevector, then fcmp/select
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x double>, <8 x double> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x double>, <8 x double> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x double> [ %t, %T ], [ %f, %F ] ; merge the vector loaded on whichever path ran
+  %v2 = shufflevector <8 x double> %m, <8 x double> undef, <4 x i32> ; NOTE(review): mask value not visible in this copy; same for the fcmp/select constants below - confirm
+  %b2 = fcmp ogt <4 x double> %v2,
+  %r2 = select <4 x i1> %b2, <4 x double> , <4 x double>
+  ret <4 x double> %r2
+}
+
+; GCN-LABEL: extract_8xf64
+; GCN-COUNT-8: v_cndmask_b32
+define <8 x double> @extract_8xf64(<16 x double> addrspace(1) * %p0, <16 x double> addrspace(1) * %p1) { ; <16 x double> from %p0 or %p1 -> <8 x double> via shufflevector, then fcmp/select
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <16 x double>, <16 x double> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <16 x double>, <16 x double> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <16 x double> [ %t, %T ], [ %f, %F ] ; merge the vector loaded on whichever path ran
+  %v2 = shufflevector <16 x double> %m, <16 x double> undef, <8 x i32> ; NOTE(review): mask value not visible in this copy; same for the fcmp/select constants below - confirm
+  %b2 = fcmp ogt <8 x double> %v2,
+  %r2 = select <8 x i1> %b2, <8 x double> , <8 x double>
+  ret <8 x double> %r2
+}