diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21516,7 +21516,7 @@
   }
 
   // Only cast if the size is the same
-  if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
+  if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
     return SDValue();
 
   return DAG.getBitcast(VT, Src);
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
@@ -856,3 +856,65 @@
   store <30 x i32> %phi_cast, ptr addrspace(1) %out
   ret void
 }
+
+define amdgpu_kernel void @bitcast_large_phis_broken(i32 %cond, ptr addrspace(1) %out, <4 x i64> %value) {
+entry: ; NOTE(review): presumably a regression test for the `!Src` guard added to DAGCombiner.cpp in this same patch - confirm
+  %cmp0 = icmp eq i32 %cond, 0
+  %0 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> zeroinitializer, i64 0) ; %0..%7: the eight <2 x i16> slices of an all-zero <16 x i16>
+  %1 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> zeroinitializer, i64 2)
+  %2 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> zeroinitializer, i64 4)
+  %3 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> zeroinitializer, i64 6)
+  %4 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> zeroinitializer, i64 8)
+  %5 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> zeroinitializer, i64 10)
+  %6 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> zeroinitializer, i64 12)
+  %7 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> zeroinitializer, i64 14)
+  br i1 %cmp0, label %if, label %end
+
+if: ; preds = %if, %entry
+  %8 = phi i64 [ 0, %entry ], [ %16, %if ] ; %8..%11: 0 on entry, the matching element of %value on the back-edge
+  %9 = phi i64 [ 0, %entry ], [ %17, %if ]
+  %10 = phi i64 [ 0, %entry ], [ %18, %if ]
+  %11 = phi i64 [ 0, %entry ], [ %19, %if ]
+  %12 = insertelement <4 x i64> poison, i64 %8, i64 0 ; %12..%15: rebuild a <4 x i64> from the four phis
+  %13 = insertelement <4 x i64> %12, i64 %9, i64 1
+  %14 = insertelement <4 x i64> %13, i64 %10, i64 2
+  %15 = insertelement <4 x i64> %14, i64 %11, i64 3
+  %cast = bitcast <4 x i64> %15 to <16 x i16> ; reinterpret the same 256 bits as <16 x i16>
+  %cmp1 = icmp eq i32 %cond, 1
+  %16 = extractelement <4 x i64> %value, i64 0 ; %16..%19: elements of %value that feed the phis above
+  %17 = extractelement <4 x i64> %value, i64 1
+  %18 = extractelement <4 x i64> %value, i64 2
+  %19 = extractelement <4 x i64> %value, i64 3
+  %20 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> %cast, i64 0) ; %20..%27: the eight <2 x i16> slices of %cast
+  %21 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> %cast, i64 2)
+  %22 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> %cast, i64 4)
+  %23 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> %cast, i64 6)
+  %24 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> %cast, i64 8)
+  %25 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> %cast, i64 10)
+  %26 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> %cast, i64 12)
+  %27 = call <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16> %cast, i64 14)
+  br i1 %cmp1, label %if, label %end
+
+end: ; preds = %if, %entry
+  %28 = phi <2 x i16> [ %0, %entry ], [ %20, %if ] ; %28..%35: pick the entry slice or the loop slice for each position
+  %29 = phi <2 x i16> [ %1, %entry ], [ %21, %if ]
+  %30 = phi <2 x i16> [ %2, %entry ], [ %22, %if ]
+  %31 = phi <2 x i16> [ %3, %entry ], [ %23, %if ]
+  %32 = phi <2 x i16> [ %4, %entry ], [ %24, %if ]
+  %33 = phi <2 x i16> [ %5, %entry ], [ %25, %if ]
+  %34 = phi <2 x i16> [ %6, %entry ], [ %26, %if ]
+  %35 = phi <2 x i16> [ %7, %entry ], [ %27, %if ]
+  %36 = call <16 x i16> @llvm.vector.insert.v16i16.v2i16(<16 x i16> poison, <2 x i16> %28, i64 0) ; %36..%43: reassemble the slices into one <16 x i16>
+  %37 = call <16 x i16> @llvm.vector.insert.v16i16.v2i16(<16 x i16> %36, <2 x i16> %29, i64 2)
+  %38 = call <16 x i16> @llvm.vector.insert.v16i16.v2i16(<16 x i16> %37, <2 x i16> %30, i64 4)
+  %39 = call <16 x i16> @llvm.vector.insert.v16i16.v2i16(<16 x i16> %38, <2 x i16> %31, i64 6)
+  %40 = call <16 x i16> @llvm.vector.insert.v16i16.v2i16(<16 x i16> %39, <2 x i16> %32, i64 8)
+  %41 = call <16 x i16> @llvm.vector.insert.v16i16.v2i16(<16 x i16> %40, <2 x i16> %33, i64 10)
+  %42 = call <16 x i16> @llvm.vector.insert.v16i16.v2i16(<16 x i16> %41, <2 x i16> %34, i64 12)
+  %43 = call <16 x i16> @llvm.vector.insert.v16i16.v2i16(<16 x i16> %42, <2 x i16> %35, i64 14)
+  store <16 x i16> %43, ptr addrspace(1) %out, align 32
+  ret void
+}
+
+declare <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16>, i64 immarg)
+declare <16 x i16> @llvm.vector.insert.v16i16.v2i16(<16 x i16>, <2 x i16>, i64 immarg)