Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -694,8 +694,12 @@ // prevents it from being picked up by the earlier bitcast case. if (ValueVT.getVectorElementCount().isScalar() && (!ValueVT.isFloatingPoint() || !PartVT.isInteger())) { - Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + EVT ExtractVT = ValueVT.getVectorElementType(); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, Val, DAG.getVectorIdxConstant(0, DL)); + + if (ExtractVT.getSizeInBits() != PartVT.getSizeInBits()) + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else { uint64_t ValueSize = ValueVT.getFixedSizeInBits(); assert(PartVT.getFixedSizeInBits() > ValueSize && Index: llvm/test/CodeGen/AMDGPU/dagcombine-v1i8-extractvecelt-crash.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/dagcombine-v1i8-extractvecelt-crash.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx908 < %s | FileCheck %s + +; Regression test for a SelectionDAGBuilder crash that occurred +; when a <1 x i8> value needs to be "split" into a single i32 part. +; +; Before the fix, an illegal extract_vector_elt was generated, which +; later crashed the DAG combiner. +; +; t4: i32 = extract_vector_elt t10, Constant:i32<0> +; +; Now the SelectionDAGBuilder should emit an i8 extract_vector_elt +; and then extend it to i32.
+
+define void @wombat(i1 %cond, <1 x i8> addrspace(5)* %addr) {
+; CHECK-LABEL: wombat:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen
+; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
+; CHECK-NEXT: s_xor_b64 s[6:7], vcc, -1
+; CHECK-NEXT: s_mov_b64 s[4:5], 0
+; CHECK-NEXT: .LBB0_1: ; %loop
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: s_and_b64 s[8:9], exec, s[6:7]
+; CHECK-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5]
+; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_cbranch_execnz .LBB0_1
+; CHECK-NEXT: ; %bb.2: ; %end
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: buffer_store_byte v2, v1, s[0:3], 0 offen
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+  br label %loop
+
+loop:
+  %load = load <1 x i8>, <1 x i8> addrspace(5)* %addr, align 1 ; the <1 x i8> value this test is built around
+  store <1 x i8> poison, <1 x i8> addrspace(5)* %addr, align 1 ; poison store to %addr, overwritten by the next store
+  store <1 x i8> %load, <1 x i8> addrspace(5)* %addr, align 1 ; write the loaded value back to the same address
+  br i1 %cond, label %loop, label %end ; repeat the loop body while %cond is true
+
+end:
+  ret void
+}