diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20523,6 +20523,9 @@
     return SDValue();
 
   EVT PtrVT = Ptr.getValueType();
+  // Idx may be narrower or wider than PtrVT; the MUL needs matching operands.
+  Idx = DAG.getZExtOrTrunc(Idx, DL, PtrVT);
+
   SDValue Offset =
       DAG.getNode(ISD::MUL, DL, PtrVT, Idx,
                   DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT));
diff --git a/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll b/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s
+
+; Regression test for a bug in `DAGCombiner::replaceStoreOfInsertLoad` where
+; Idx could be smaller than PtrVT, causing a MUL to be emitted with inconsistent
+; LHS/RHS types.
+
+define void @main(ptr addrspace(1) %in, float %arg) {
+; CHECK-LABEL: main:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    global_store_dword v[0:1], v2, off offset:12
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %1 = load <4 x float>, ptr addrspace(1) %in
+  %2 = insertelement <4 x float> %1, float %arg, i64 3
+  store <4 x float> %2, ptr addrspace(1) %in
+  ret void
+}