diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20523,6 +20523,9 @@
     return SDValue();
   EVT PtrVT = Ptr.getValueType();
 
+  // MUL operands must both be PtrVT; Idx may be narrower or wider than it.
+  Idx = DAG.getZExtOrTrunc(Idx, DL, PtrVT);
+
   SDValue Offset =
       DAG.getNode(ISD::MUL, DL, PtrVT, Idx,
                   DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT));
diff --git a/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll b/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s
+
+; Regression test for a bug in `DAGCombiner::replaceStoreOfInsertLoad` where
+; the type of Idx could be narrower than PtrVT, causing an ISD::MUL to be
+; emitted with mismatched operand types.
+
+define void @main(ptr addrspace(1) %in, float %arg) {
+; CHECK-LABEL: main:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    global_store_dword v[0:1], v2, off offset:12
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %1 = load <4 x float>, ptr addrspace(1) %in
+  %2 = insertelement <4 x float> %1, float %arg, i64 3 ; lane 3 of 4-byte floats -> byte offset 12
+  store <4 x float> %2, ptr addrspace(1) %in ; stored back to the address the vector was loaded from
+  ret void
+}