diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18416,6 +18416,16 @@
     Offset = DAG.getConstant(PtrOff, DL, PtrType);
     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
   } else {
+    // Unless the index is known to be valid, the extract may be poison. Emit a
+    // mask to ensure the index is valid in such cases, so no new out-of-bound
+    // loads are introduced.
+    KnownBits EltNoBits = DAG.computeKnownBits(EltNo);
+    unsigned NumElts = InVecVT.getVectorElementCount().getKnownMinValue();
+    if (!EltNoBits.getMaxValue().ult(NumElts))
+      EltNo = DAG.getNode(ISD::AND, DL, EltNo.getValueType(), EltNo,
+                          DAG.getConstant((1 << Log2_64(NumElts)) - 1, DL,
+                                          EltNo.getValueType()));
+
     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
     Offset = DAG.getNode(
         ISD::MUL, DL, PtrType, Offset,
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -6372,7 +6372,8 @@
 
 define i32 @load_single_extract_variable_index_i32(<4 x i32>* %A, i32 %idx) {
 ; CHECK-LABEL: load_single_extract_variable_index_i32
-; CHECK: ldr w0, [x0, w1, sxtw #2]
+; CHECK: and [[IDX:.*]], x1, #0x3
+; CHECK-NEXT: ldr w0, [x0, [[IDX]], lsl #2]
 ; CHECK-NEXT: ret
 ;
   %lv = load <4 x i32>, <4 x i32>* %A
diff --git a/llvm/test/CodeGen/SystemZ/vec-extract-02.ll b/llvm/test/CodeGen/SystemZ/vec-extract-02.ll
--- a/llvm/test/CodeGen/SystemZ/vec-extract-02.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-extract-02.ll
@@ -6,7 +6,7 @@
 ; The index must be extended from i32 to i64.
 define i32 @f1(<4 x i32> *%ptr, i32 %index) {
 ; CHECK-LABEL: f1:
-; CHECK: risbgn {{%r[0-5]}}, %r3, 30, 189, 2
+; CHECK: risbgn {{%r[0-5]}}, %r3, 60, 189, 2
 ; CHECK: l %r2,
 ; CHECK: br %r14
   %vec = load <4 x i32>, <4 x i32> *%ptr
diff --git a/llvm/test/CodeGen/X86/vecloadextract.ll b/llvm/test/CodeGen/X86/vecloadextract.ll
--- a/llvm/test/CodeGen/X86/vecloadextract.ll
+++ b/llvm/test/CodeGen/X86/vecloadextract.ll
@@ -19,9 +19,10 @@
 
 ; CHECK: name: variable_index
 ; CHECK: bb.0 (%ir-block.0):
-; CHECK: [[INDEX:%[0-9]+]]:gr32_nosp = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0)
 ; CHECK: [[POINTER:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1)
-; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 4, killed [[INDEX]], 0, $noreg :: (load 4)
+; CHECK: [[INDEX:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0)
+; CHECK: [[MASKED_INDEX:%[0-9]+]]:gr32_nosp = AND32ri8 [[INDEX]], 7, implicit-def dead $eflags
+; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 4, killed [[MASKED_INDEX]], 0, $noreg :: (load 4)
 ; CHECK: $eax = COPY [[LOAD]]
 ; CHECK: RET 0, $eax
 define i32 @variable_index(<8 x i32>* %v, i32 %i) {
@@ -32,9 +33,10 @@
 
 ; CHECK: name: variable_index_with_addrspace
 ; CHECK: bb.0 (%ir-block.0):
-; CHECK: [[INDEX:%[0-9]+]]:gr32_nosp = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0)
 ; CHECK: [[POINTER:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.1)
-; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 4, killed [[INDEX]], 0, $noreg :: (load 4, addrspace 1)
+; CHECK: [[INDEX:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0)
+; CHECK: [[MASKED_INDEX:%[0-9]+]]:gr32_nosp = AND32ri8 [[INDEX]], 7, implicit-def dead $eflags
+; CHECK: [[LOAD:%[0-9]+]]:gr32 = MOV32rm killed [[POINTER]], 4, killed [[MASKED_INDEX]], 0, $noreg :: (load 4, addrspace 1)
 ; CHECK: $eax = COPY [[LOAD]]
 ; CHECK: RET 0, $eax
 define i32 @variable_index_with_addrspace(<8 x i32> addrspace(1)* %v, i32 %i) {