diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14968,6 +14968,7 @@ SDValue Op0 = Ext.getOperand(0); EVT VecVT = Op0.getValueType(); + unsigned ResNo = Op0.getResNo(); unsigned Lane = Ext.getConstantOperandVal(1); if (VecVT.getVectorNumElements() != 4) return SDValue(); @@ -14976,7 +14977,8 @@ auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) { return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT && isa<ConstantSDNode>(V->getOperand(1)) && - V->getConstantOperandVal(1) == Lane + 1; + V->getConstantOperandVal(1) == Lane + 1 && + V->getOperand(0).getResNo() == ResNo; }); if (OtherIt == Op0->uses().end()) return SDValue(); diff --git a/llvm/test/CodeGen/ARM/vector-extract.ll b/llvm/test/CodeGen/ARM/vector-extract.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/vector-extract.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv7a-none-eabi -mattr=+neon %s -o - | FileCheck %s + +; Check that the two extracts are not combined into a vmov. + +%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } + +define i32 @vld4Qi32(i32* %A) nounwind { +; CHECK-LABEL: vld4Qi32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vld4.32 {d16, d18, d20, d22}, [r0]! 
+; CHECK-NEXT: vld4.32 {d17, d19, d21, d23}, [r0] +; CHECK-NEXT: vmov.32 r0, d18[1] +; CHECK-NEXT: vmov.32 r1, d16[0] +; CHECK-NEXT: add r0, r1, r0 +; CHECK-NEXT: bx lr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32.p0i8(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0 + %tmp3 = extractelement <4 x i32> %tmp2, i32 0 + %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 1 + %tmp5 = extractelement <4 x i32> %tmp4, i32 1 + %tmp6 = add i32 %tmp3, %tmp5 + ret i32 %tmp6 +} + +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32.p0i8(i8*, i32) nounwind readonly