Index: lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- lib/Target/ARM/ARMISelDAGToDAG.cpp +++ lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1765,15 +1765,17 @@ Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 - // case entirely when the rest are updated to that form, too. bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); - if ((NumVecs <= 2) && !IsImmUpdate) - Opc = getVLDSTRegisterUpdateOpcode(Opc); - // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so - // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate) - Ops.push_back(IsImmUpdate ? Reg0 : Inc); + if (!IsImmUpdate) { + // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so + // check for the opcode rather than the number of vector elements. + if (isVLDfixed(Opc)) + Opc = getVLDSTRegisterUpdateOpcode(Opc); + Ops.push_back(Inc); + // VLD1/VLD2 fixed increment does not need Reg0 so only include it in + // the operands if not such an opcode. + } else if (!isVLDfixed(Opc)) + Ops.push_back(Reg0); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -1919,16 +1921,17 @@ Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 - // case entirely when the rest are updated to that form, too. bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); - if (NumVecs <= 2 && !IsImmUpdate) - Opc = getVLDSTRegisterUpdateOpcode(Opc); - // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so - // check for that explicitly too. Horribly hacky, but temporary. - if (!IsImmUpdate) + if (!IsImmUpdate) { + // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so + // check for the opcode rather than the number of vector elements. + if (isVSTfixed(Opc)) + Opc = getVLDSTRegisterUpdateOpcode(Opc); Ops.push_back(Inc); - else if (NumVecs > 2 && !isVSTfixed(Opc)) + } + // VST1/VST2 fixed increment does not need Reg0 so only include it in + // the operands if not such an opcode. + else if (!isVSTfixed(Opc)) Ops.push_back(Reg0); } Ops.push_back(SrcReg); Index: test/CodeGen/ARM/pr35157.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/pr35157.ll @@ -0,0 +1,45 @@ +; RUN: llc -mtriple armv7 -start-after=verify < %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + +%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> } +%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } + +define <1 x i64> @vld3i64(i8* %A, i8** %B) nounwind { + %1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8* %A, i32 8) + %2 = getelementptr i8, i8* %A, i32 4 + store i8* %2, i8** %B + %3 = extractvalue %struct.__neon_int64x1x3_t %1, 0 + ret <1 x i64> %3 +} + +define <1 x i64> @vld4i64(i8* %A, i8** %B) nounwind { + %1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8* %A, i32 8) + %2 = getelementptr i8, i8* %A, i32 4 + store i8* %2, i8** %B + %3 = extractvalue %struct.__neon_int64x1x4_t %1, 0 + ret <1 x i64> %3 +} + +define i8* @vst3i64_wb(i8* %A, <1 x i64>* %B) nounwind { + %1 = load <1 x i64>, <1 x i64>* %B + call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* %A, <1 x i64> %1, <1 x i64> %1, <1 x i64> %1, i32 4) + %2 = getelementptr i8, i8* %A, i32 4 + ret i8* %2 +} + +define i8* @vst4i64_wb(i8* %A, <1 x i64>* %B) nounwind { + %1 = load <1 x i64>, <1 x i64>* %B + call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* %A, <1 x i64> %1, <1 x i64> %1, <1 x i64> %1, <1 x i64> %1, i32 4) + %2 = getelementptr i8, i8* %A, i32 4 + ret i8* %2 +} + +; Function Attrs: argmemonly nounwind +declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0i8(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64.p0i8(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst3.p0i8.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) #1 +declare void @llvm.arm.neon.vst4.p0i8.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) #1 + +attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a7" "target-features"="+dsp,+hwdiv,+hwdiv-arm,+neon,+vfp4,-crc,-dotprod,-ras,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind }