diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17137,6 +17137,10 @@ EVT SubVecVT = SubVec.getValueType(); EVT VT = DestVec.getValueType(); unsigned NumSrcElts = SubVecVT.getVectorNumElements(); + // If the source only has a single vector element, the cost of adding + // it to a vector is likely to exceed the cost of an insert_vector_elt. + if (NumSrcElts == 1) + return SDValue(); unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits(); unsigned NumMaskVals = ExtendRatio * NumSrcElts; diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -200,7 +200,8 @@ ; CHECK-LABEL: ins1f2: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: zip1 v0.2d, v1.2d, v0.2d +; CHECK-NEXT: mov v1.d[1], v0.d[0] +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret %tmp3 = extractelement <1 x double> %tmp1, i32 0 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 @@ -211,7 +212,7 @@ ; CHECK-LABEL: ins1f2_args_flipped: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d +; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ret %tmp3 = extractelement <1 x double> %tmp1, i32 0 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 diff --git a/llvm/test/CodeGen/AArch64/vector-insert-shuffle-cycle.ll b/llvm/test/CodeGen/AArch64/vector-insert-shuffle-cycle.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/vector-insert-shuffle-cycle.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - | FileCheck %s + +target triple = "arm64-apple-ios13.4.0" + +; Make sure we do not get stuck in 
a cycle in DAGCombiner. + +define void @test(i1 %c, <1 x double>* %ptr) { +; CHECK-LABEL: test: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: tbz w0, #0, LBB0_2 +; CHECK-NEXT: ; %bb.1: ; %bb1 +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: LBB0_2: ; %bb2 +; CHECK-NEXT: ldr q1, [x8] +; CHECK-NEXT: mov.d v1[0], v0[0] +; CHECK-NEXT: str q1, [x8] +; CHECK-NEXT: ret +entry: + br i1 %c, label %bb1, label %bb2 + +bb1: + %lv1 = load <1 x double>, <1 x double>* %ptr, align 16 + br label %bb2 + +bb2: + %p = phi <1 x double> [ %lv1, %bb1 ], [ zeroinitializer, %entry ] + %vecext19 = extractelement <1 x double> %p, i32 0 + %arrayidx21 = getelementptr inbounds [4 x <4 x double>], [4 x <4 x double>]* undef, i64 0, i64 3 + %lv2 = load <4 x double>, <4 x double>* %arrayidx21, align 16 + %vecins22 = insertelement <4 x double> %lv2, double %vecext19, i32 2 + store <4 x double> %vecins22, <4 x double>* %arrayidx21, align 16 + ret void +}