diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21109,6 +21109,15 @@ } } + // ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V) + // Fold only when the splatted operand is a constant or the wide splat has a + // single use, and do not create a SPLAT_VECTOR that is not legal for NVT when + // the combiner is restricted to legal operations. + if (V.getOpcode() == ISD::SPLAT_VECTOR) + if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse()) + if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT)) + return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0)); + // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') if (V.getOpcode() == ISD::BITCAST && diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll @@ -424,6 +424,44 @@ ret <4 x i32> %out } +; +; Extract fixed-width vector from a scalable vector splat.
+; + +define <2 x float> @extract_v2f32_nxv4f32_splat(float %f) { +; CHECK-LABEL: extract_v2f32_nxv4f32_splat: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: dup v0.2s, v0.s[0] +; CHECK-NEXT: ret + %ins = insertelement <vscale x 4 x float> poison, float %f, i32 0 + %splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer + %ext = call <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0) + ret <2 x float> %ext +} + +define <2 x float> @extract_v2f32_nxv4f32_splat_const() { +; CHECK-LABEL: extract_v2f32_nxv4f32_splat_const: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov v0.2s, #1.00000000 +; CHECK-NEXT: ret + %ins = insertelement <vscale x 4 x float> poison, float 1.0, i32 0 + %splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer + %ext = call <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0) + ret <2 x float> %ext +} + +define <4 x i32> @extract_v4i32_nxv8i32_splat_const() { +; CHECK-LABEL: extract_v4i32_nxv8i32_splat_const: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.4s, #1 +; CHECK-NEXT: ret + %ins = insertelement <vscale x 8 x i32> poison, i32 1, i32 0 + %splat = shufflevector <vscale x 8 x i32> %ins, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %ext = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv8i32(<vscale x 8 x i32> %splat, i64 0) + ret <4 x i32> %ext +} + attributes #0 = { vscale_range(2,2) } attributes #1 = { vscale_range(8,8) } @@ -442,3 +480,5 @@ declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8>, i64) declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64>, i64) +declare <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float>, i64) +declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv8i32(<vscale x 8 x i32>, i64) diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll @@ -1014,3 +1014,63 @@ declare <vscale x 4 x bfloat> @llvm.experimental.vector.extract.nxv4bf16.nxv16bf16(<vscale x 16 x bfloat>, i64) + +; +;
Extract from a splat +; +define <vscale x 2 x float> @extract_nxv2f32_nxv4f32_splat(float %f) { +; CHECK-LABEL: extract_nxv2f32_nxv4f32_splat: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: mov z0.s, s0 +; CHECK-NEXT: ret + %ins = insertelement <vscale x 4 x float> poison, float %f, i32 0 + %splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer + %ext = call <vscale x 2 x float> @llvm.experimental.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0) + ret <vscale x 2 x float> %ext +} + +define <vscale x 2 x float> @extract_nxv2f32_nxv4f32_splat_const() { +; CHECK-LABEL: extract_nxv2f32_nxv4f32_splat_const: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z0.s, #1.00000000 +; CHECK-NEXT: ret + %ins = insertelement <vscale x 4 x float> poison, float 1.0, i32 0 + %splat = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer + %ext = call <vscale x 2 x float> @llvm.experimental.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float> %splat, i64 0) + ret <vscale x 2 x float> %ext +} + +define <vscale x 4 x i32> @extract_nxv4i32_nxv8i32_splat_const() { +; CHECK-LABEL: extract_nxv4i32_nxv8i32_splat_const: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #1 // =0x1 +; CHECK-NEXT: ret + %ins = insertelement <vscale x 8 x i32> poison, i32 1, i32 0 + %splat = shufflevector <vscale x 8 x i32> %ins, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer + %ext = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %splat, i64 0) + ret <vscale x 4 x i32> %ext +} + +define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_all_ones() { +; CHECK-LABEL: extract_nxv2i1_nxv16i1_all_ones: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ret + %ins = insertelement <vscale x 16 x i1> poison, i1 1, i32 0 + %splat = shufflevector <vscale x 16 x i1> %ins, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer + %ext = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %splat, i64 0) + ret <vscale x 2 x i1> %ext +} + +define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_all_zero() { +; CHECK-LABEL: extract_nxv2i1_nxv16i1_all_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p0.b +; CHECK-NEXT: ret + %ext = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> zeroinitializer, i64 0) + ret <vscale x 2 x i1> %ext +} + +declare <vscale x 2 x float> @llvm.experimental.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float>, i64) +declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32>, i64) diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -554,10 +554,7 @@ ; CHECK-LABEL: insert_nxv16i1_nxv4i1_into_zero: ; CHECK: // %bb.0: ; CHECK-NEXT: pfalse p1.b -; CHECK-NEXT: punpklo p2.h, p1.b -; CHECK-NEXT: punpkhi p1.h, p1.b -; CHECK-NEXT: punpkhi p2.h, p2.b -; CHECK-NEXT: uzp1 p0.h, p0.h, p2.h +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b ; CHECK-NEXT: ret %v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv4i1(<vscale x 16 x i1> zeroinitializer, <vscale x 4 x i1> %sv, i64 0) diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll @@ -134,11 +134,10 @@ ; CHECK-LABEL: store_nxv6f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov z0.s, #1.00000000 -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uunpklo z1.d, z0.s -; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: st1w { z0.s }, p0, [x0] -; CHECK-NEXT: st1w { z1.d }, p1, [x0, #2, mul vl] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: st1w { z0.d }, p0, [x0, #2, mul vl] +; CHECK-NEXT: st1w { z0.s }, p1, [x0] ; CHECK-NEXT: ret %ins = insertelement <vscale x 6 x float> undef, float 1.0, i32 0 %splat = shufflevector <vscale x 6 x float> %ins, <vscale x 6 x float> undef, <vscale x 6 x i32> zeroinitializer @@ -150,11 +149,10 @@ ; CHECK-LABEL: store_nxv12f16: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov z0.h, #1.00000000 -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: uunpklo z1.s, z0.h -; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: st1h { z0.h }, p0, [x0] -; CHECK-NEXT: st1h { z1.s }, p1, [x0, #2, mul vl] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.h +; CHECK-NEXT: st1h { z0.s }, p0, [x0, #2, mul vl] +; CHECK-NEXT: st1h { z0.h }, p1, [x0] ; CHECK-NEXT: ret %ins = insertelement <vscale x 12 x half> undef, half 1.0, i32 0 %splat = shufflevector <vscale x 12 x half> %ins, <vscale x 12 x half> undef, <vscale x 12 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll @@ -1572,33 +1572,26 @@ ret <vscale x 32 x i32> %v } -; FIXME: We don't catch this as unmasked. - define <vscale x 32 x i32> @vadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vadd_vi_nxv32i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a4, a1, 2 -; CHECK-NEXT: vsetvli a3, zero, e8, m4, ta, mu -; CHECK-NEXT: vmset.m v24 -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub a3, a0, a1 -; CHECK-NEXT: vslidedown.vx v0, v24, a4 -; CHECK-NEXT: bltu a0, a3, .LBB119_2 +; CHECK-NEXT: mv a2, a0 +; CHECK-NEXT: bltu a0, a1, .LBB119_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB119_2: +; CHECK-NEXT: li a3, 0 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: sub a1, a0, a1 +; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: bltu a0, a1, .LBB119_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB119_4: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu +; CHECK-NEXT: vadd.vi v16, v16, -1 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer