Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1282,6 +1282,16 @@
   : VectorIndex<i64, SVEVectorIndexExtDupQOperand,
                 [{ return ((uint64_t)Imm) < 4; }]>;
 
+defm sve_elm_idx_extdup_32b
+  : VectorIndex<i32, SVEVectorIndexExtDupBOperand,
+                [{ return ((uint64_t)Imm) < 64; }]>;
+defm sve_elm_idx_extdup_32h
+  : VectorIndex<i32, SVEVectorIndexExtDupHOperand,
+                [{ return ((uint64_t)Imm) < 32; }]>;
+defm sve_elm_idx_extdup_32s
+  : VectorIndex<i32, SVEVectorIndexExtDupSOperand,
+                [{ return ((uint64_t)Imm) < 16; }]>;
+
 // 8-bit immediate for AdvSIMD where 64-bit values of the form:
 // aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
 // are encoded as the eight bit value 'abcdefgh'.
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -607,6 +607,24 @@
               (FDUP_ZI_D fpimm64:$imm8)>;
   }
 
+  // Simplify DUP + TBL to DUPLane
+  def : Pat<(nxv16i8 (AArch64tbl (nxv16i8 ZPR:$vec), (nxv16i8 (AArch64dup sve_elm_idx_extdup_32b:$index)))),
+            (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_32b:$index)>;
+  def : Pat<(nxv8i16 (AArch64tbl (nxv8i16 ZPR:$vec), (nxv8i16 (AArch64dup sve_elm_idx_extdup_32h:$index)))),
+            (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_32h:$index)>;
+  def : Pat<(nxv4i32 (AArch64tbl (nxv4i32 ZPR:$vec), (nxv4i32 (AArch64dup sve_elm_idx_extdup_32s:$index)))),
+            (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_32s:$index)>;
+  def : Pat<(nxv2i64 (AArch64tbl (nxv2i64 ZPR:$vec), (nxv2i64 (AArch64dup sve_elm_idx_extdup_d:$index)))),
+            (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+  def : Pat<(nxv8f16 (AArch64tbl (nxv8f16 ZPR:$vec), (nxv8i16 (AArch64dup sve_elm_idx_extdup_32h:$index)))),
+            (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_32h:$index)>;
+  def : Pat<(nxv8bf16 (AArch64tbl (nxv8bf16 ZPR:$vec), (nxv8i16 (AArch64dup sve_elm_idx_extdup_32h:$index)))),
+            (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_32h:$index)>;
+  def : Pat<(nxv4f32 (AArch64tbl (nxv4f32 ZPR:$vec), (nxv4i32 (AArch64dup sve_elm_idx_extdup_32s:$index)))),
+            (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_32s:$index)>;
+  def : Pat<(nxv2f64 (AArch64tbl (nxv2f64 ZPR:$vec), (nxv2i64 (AArch64dup sve_elm_idx_extdup_d:$index)))),
+            (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index)>;
+
   // Select elements from either vector (predicated)
   defm SEL_ZPZZ   : sve_int_sel_vvv<"sel", vselect>;
@@ -2279,23 +2297,23 @@
   // Extract element from vector with immediate index
   def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), sve_elm_idx_extdup_b:$index)),
-            (EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, sve_elm_idx_extdup_b:$index), ssub)>;
+            (EXTRACT_SUBREG (DUP_ZZI_B ZPR:$vec, (trunc_imm sve_elm_idx_extdup_b:$index)), ssub)>;
   def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
-            (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), ssub)>;
+            (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, (trunc_imm sve_elm_idx_extdup_h:$index)), ssub)>;
   def : Pat<(i32 (vector_extract (nxv4i32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
-            (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
+            (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, (trunc_imm sve_elm_idx_extdup_s:$index)), ssub)>;
   def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
             (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
   def : Pat<(f16 (vector_extract (nxv8f16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
-            (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
+            (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, (trunc_imm sve_elm_idx_extdup_h:$index)), hsub)>;
   def : Pat<(f16 (vector_extract (nxv4f16 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
-            (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
+            (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, (trunc_imm sve_elm_idx_extdup_s:$index)), hsub)>;
   def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
             (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>;
   def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
-            (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
+            (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, (trunc_imm sve_elm_idx_extdup_s:$index)), ssub)>;
   def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
             (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), ssub)>;
   def : Pat<(f64 (vector_extract (nxv2f64 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
             (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -979,15 +979,15 @@
 }
 
 multiclass sve_int_perm_dup_i<string asm> {
-  def _B : sve_int_perm_dup_i<{?,?,?,?,1}, sve_elm_idx_extdup_b, asm, ZPR8> {
+  def _B : sve_int_perm_dup_i<{?,?,?,?,1}, sve_elm_idx_extdup_32b, asm, ZPR8> {
     let Inst{23-22} = idx{5-4};
     let Inst{20-17} = idx{3-0};
   }
-  def _H : sve_int_perm_dup_i<{?,?,?,1,0}, sve_elm_idx_extdup_h, asm, ZPR16> {
+  def _H : sve_int_perm_dup_i<{?,?,?,1,0}, sve_elm_idx_extdup_32h, asm, ZPR16> {
    let Inst{23-22} = idx{4-3};
    let Inst{20-18} = idx{2-0};
   }
-  def _S : sve_int_perm_dup_i<{?,?,1,0,0}, sve_elm_idx_extdup_s, asm, ZPR32> {
+  def _S : sve_int_perm_dup_i<{?,?,1,0,0}, sve_elm_idx_extdup_32s, asm, ZPR32> {
     let Inst{23-22} = idx{3-2};
     let Inst{20-19} = idx{1-0};
   }
@@ -1000,11 +1000,11 @@
   }
 
   def : InstAlias<"mov $Zd, $Zn$idx",
-                  (!cast<Instruction>(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, sve_elm_idx_extdup_b:$idx), 1>;
+                  (!cast<Instruction>(NAME # _B) ZPR8:$Zd, ZPR8:$Zn, sve_elm_idx_extdup_32b:$idx), 1>;
   def : InstAlias<"mov $Zd, $Zn$idx",
-                  (!cast<Instruction>(NAME # _H) ZPR16:$Zd, ZPR16:$Zn, sve_elm_idx_extdup_h:$idx), 1>;
+                  (!cast<Instruction>(NAME # _H) ZPR16:$Zd, ZPR16:$Zn, sve_elm_idx_extdup_32h:$idx), 1>;
   def : InstAlias<"mov $Zd, $Zn$idx",
-                  (!cast<Instruction>(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, sve_elm_idx_extdup_s:$idx), 1>;
+                  (!cast<Instruction>(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, sve_elm_idx_extdup_32s:$idx), 1>;
   def : InstAlias<"mov $Zd, $Zn$idx",
                   (!cast<Instruction>(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, sve_elm_idx_extdup_d:$idx), 1>;
   def : InstAlias<"mov $Zd, $Zn$idx",
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
@@ -8,7 +8,7 @@
 ; Unpredicated dup instruction (which is an alias for mov):
 ;   * register + register,
 ;   * register + immediate
-;
+;   * register + vector element index
 
 define <vscale x 16 x i8> @dup_i8(i8 %b) {
 ; CHECK-LABEL: dup_i8:
@@ -154,6 +154,69 @@
   ret <vscale x 2 x double> %out
 }
 
+define <vscale x 16 x i8> @dup_ext_i8(<vscale x 16 x i8> %data) {
+; CHECK-LABEL: dup_ext_i8:
+; CHECK: mov z0.b, z0.b[1]
+; CHECK-NEXT: ret
+  %tmp = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 1)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i8> %tmp)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @dup_ext_i16(<vscale x 8 x i16> %data) {
+; CHECK-LABEL: dup_ext_i16:
+; CHECK: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+  %tmp = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1)
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i16> %tmp)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @dup_ext_i32(<vscale x 4 x i32> %data) {
+; CHECK-LABEL: dup_ext_i32:
+; CHECK: mov z0.s, z0.s[1]
+; CHECK-NEXT: ret
+  %tmp = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32> %tmp)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @dup_ext_i64(<vscale x 2 x i64> %data) {
+; CHECK-LABEL: dup_ext_i64:
+; CHECK: mov z0.d, z0.d[1]
+; CHECK-NEXT: ret
+  %tmp = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64> %tmp)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x half> @dup_ext_f16(<vscale x 8 x half> %data) {
+; CHECK-LABEL: dup_ext_f16:
+; CHECK: mov z0.h, z0.h[1]
+; CHECK-NEXT: ret
+  %tmp = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 1)
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x i16> %tmp)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @dup_ext_f32(<vscale x 4 x float> %data) {
+; CHECK-LABEL: dup_ext_f32:
+; CHECK: mov z0.s, z0.s[1]
+; CHECK-NEXT: ret
+  %tmp = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i32> %tmp)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @dup_ext_f64(<vscale x 2 x double> %data) {
+; CHECK-LABEL: dup_ext_f64:
+; CHECK: mov z0.d, z0.d[1]
+; CHECK-NEXT: ret
+  %tmp = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 1)
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i64> %tmp)
+  ret <vscale x 2 x double> %out
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
@@ -163,6 +226,14 @@
 declare <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float)
 declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
 declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)
 
 ; +bf16 is required for the bfloat version.
 attributes #0 = { "target-features"="+sve,+bf16" }
Index: llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
+++ llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
@@ -29,8 +29,8 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr d0, [x0]
 ; CHECK-NEXT:    add x8, x0, x2, lsl #3
-; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    str x8, [x1]
+; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    ret
   %load = load double, double* %a
   %dup = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %load)