diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -128,6 +128,24 @@ bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeUnscaled(N, 16, Base, OffImm); } + template + bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) { + // Ask SelectAddrModeIndexed for a Base/OffImm pair and keep it only when + // OffImm is a constant whose sign-extended value is <= Max. + bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm); + if (Found) { + if (auto *CI = dyn_cast(OffImm)) { + int64_t C = CI->getSExtValue(); + if (C <= Max) + return true; + } + } + + // Otherwise, base only, address materialized in a register; never fails. + Base = N; + OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); + return true; + } template bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset, diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -3127,6 +3127,13 @@ def am_indexed64 : ComplexPattern; def am_indexed128 : ComplexPattern; +// (unsigned immediate) +// Indexed for 8-bit registers. offset is in range [0,63]. 
+def am_indexed8_6b : ComplexPattern", []>; +def am_indexed16_6b : ComplexPattern", []>; +def am_indexed32_6b : ComplexPattern", []>; +def am_indexed64_6b : ComplexPattern", []>; + def gi_am_indexed8 : GIComplexOperandMatcher">, GIComplexPatternEquiv; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2300,6 +2300,22 @@ case AArch64::LD1B_D_IMM: case AArch64::LD1SB_D_IMM: case AArch64::ST1B_D_IMM: + case AArch64::LD1RB_IMM: + case AArch64::LD1RB_H_IMM: + case AArch64::LD1RB_S_IMM: + case AArch64::LD1RB_D_IMM: + case AArch64::LD1RSB_H_IMM: + case AArch64::LD1RSB_S_IMM: + case AArch64::LD1RSB_D_IMM: + case AArch64::LD1RH_IMM: + case AArch64::LD1RH_S_IMM: + case AArch64::LD1RH_D_IMM: + case AArch64::LD1RSH_S_IMM: + case AArch64::LD1RSH_D_IMM: + case AArch64::LD1RW_IMM: + case AArch64::LD1RW_D_IMM: + case AArch64::LD1RSW_IMM: + case AArch64::LD1RD_IMM: return 3; case AArch64::ADDG: case AArch64::STGOffset: @@ -2913,6 +2929,42 @@ MinOffset = -64; MaxOffset = 63; break; + case AArch64::LD1RB_IMM: + case AArch64::LD1RB_H_IMM: + case AArch64::LD1RB_S_IMM: + case AArch64::LD1RB_D_IMM: + case AArch64::LD1RSB_H_IMM: + case AArch64::LD1RSB_S_IMM: + case AArch64::LD1RSB_D_IMM: + Scale = TypeSize::Fixed(1); + Width = 1; + MinOffset = 0; + MaxOffset = 63; + break; + case AArch64::LD1RH_IMM: + case AArch64::LD1RH_S_IMM: + case AArch64::LD1RH_D_IMM: + case AArch64::LD1RSH_S_IMM: + case AArch64::LD1RSH_D_IMM: + Scale = TypeSize::Fixed(2); + Width = 2; + MinOffset = 0; + MaxOffset = 63; + break; + case AArch64::LD1RW_IMM: + case AArch64::LD1RW_D_IMM: + case AArch64::LD1RSW_IMM: + Scale = TypeSize::Fixed(4); + Width = 4; + MinOffset = 0; + MaxOffset = 63; + break; + case AArch64::LD1RD_IMM: + Scale = TypeSize::Fixed(8); + Width = 8; + MinOffset = 0; + MaxOffset = 63; + break; } return true; diff --git 
a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1643,6 +1643,45 @@ def : Pat<(AArch64ptest (nxv2i1 PPR:$pg), (nxv2i1 PPR:$src)), (PTEST_PP PPR:$pg, PPR:$src)>; + let AddedComplexity = 1 in { + class LD1RPat : + Pat<(vt (AArch64dup (index_vt (operator (CP GPR64:$base, immtype:$offset))))), + (load (ptrue 31), GPR64:$base, $offset)>; + } + + // LD1R of 8-bit data + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + + // LD1R of 16-bit data + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + + // LD1R of 32-bit data + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + + // LD1R of 64-bit data + def : LD1RPat; + + // LD1R of FP data + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + def : LD1RPat; + // LD1R of 128-bit masked data def : Pat<(nxv16i8 (AArch64ld1rq_z PPR:$gp, GPR64:$base)), (LD1RQ_B_IMM $gp, $base, (i64 0))>; diff --git a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll --- a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll +++ b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll @@ -23,10 +23,10 @@ define @test_post_ld1_dup(double* %a, double** %ptr, i64 %inc) { ; CHECK-LABEL: test_post_ld1_dup: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0] ; CHECK-NEXT: add x8, x0, x2, lsl #3 ; CHECK-NEXT: str x8, [x1] -; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: ret %load = load double, double* %a %dup = call @llvm.aarch64.sve.dup.x.nxv2f64(double %load) diff --git a/llvm/test/CodeGen/AArch64/sve-ld1r.ll b/llvm/test/CodeGen/AArch64/sve-ld1r.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ld1r.ll @@ -0,0 +1,724 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s +; +; Check that an ld1r* instruction is generated to splat scalar during load, +; rather than mov from scalar to vector register (which would require the vector unit). +; +; one-off: ld1r_stack checks that ld1rb works with stack objects. +; +; Test axes: +; types = [i8, i16, i32, i64, half, float, double] +; methods = [direct load, gep upper bound - 1, gep out of range x {neg,pos}, sext..., zext..., unpacked_floats...] +; + +@g8 = external global i8 + +; One-off test for splatted value coming from stack load. +define @ld1r_stack() { +; CHECK-LABEL: ld1r_stack: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 // =16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: adrp x8, :got:g8 +; CHECK-NEXT: ldr x8, [x8, :got_lo12:g8] +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: ldrb w8, [x8] +; CHECK-NEXT: strb w8, [sp, #12] +; CHECK-NEXT: ld1rb { z0.b }, p0/z, [sp, #14] +; CHECK-NEXT: add sp, sp, #16 // =16 +; CHECK-NEXT: ret + %valp = alloca i8 + %valp2 = load volatile i8, i8* @g8 + store volatile i8 %valp2, i8* %valp + %valp3 = getelementptr i8, i8* %valp, i32 2 + %val = load i8, i8* %valp3 + %1 = insertelement undef, i8 %val, i32 0 + %2 = shufflevector %1, undef, zeroinitializer + ret %2 +} + +define @ld1rb(i8* %valp) { +; CHECK-LABEL: ld1rb: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i8, i8* %valp + %ins = insertelement undef, i8 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rb_gep(i8* %valp) { +; CHECK-LABEL: ld1rb_gep: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0, #63] +; CHECK-NEXT: ret + %valp2 = getelementptr i8, i8* %valp, i32 63 + %val = load i8, i8* %valp2 + %ins = insertelement undef, i8 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define 
@ld1rb_gep_out_of_range_up(i8* %valp) { +; CHECK-LABEL: ld1rb_gep_out_of_range_up: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #64 // =64 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr i8, i8* %valp, i32 64 + %val = load i8, i8* %valp2 + %ins = insertelement undef, i8 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rb_gep_out_of_range_down(i8* %valp) { +; CHECK-LABEL: ld1rb_gep_out_of_range_down: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #1 // =1 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr i8, i8* %valp, i32 -1 + %val = load i8, i8* %valp2 + %ins = insertelement undef, i8 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rb_i8_i16_zext(i8* %valp) { +; CHECK-LABEL: ld1rb_i8_i16_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1rb { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i8, i8* %valp + %ext = zext i8 %val to i16 + %ins = insertelement undef, i16 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rb_i8_i16_sext(i8* %valp) { +; CHECK-LABEL: ld1rb_i8_i16_sext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1rsb { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i8, i8* %valp + %ext = sext i8 %val to i16 + %ins = insertelement undef, i16 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rb_i8_i32_zext(i8* %valp) { +; CHECK-LABEL: ld1rb_i8_i32_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rb { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i8, i8* %valp + %ext = zext i8 %val to i32 + %ins = insertelement undef, i32 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rb_i8_i32_sext(i8* %valp) { +; CHECK-LABEL: ld1rb_i8_i32_sext: +; CHECK: // %bb.0: 
+; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rsb { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i8, i8* %valp + %ext = sext i8 %val to i32 + %ins = insertelement undef, i32 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rb_i8_i64_zext(i8* %valp) { +; CHECK-LABEL: ld1rb_i8_i64_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rb { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i8, i8* %valp + %ext = zext i8 %val to i64 + %ins = insertelement undef, i64 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rb_i8_i64_sext(i8* %valp) { +; CHECK-LABEL: ld1rb_i8_i64_sext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rsb { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i8, i8* %valp + %ext = sext i8 %val to i64 + %ins = insertelement undef, i64 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh(i16* %valp) { +; CHECK-LABEL: ld1rh: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i16, i16* %valp + %ins = insertelement undef, i16 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_gep(i16* %valp) { +; CHECK-LABEL: ld1rh_gep: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0, #126] +; CHECK-NEXT: ret + %valp2 = getelementptr i16, i16* %valp, i32 63 + %val = load i16, i16* %valp2 + %ins = insertelement undef, i16 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_gep_out_of_range_up(i16* %valp) { +; CHECK-LABEL: ld1rh_gep_out_of_range_up: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #128 // =128 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr i16, i16* %valp, i32 64 + %val = load i16, i16* %valp2 + %ins = insertelement undef, i16 %val, i32 
0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_gep_out_of_range_down(i16* %valp) { +; CHECK-LABEL: ld1rh_gep_out_of_range_down: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #2 // =2 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr i16, i16* %valp, i32 -1 + %val = load i16, i16* %valp2 + %ins = insertelement undef, i16 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_i16_i32_zext(i16* %valp) { +; CHECK-LABEL: ld1rh_i16_i32_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i16, i16* %valp + %ext = zext i16 %val to i32 + %ins = insertelement undef, i32 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_i16_i32_sext(i16* %valp) { +; CHECK-LABEL: ld1rh_i16_i32_sext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rsh { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i16, i16* %valp + %ext = sext i16 %val to i32 + %ins = insertelement undef, i32 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_i16_i64_zext(i16* %valp) { +; CHECK-LABEL: ld1rh_i16_i64_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i16, i16* %valp + %ext = zext i16 %val to i64 + %ins = insertelement undef, i64 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_i16_i64_sext(i16* %valp) { +; CHECK-LABEL: ld1rh_i16_i64_sext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rsh { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i16, i16* %valp + %ext = sext i16 %val to i64 + %ins = insertelement undef, i64 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw(i32* %valp) { +; CHECK-LABEL: ld1rw: +; 
CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i32, i32* %valp + %ins = insertelement undef, i32 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_gep(i32* %valp) { +; CHECK-LABEL: ld1rw_gep: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0, #252] +; CHECK-NEXT: ret + %valp2 = getelementptr i32, i32* %valp, i32 63 + %val = load i32, i32* %valp2 + %ins = insertelement undef, i32 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_gep_out_of_range_up(i32* %valp) { +; CHECK-LABEL: ld1rw_gep_out_of_range_up: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #256 // =256 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr i32, i32* %valp, i32 64 + %val = load i32, i32* %valp2 + %ins = insertelement undef, i32 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_gep_out_of_range_down(i32* %valp) { +; CHECK-LABEL: ld1rw_gep_out_of_range_down: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #4 // =4 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr i32, i32* %valp, i32 -1 + %val = load i32, i32* %valp2 + %ins = insertelement undef, i32 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_i32_i64_zext(i32* %valp) { +; CHECK-LABEL: ld1rw_i32_i64_zext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i32, i32* %valp + %ext = zext i32 %val to i64 + %ins = insertelement undef, i64 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_i32_i64_sext(i32* %valp) { +; CHECK-LABEL: ld1rw_i32_i64_sext: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rsw { z0.d }, 
p0/z, [x0] +; CHECK-NEXT: ret + %val = load i32, i32* %valp + %ext = sext i32 %val to i64 + %ins = insertelement undef, i64 %ext, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rd(i64* %valp) { +; CHECK-LABEL: ld1rd: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load i64, i64* %valp + %ins = insertelement undef, i64 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rd_gep(i64* %valp) { +; CHECK-LABEL: ld1rd_gep: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0, #504] +; CHECK-NEXT: ret + %valp2 = getelementptr i64, i64* %valp, i32 63 + %val = load i64, i64* %valp2 + %ins = insertelement undef, i64 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rd_gep_out_of_range_up(i64* %valp) { +; CHECK-LABEL: ld1rd_gep_out_of_range_up: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #512 // =512 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr i64, i64* %valp, i32 64 + %val = load i64, i64* %valp2 + %ins = insertelement undef, i64 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rd_gep_out_of_range_down(i64* %valp) { +; CHECK-LABEL: ld1rd_gep_out_of_range_down: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #8 // =8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr i64, i64* %valp, i32 -1 + %val = load i64, i64* %valp2 + %ins = insertelement undef, i64 %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half(half* %valp) { +; CHECK-LABEL: ld1rh_half: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load half, half* %valp + %ins = insertelement undef, half %val, i32 0 + %shf = 
shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half_gep(half* %valp) { +; CHECK-LABEL: ld1rh_half_gep: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0, #126] +; CHECK-NEXT: ret + %valp2 = getelementptr half, half* %valp, i32 63 + %val = load half, half* %valp2 + %ins = insertelement undef, half %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half_gep_out_of_range_up(half* %valp) { +; CHECK-LABEL: ld1rh_half_gep_out_of_range_up: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #128 // =128 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr half, half* %valp, i32 64 + %val = load half, half* %valp2 + %ins = insertelement undef, half %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half_gep_out_of_range_down(half* %valp) { +; CHECK-LABEL: ld1rh_half_gep_out_of_range_down: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #2 // =2 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr half, half* %valp, i32 -1 + %val = load half, half* %valp2 + %ins = insertelement undef, half %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half_unpacked4(half* %valp) { +; CHECK-LABEL: ld1rh_half_unpacked4: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load half, half* %valp + %ins = insertelement undef, half %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half_unpacked4_gep(half* %valp) { +; CHECK-LABEL: ld1rh_half_unpacked4_gep: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0, #126] +; CHECK-NEXT: ret + %valp2 = getelementptr half, half* %valp, i32 63 + %val = load half, half* %valp2 + %ins = insertelement undef, half 
%val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half_unpacked4_gep_out_of_range_up(half* %valp) { +; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #128 // =128 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr half, half* %valp, i32 64 + %val = load half, half* %valp2 + %ins = insertelement undef, half %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half_unpacked4_gep_out_of_range_down(half* %valp) { +; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #2 // =2 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr half, half* %valp, i32 -1 + %val = load half, half* %valp2 + %ins = insertelement undef, half %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half_unpacked2(half* %valp) { +; CHECK-LABEL: ld1rh_half_unpacked2: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load half, half* %valp + %ins = insertelement undef, half %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half_unpacked2_gep(half* %valp) { +; CHECK-LABEL: ld1rh_half_unpacked2_gep: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0, #126] +; CHECK-NEXT: ret + %valp2 = getelementptr half, half* %valp, i32 63 + %val = load half, half* %valp2 + %ins = insertelement undef, half %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half_unpacked2_gep_out_of_range_up(half* %valp) { +; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #128 // =128 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rh { z0.d }, 
p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr half, half* %valp, i32 64 + %val = load half, half* %valp2 + %ins = insertelement undef, half %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rh_half_unpacked2_gep_out_of_range_down(half* %valp) { +; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #2 // =2 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr half, half* %valp, i32 -1 + %val = load half, half* %valp2 + %ins = insertelement undef, half %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_float(float* %valp) { +; CHECK-LABEL: ld1rw_float: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load float, float* %valp + %ins = insertelement undef, float %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_float_gep(float* %valp) { +; CHECK-LABEL: ld1rw_float_gep: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0, #252] +; CHECK-NEXT: ret + %valp2 = getelementptr float, float* %valp, i32 63 + %val = load float, float* %valp2 + %ins = insertelement undef, float %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_float_gep_out_of_range_up(float* %valp) { +; CHECK-LABEL: ld1rw_float_gep_out_of_range_up: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #256 // =256 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr float, float* %valp, i32 64 + %val = load float, float* %valp2 + %ins = insertelement undef, float %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_float_gep_out_of_range_down(float* %valp) { +; CHECK-LABEL: ld1rw_float_gep_out_of_range_down: +; CHECK: // %bb.0: 
+; CHECK-NEXT: sub x8, x0, #4 // =4 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr float, float* %valp, i32 -1 + %val = load float, float* %valp2 + %ins = insertelement undef, float %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_float_unpacked2(float* %valp) { +; CHECK-LABEL: ld1rw_float_unpacked2: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load float, float* %valp + %ins = insertelement undef, float %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_float_unpacked2_gep(float* %valp) { +; CHECK-LABEL: ld1rw_float_unpacked2_gep: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0, #252] +; CHECK-NEXT: ret + %valp2 = getelementptr float, float* %valp, i32 63 + %val = load float, float* %valp2 + %ins = insertelement undef, float %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_float_unpacked2_gep_out_of_range_up(float* %valp) { +; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #256 // =256 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr float, float* %valp, i32 64 + %val = load float, float* %valp2 + %ins = insertelement undef, float %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rw_float_unpacked2_gep_out_of_range_down(float* %valp) { +; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #4 // =4 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr float, float* %valp, i32 -1 + %val = load float, float* %valp2 + %ins = insertelement undef, float %val, i32 0 + %shf = shufflevector %ins, 
undef, zeroinitializer + ret %shf +} + +define @ld1rd_double(double* %valp) { +; CHECK-LABEL: ld1rd_double: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0] +; CHECK-NEXT: ret + %val = load double, double* %valp + %ins = insertelement undef, double %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rd_double_gep(double* %valp) { +; CHECK-LABEL: ld1rd_double_gep: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0, #504] +; CHECK-NEXT: ret + %valp2 = getelementptr double, double* %valp, i32 63 + %val = load double, double* %valp2 + %ins = insertelement undef, double %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rd_double_gep_out_of_range_up(double* %valp) { +; CHECK-LABEL: ld1rd_double_gep_out_of_range_up: +; CHECK: // %bb.0: +; CHECK-NEXT: add x8, x0, #512 // =512 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr double, double* %valp, i32 64 + %val = load double, double* %valp2 + %ins = insertelement undef, double %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} + +define @ld1rd_double_gep_out_of_range_down(double* %valp) { +; CHECK-LABEL: ld1rd_double_gep_out_of_range_down: +; CHECK: // %bb.0: +; CHECK-NEXT: sub x8, x0, #8 // =8 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] +; CHECK-NEXT: ret + %valp2 = getelementptr double, double* %valp, i32 -1 + %val = load double, double* %valp2 + %ins = insertelement undef, double %val, i32 0 + %shf = shufflevector %ins, undef, zeroinitializer + ret %shf +} diff --git a/llvm/test/CodeGen/AArch64/sve-ld1r.mir b/llvm/test/CodeGen/AArch64/sve-ld1r.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-ld1r.mir @@ -0,0 +1,217 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc 
-mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=prologepilog -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s +# +# Test that prologepilog works for each of the LD1R instructions for stack-based objects. +# +--- | + define void @testcase_positive_offset() { + %dummy = alloca i64, align 8 + %object = alloca i64, align 8 + ; Reads from %object at offset 63 * readsize + ret void + } + define void @testcase_positive_offset_out_of_range() { + %dummy = alloca i64, align 8 + %object = alloca i64, align 8 + ; Reads from %object at offset 64 * readsize + ret void + } + define void @testcase_negative_offset_out_of_range() { + %dummy = alloca i64, align 8 + %object = alloca i64, align 8 + ; Reads from %object at offset -1 * readsize + ret void + } +... +--- +name: testcase_positive_offset +tracksRegLiveness: true +stack: + - { id: 0, name: dummy, type: default, offset: 0, size: 8, alignment: 8 } + - { id: 1, name: object, type: default, offset: 0, size: 8, alignment: 8 } +body: | + bb.0 (%ir-block.0): + liveins: $p0 + + ; CHECK-LABEL: name: testcase_positive_offset + ; CHECK: liveins: $p0 + ; CHECK: $sp = frame-setup SUBXri $sp, 16, 0 + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK: renamable $z0 = LD1RB_IMM renamable $p0, $sp, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: renamable $z0 = LD1RB_H_IMM renamable $p0, $sp, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: renamable $z0 = LD1RB_S_IMM renamable $p0, $sp, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: renamable $z0 = LD1RB_D_IMM renamable $p0, $sp, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: renamable $z0 = LD1RSB_H_IMM renamable $p0, $sp, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: renamable $z0 = LD1RSB_S_IMM renamable $p0, $sp, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: renamable $z0 = LD1RSB_D_IMM renamable $p0, $sp, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: renamable $z0 = LD1RH_IMM renamable $p0, $sp, 63 :: (load (s16) 
from %ir.object) + ; CHECK: renamable $z0 = LD1RH_S_IMM renamable $p0, $sp, 63 :: (load (s16) from %ir.object) + ; CHECK: renamable $z0 = LD1RH_D_IMM renamable $p0, $sp, 63 :: (load (s16) from %ir.object) + ; CHECK: renamable $z0 = LD1RSH_S_IMM renamable $p0, $sp, 63 :: (load (s16) from %ir.object) + ; CHECK: renamable $z0 = LD1RSH_D_IMM renamable $p0, $sp, 63 :: (load (s16) from %ir.object) + ; CHECK: renamable $z0 = LD1RW_IMM renamable $p0, $sp, 63 :: (load (s32) from %ir.object) + ; CHECK: renamable $z0 = LD1RW_D_IMM renamable $p0, $sp, 63 :: (load (s32) from %ir.object) + ; CHECK: renamable $z0 = LD1RSW_IMM renamable $p0, $sp, 63 :: (load (s32) from %ir.object) + ; CHECK: renamable $z0 = LD1RD_IMM renamable $p0, $sp, 63 :: (load (s64) from %ir.object) + ; CHECK: renamable $z0 = LD1RD_IMM renamable $p0, $sp, 63 :: (load (s64) from %ir.object) + ; CHECK: $sp = frame-destroy ADDXri $sp, 16, 0 + ; CHECK: RET_ReallyLR implicit $z0 + renamable $z0 = LD1RB_IMM renamable $p0, %stack.1.object, 63 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RB_H_IMM renamable $p0, %stack.1.object, 63 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RB_S_IMM renamable $p0, %stack.1.object, 63 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RB_D_IMM renamable $p0, %stack.1.object, 63 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RSB_H_IMM renamable $p0, %stack.1.object, 63 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RSB_S_IMM renamable $p0, %stack.1.object, 63 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RSB_D_IMM renamable $p0, %stack.1.object, 63 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RH_IMM renamable $p0, %stack.1.object, 63 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RH_S_IMM renamable $p0, %stack.1.object, 63 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RH_D_IMM renamable $p0, %stack.1.object, 63 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RSH_S_IMM 
renamable $p0, %stack.1.object, 63 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RSH_D_IMM renamable $p0, %stack.1.object, 63 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RW_IMM renamable $p0, %stack.1.object, 63 :: (load 4 from %ir.object, align 4) + renamable $z0 = LD1RW_D_IMM renamable $p0, %stack.1.object, 63 :: (load 4 from %ir.object, align 4) + renamable $z0 = LD1RSW_IMM renamable $p0, %stack.1.object, 63 :: (load 4 from %ir.object, align 4) + renamable $z0 = LD1RD_IMM renamable $p0, %stack.1.object, 63 :: (load 8 from %ir.object, align 8) + renamable $z0 = LD1RD_IMM renamable $p0, %stack.1.object, 63 :: (load 8 from %ir.object, align 8) + RET_ReallyLR implicit $z0 +... +--- +name: testcase_positive_offset_out_of_range +tracksRegLiveness: true +stack: + - { id: 0, name: dummy, type: default, offset: 0, size: 8, alignment: 8 } + - { id: 1, name: object, type: default, offset: 0, size: 8, alignment: 8 } +body: | + bb.0 (%ir-block.0): + liveins: $p0 + + ; CHECK-LABEL: name: testcase_positive_offset_out_of_range + ; CHECK: liveins: $p0 + ; CHECK: $sp = frame-setup SUBXri $sp, 16, 0 + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK: $x8 = ADDXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RB_IMM renamable $p0, killed $x8, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = ADDXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RB_H_IMM renamable $p0, killed $x8, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = ADDXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RB_S_IMM renamable $p0, killed $x8, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = ADDXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RB_D_IMM renamable $p0, killed $x8, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = ADDXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RSB_H_IMM renamable $p0, killed $x8, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = ADDXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RSB_S_IMM renamable $p0, killed $x8, 
63 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = ADDXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RSB_D_IMM renamable $p0, killed $x8, 63 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = ADDXri $sp, 2, 0 + ; CHECK: renamable $z0 = LD1RH_IMM renamable $p0, killed $x8, 63 :: (load (s16) from %ir.object) + ; CHECK: $x8 = ADDXri $sp, 2, 0 + ; CHECK: renamable $z0 = LD1RH_S_IMM renamable $p0, killed $x8, 63 :: (load (s16) from %ir.object) + ; CHECK: $x8 = ADDXri $sp, 2, 0 + ; CHECK: renamable $z0 = LD1RH_D_IMM renamable $p0, killed $x8, 63 :: (load (s16) from %ir.object) + ; CHECK: $x8 = ADDXri $sp, 2, 0 + ; CHECK: renamable $z0 = LD1RSH_S_IMM renamable $p0, killed $x8, 63 :: (load (s16) from %ir.object) + ; CHECK: $x8 = ADDXri $sp, 2, 0 + ; CHECK: renamable $z0 = LD1RSH_D_IMM renamable $p0, killed $x8, 63 :: (load (s16) from %ir.object) + ; CHECK: $x8 = ADDXri $sp, 4, 0 + ; CHECK: renamable $z0 = LD1RW_IMM renamable $p0, killed $x8, 63 :: (load (s32) from %ir.object) + ; CHECK: $x8 = ADDXri $sp, 4, 0 + ; CHECK: renamable $z0 = LD1RW_D_IMM renamable $p0, killed $x8, 63 :: (load (s32) from %ir.object) + ; CHECK: $x8 = ADDXri $sp, 4, 0 + ; CHECK: renamable $z0 = LD1RSW_IMM renamable $p0, killed $x8, 63 :: (load (s32) from %ir.object) + ; CHECK: $x8 = ADDXri $sp, 8, 0 + ; CHECK: renamable $z0 = LD1RD_IMM renamable $p0, killed $x8, 63 :: (load (s64) from %ir.object) + ; CHECK: $x8 = ADDXri $sp, 8, 0 + ; CHECK: renamable $z0 = LD1RD_IMM renamable $p0, killed $x8, 63 :: (load (s64) from %ir.object) + ; CHECK: $sp = frame-destroy ADDXri $sp, 16, 0 + ; CHECK: RET_ReallyLR implicit $z0 + renamable $z0 = LD1RB_IMM renamable $p0, %stack.1.object, 64 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RB_H_IMM renamable $p0, %stack.1.object, 64 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RB_S_IMM renamable $p0, %stack.1.object, 64 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RB_D_IMM renamable $p0, %stack.1.object, 64 :: (load 1 
from %ir.object, align 2) + renamable $z0 = LD1RSB_H_IMM renamable $p0, %stack.1.object, 64 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RSB_S_IMM renamable $p0, %stack.1.object, 64 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RSB_D_IMM renamable $p0, %stack.1.object, 64 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RH_IMM renamable $p0, %stack.1.object, 64 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RH_S_IMM renamable $p0, %stack.1.object, 64 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RH_D_IMM renamable $p0, %stack.1.object, 64 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RSH_S_IMM renamable $p0, %stack.1.object, 64 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RSH_D_IMM renamable $p0, %stack.1.object, 64 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RW_IMM renamable $p0, %stack.1.object, 64 :: (load 4 from %ir.object, align 4) + renamable $z0 = LD1RW_D_IMM renamable $p0, %stack.1.object, 64 :: (load 4 from %ir.object, align 4) + renamable $z0 = LD1RSW_IMM renamable $p0, %stack.1.object, 64 :: (load 4 from %ir.object, align 4) + renamable $z0 = LD1RD_IMM renamable $p0, %stack.1.object, 64 :: (load 8 from %ir.object, align 8) + renamable $z0 = LD1RD_IMM renamable $p0, %stack.1.object, 64 :: (load 8 from %ir.object, align 8) + RET_ReallyLR implicit $z0 +... + +... 
+--- +name: testcase_negative_offset_out_of_range +tracksRegLiveness: true +stack: + - { id: 0, name: dummy, type: default, offset: 0, size: 8, alignment: 8 } + - { id: 1, name: object, type: default, offset: 0, size: 8, alignment: 8 } +body: | + bb.0 (%ir-block.0): + liveins: $p0 + + ; CHECK-LABEL: name: testcase_negative_offset_out_of_range + ; CHECK: liveins: $p0 + ; CHECK: $sp = frame-setup SUBXri $sp, 16, 0 + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK: $x8 = SUBXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RB_IMM renamable $p0, killed $x8, 0 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = SUBXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RB_H_IMM renamable $p0, killed $x8, 0 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = SUBXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RB_S_IMM renamable $p0, killed $x8, 0 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = SUBXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RB_D_IMM renamable $p0, killed $x8, 0 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = SUBXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RSB_H_IMM renamable $p0, killed $x8, 0 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = SUBXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RSB_S_IMM renamable $p0, killed $x8, 0 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = SUBXri $sp, 1, 0 + ; CHECK: renamable $z0 = LD1RSB_D_IMM renamable $p0, killed $x8, 0 :: (load (s8) from %ir.object, align 2) + ; CHECK: $x8 = SUBXri $sp, 2, 0 + ; CHECK: renamable $z0 = LD1RH_IMM renamable $p0, killed $x8, 0 :: (load (s16) from %ir.object) + ; CHECK: $x8 = SUBXri $sp, 2, 0 + ; CHECK: renamable $z0 = LD1RH_S_IMM renamable $p0, killed $x8, 0 :: (load (s16) from %ir.object) + ; CHECK: $x8 = SUBXri $sp, 2, 0 + ; CHECK: renamable $z0 = LD1RH_D_IMM renamable $p0, killed $x8, 0 :: (load (s16) from %ir.object) + ; CHECK: $x8 = SUBXri $sp, 2, 0 + ; CHECK: renamable $z0 = LD1RSH_S_IMM renamable $p0, killed $x8, 0 :: (load (s16) from 
%ir.object) + ; CHECK: $x8 = SUBXri $sp, 2, 0 + ; CHECK: renamable $z0 = LD1RSH_D_IMM renamable $p0, killed $x8, 0 :: (load (s16) from %ir.object) + ; CHECK: $x8 = SUBXri $sp, 4, 0 + ; CHECK: renamable $z0 = LD1RW_IMM renamable $p0, killed $x8, 0 :: (load (s32) from %ir.object) + ; CHECK: $x8 = SUBXri $sp, 4, 0 + ; CHECK: renamable $z0 = LD1RW_D_IMM renamable $p0, killed $x8, 0 :: (load (s32) from %ir.object) + ; CHECK: $x8 = SUBXri $sp, 4, 0 + ; CHECK: renamable $z0 = LD1RSW_IMM renamable $p0, killed $x8, 0 :: (load (s32) from %ir.object) + ; CHECK: $x8 = SUBXri $sp, 8, 0 + ; CHECK: renamable $z0 = LD1RD_IMM renamable $p0, killed $x8, 0 :: (load (s64) from %ir.object) + ; CHECK: $x8 = SUBXri $sp, 8, 0 + ; CHECK: renamable $z0 = LD1RD_IMM renamable $p0, killed $x8, 0 :: (load (s64) from %ir.object) + ; CHECK: $sp = frame-destroy ADDXri $sp, 16, 0 + ; CHECK: RET_ReallyLR implicit $z0 + renamable $z0 = LD1RB_IMM renamable $p0, %stack.1.object, -1 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RB_H_IMM renamable $p0, %stack.1.object, -1 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RB_S_IMM renamable $p0, %stack.1.object, -1 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RB_D_IMM renamable $p0, %stack.1.object, -1 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RSB_H_IMM renamable $p0, %stack.1.object, -1 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RSB_S_IMM renamable $p0, %stack.1.object, -1 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RSB_D_IMM renamable $p0, %stack.1.object, -1 :: (load 1 from %ir.object, align 2) + renamable $z0 = LD1RH_IMM renamable $p0, %stack.1.object, -1 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RH_S_IMM renamable $p0, %stack.1.object, -1 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RH_D_IMM renamable $p0, %stack.1.object, -1 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RSH_S_IMM renamable $p0, %stack.1.object, -1 :: (load 2 from 
%ir.object, align 2) + renamable $z0 = LD1RSH_D_IMM renamable $p0, %stack.1.object, -1 :: (load 2 from %ir.object, align 2) + renamable $z0 = LD1RW_IMM renamable $p0, %stack.1.object, -1 :: (load 4 from %ir.object, align 4) + renamable $z0 = LD1RW_D_IMM renamable $p0, %stack.1.object, -1 :: (load 4 from %ir.object, align 4) + renamable $z0 = LD1RSW_IMM renamable $p0, %stack.1.object, -1 :: (load 4 from %ir.object, align 4) + renamable $z0 = LD1RD_IMM renamable $p0, %stack.1.object, -1 :: (load 8 from %ir.object, align 8) + renamable $z0 = LD1RD_IMM renamable $p0, %stack.1.object, -1 :: (load 8 from %ir.object, align 8) + RET_ReallyLR implicit $z0 +... diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -1,74 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s ;; Splats of legal integer vector types define @sve_splat_16xi8(i8 %val) { -; CHECK-LABEL: @sve_splat_16xi8 -; CHECK: mov z0.b, w0 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_16xi8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.b, w0 +; CHECK-NEXT: ret %ins = insertelement undef, i8 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_8xi16(i16 %val) { -; CHECK-LABEL: @sve_splat_8xi16 -; CHECK: mov z0.h, w0 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_8xi16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, w0 +; CHECK-NEXT: ret %ins = insertelement undef, i16 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_4xi32(i32 %val) { -; CHECK-LABEL: @sve_splat_4xi32 -; CHECK: mov z0.s, w0 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_4xi32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: ret %ins = insertelement undef, i32 %val, i32 0 %splat = shufflevector %ins, 
undef, zeroinitializer ret %splat } define @sve_splat_2xi64(i64 %val) { -; CHECK-LABEL: @sve_splat_2xi64 -; CHECK: mov z0.d, x0 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_2xi64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, x0 +; CHECK-NEXT: ret %ins = insertelement undef, i64 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_16xi8_imm() { -; CHECK-LABEL: @sve_splat_16xi8_imm -; CHECK: mov z0.b, #1 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_16xi8_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.b, #1 // =0x1 +; CHECK-NEXT: ret %ins = insertelement undef, i8 1, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_8xi16_imm() { -; CHECK-LABEL: @sve_splat_8xi16_imm -; CHECK: mov z0.h, #1 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_8xi16_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #1 // =0x1 +; CHECK-NEXT: ret %ins = insertelement undef, i16 1, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_4xi32_imm() { -; CHECK-LABEL: @sve_splat_4xi32_imm -; CHECK: mov z0.s, #1 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_4xi32_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #1 // =0x1 +; CHECK-NEXT: ret %ins = insertelement undef, i32 1, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_2xi64_imm() { -; CHECK-LABEL: @sve_splat_2xi64_imm -; CHECK: mov z0.d, #1 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_2xi64_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #1 // =0x1 +; CHECK-NEXT: ret %ins = insertelement undef, i64 1, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat @@ -77,54 +86,63 @@ ;; Promote splats of smaller illegal integer vector types define @sve_splat_2xi8(i8 %val) { -; CHECK-LABEL: @sve_splat_2xi8 -; CHECK: mov z0.d, x0 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_2xi8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: mov z0.d, x0 +; 
CHECK-NEXT: ret %ins = insertelement undef, i8 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_4xi8(i8 %val) { -; CHECK-LABEL: @sve_splat_4xi8 -; CHECK: mov z0.s, w0 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_4xi8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: ret %ins = insertelement undef, i8 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_8xi8(i8 %val) { -; CHECK-LABEL: @sve_splat_8xi8 -; CHECK: mov z0.h, w0 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_8xi8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, w0 +; CHECK-NEXT: ret %ins = insertelement undef, i8 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_2xi16(i16 %val) { -; CHECK-LABEL: @sve_splat_2xi16 -; CHECK: mov z0.d, x0 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_2xi16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: mov z0.d, x0 +; CHECK-NEXT: ret %ins = insertelement undef, i16 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_4xi16(i16 %val) { -; CHECK-LABEL: @sve_splat_4xi16 -; CHECK: mov z0.s, w0 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_4xi16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: ret %ins = insertelement undef, i16 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_2xi32(i32 %val) { -; CHECK-LABEL: @sve_splat_2xi32 -; CHECK: mov z0.d, x0 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_2xi32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: mov z0.d, x0 +; CHECK-NEXT: ret %ins = insertelement undef, i32 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat @@ -134,8 +152,9 @@ define @sve_splat_1xi32(i32 %val) { ; CHECK-LABEL: sve_splat_1xi32: -; CHECK: mov z0.s, w0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.s, 
w0 +; CHECK-NEXT: ret entry: %ins = insertelement undef, i32 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer @@ -143,51 +162,60 @@ } define @sve_splat_12xi32(i32 %val) { -; CHECK-LABEL: @sve_splat_12xi32 -; CHECK: mov z0.s, w0 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_12xi32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: ret %ins = insertelement undef, i32 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_2xi1(i1 %val) { -; CHECK-LABEL: @sve_splat_2xi1 -; CHECK: sbfx x8, x0, #0, #1 -; CHECK-NEXT: whilelo p0.d, xzr, x8 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_2xi1: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.d, xzr, x8 +; CHECK-NEXT: ret %ins = insertelement undef, i1 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_4xi1(i1 %val) { -; CHECK-LABEL: @sve_splat_4xi1 -; CHECK: sbfx x8, x0, #0, #1 -; CHECK-NEXT: whilelo p0.s, xzr, x8 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_4xi1: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.s, xzr, x8 +; CHECK-NEXT: ret %ins = insertelement undef, i1 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_8xi1(i1 %val) { -; CHECK-LABEL: @sve_splat_8xi1 -; CHECK: sbfx x8, x0, #0, #1 -; CHECK-NEXT: whilelo p0.h, xzr, x8 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_8xi1: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.h, xzr, x8 +; CHECK-NEXT: ret %ins = insertelement undef, i1 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat } define @sve_splat_16xi1(i1 %val) { -; 
CHECK-LABEL: @sve_splat_16xi1 -; CHECK: sbfx x8, x0, #0, #1 -; CHECK-NEXT: whilelo p0.b, xzr, x8 -; CHECK-NEXT: ret +; CHECK-LABEL: sve_splat_16xi1: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x8, x0, #0, #1 +; CHECK-NEXT: whilelo p0.b, xzr, x8 +; CHECK-NEXT: ret %ins = insertelement undef, i1 %val, i32 0 %splat = shufflevector %ins, undef, zeroinitializer ret %splat @@ -197,8 +225,10 @@ define @splat_nxv8bf16(bfloat %val) #0 { ; CHECK-LABEL: splat_nxv8bf16: -; CHECK: mov z0.h, h0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret %1 = insertelement undef, bfloat %val, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -206,8 +236,10 @@ define @splat_nxv8f16(half %val) { ; CHECK-LABEL: splat_nxv8f16: -; CHECK: mov z0.h, h0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret %1 = insertelement undef, half %val, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -215,8 +247,10 @@ define @splat_nxv4f16(half %val) { ; CHECK-LABEL: splat_nxv4f16: -; CHECK: mov z0.h, h0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret %1 = insertelement undef, half %val, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -224,8 +258,10 @@ define @splat_nxv2f16(half %val) { ; CHECK-LABEL: splat_nxv2f16: -; CHECK: mov z0.h, h0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: mov z0.h, h0 +; CHECK-NEXT: ret %1 = insertelement undef, half %val, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -233,8 +269,10 @@ define @splat_nxv4f32(float %val) { ; CHECK-LABEL: splat_nxv4f32: -; CHECK: mov z0.s, s0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; 
CHECK-NEXT: mov z0.s, s0 +; CHECK-NEXT: ret %1 = insertelement undef, float %val, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -242,8 +280,10 @@ define @splat_nxv2f32(float %val) { ; CHECK-LABEL: splat_nxv2f32: -; CHECK: mov z0.s, s0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: mov z0.s, s0 +; CHECK-NEXT: ret %1 = insertelement undef, float %val, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -251,8 +291,10 @@ define @splat_nxv2f64(double %val) { ; CHECK-LABEL: splat_nxv2f64: -; CHECK: mov z0.d, d0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: mov z0.d, d0 +; CHECK-NEXT: ret %1 = insertelement undef, double %val, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -260,57 +302,65 @@ define @splat_nxv8f16_zero() { ; CHECK-LABEL: splat_nxv8f16_zero: -; CHECK: mov z0.h, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ret ret zeroinitializer } define @splat_nxv8bf16_zero() #0 { ; CHECK-LABEL: splat_nxv8bf16_zero: -; CHECK: mov z0.h, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ret ret zeroinitializer } define @splat_nxv4f16_zero() { ; CHECK-LABEL: splat_nxv4f16_zero: -; CHECK: mov z0.h, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ret ret zeroinitializer } define @splat_nxv2f16_zero() { ; CHECK-LABEL: splat_nxv2f16_zero: -; CHECK: mov z0.h, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ret ret zeroinitializer } define @splat_nxv4f32_zero() { ; CHECK-LABEL: splat_nxv4f32_zero: -; CHECK: mov z0.s, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: ret ret zeroinitializer } define @splat_nxv2f32_zero() { ; CHECK-LABEL: splat_nxv2f32_zero: -; CHECK: mov z0.s, #0 -; CHECK-NEXT: ret +; 
CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: ret ret zeroinitializer } define @splat_nxv2f64_zero() { ; CHECK-LABEL: splat_nxv2f64_zero: -; CHECK: mov z0.d, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.d, #0 // =0x0 +; CHECK-NEXT: ret ret zeroinitializer } define @splat_nxv8f16_imm() { ; CHECK-LABEL: splat_nxv8f16_imm: -; CHECK: fmov z0.h, #1.00000000 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z0.h, #1.00000000 +; CHECK-NEXT: ret %1 = insertelement undef, half 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -318,8 +368,9 @@ define @splat_nxv4f16_imm() { ; CHECK-LABEL: splat_nxv4f16_imm: -; CHECK: fmov z0.h, #1.00000000 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z0.h, #1.00000000 +; CHECK-NEXT: ret %1 = insertelement undef, half 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -327,8 +378,9 @@ define @splat_nxv2f16_imm() { ; CHECK-LABEL: splat_nxv2f16_imm: -; CHECK: fmov z0.h, #1.00000000 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z0.h, #1.00000000 +; CHECK-NEXT: ret %1 = insertelement undef, half 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -336,8 +388,9 @@ define @splat_nxv4f32_imm() { ; CHECK-LABEL: splat_nxv4f32_imm: -; CHECK: fmov z0.s, #1.00000000 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z0.s, #1.00000000 +; CHECK-NEXT: ret %1 = insertelement undef, float 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -345,8 +398,9 @@ define @splat_nxv2f32_imm() { ; CHECK-LABEL: splat_nxv2f32_imm: -; CHECK: fmov z0.s, #1.00000000 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fmov z0.s, #1.00000000 +; CHECK-NEXT: ret %1 = insertelement undef, float 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -354,8 +408,9 @@ define @splat_nxv2f64_imm() { ; CHECK-LABEL: splat_nxv2f64_imm: -; CHECK: fmov z0.d, #1.00000000 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: fmov 
z0.d, #1.00000000 +; CHECK-NEXT: ret %1 = insertelement undef, double 1.0, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 @@ -363,8 +418,9 @@ define @splat_nxv4i32_fold( %x) { ; CHECK-LABEL: splat_nxv4i32_fold: -; CHECK: mov z0.s, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: ret %r = sub %x, %x ret %r } @@ -372,38 +428,84 @@ define @splat_nxv4f32_fold( %x) { ; CHECK-LABEL: splat_nxv4f32_fold: -; CHECK: mov z0.s, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: ret %r = fsub nnan %x, %x ret %r } define @splat_nxv2f32_fmov_fold() { -; CHECK-LABEL: splat_nxv2f32_fmov_fold -; CHECK: mov w8, #1109917696 -; CHECK-NEXT: mov z0.s, w8 +; CHECK-LABEL: splat_nxv2f32_fmov_fold: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1109917696 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret %1 = insertelement undef, float 4.200000e+01, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 } define @splat_nxv4f32_fmov_fold() { -; CHECK-LABEL: splat_nxv4f32_fmov_fold -; CHECK: mov w8, #1109917696 -; CHECK-NEXT: mov z0.s, w8 +; CHECK-LABEL: splat_nxv4f32_fmov_fold: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1109917696 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret %1 = insertelement undef, float 4.200000e+01, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 } define @splat_nxv2f64_fmov_fold() { -; CHECK-LABEL: splat_nxv2f64_fmov_fold -; CHECK: mov x8, #4631107791820423168 -; CHECK-NEXT: mov z0.d, x8 +; CHECK-LABEL: splat_nxv2f64_fmov_fold: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #4631107791820423168 +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: ret %1 = insertelement undef, double 4.200000e+01, i32 0 %2 = shufflevector %1, undef, zeroinitializer ret %2 } +; Splat of float constants not representable as a single immediate. 
+ +define @splat_nxv2f32_imm_out_of_range() { +; CHECK-LABEL: splat_nxv2f32_imm_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #7864 +; CHECK-NEXT: movk w8, #16469, lsl #16 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret + %1 = insertelement undef, float 3.3299999237060546875, i32 0 + %2 = shufflevector %1, undef, zeroinitializer + ret %2 +} + +define @splat_nxv4f32_imm_out_of_range() { +; CHECK-LABEL: splat_nxv4f32_imm_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #7864 +; CHECK-NEXT: movk w8, #16469, lsl #16 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ret + %1 = insertelement undef, float 3.3299999237060546875, i32 0 + %2 = shufflevector %1, undef, zeroinitializer + ret %2 +} + +define @splat_nxv2f64_imm_out_of_range() { +; CHECK-LABEL: splat_nxv2f64_imm_out_of_range: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI47_0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add x8, x8, :lo12:.LCPI47_0 +; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] +; CHECK-NEXT: ret + %1 = insertelement undef, double 3.33, i32 0 + %2 = shufflevector %1, undef, zeroinitializer + ret %2 +} + ; +bf16 is required for the bfloat version. attributes #0 = { "target-features"="+sve,+bf16" }