NOTE(review): this patch was recovered from a copy in which line breaks were
collapsed and every <vscale x N x iM> type had been stripped. Types below were
restored from the intrinsic name suffixes (nxvNiM) and the register widths in
the CHECK lines. The leading context line "ret %res" of each hunk belongs to a
function outside this excerpt, so its type could not be recovered - fill it in
(and confirm the blank context line position) before applying.

Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll
@@ -112,6 +112,100 @@
   ret %res
 }
 
+
+; Extending loads from unpacked to wide illegal types
+
+define <vscale x 4 x i64> @masked_sload_4i8_4i64(ptr %a, ptr %b, <vscale x 4 x i1> %c) {
+; CHECK-LABEL: masked_sload_4i8_4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x1]
+; CHECK-NEXT:    sunpkhi z2.d, z0.s
+; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    sunpkhi z3.d, z1.s
+; CHECK-NEXT:    sunpklo z1.d, z1.s
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    add z1.d, z2.d, z3.d
+; CHECK-NEXT:    ret
+  %aval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%a, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer)
+  %bval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%b, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer)
+  %aext = sext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
+  %bext = sext <vscale x 4 x i8> %bval to <vscale x 4 x i64>
+  %res = add <vscale x 4 x i64> %aext, %bext
+  ret <vscale x 4 x i64> %res
+}
+
+define <vscale x 4 x i64> @masked_sload_4i16_4i64(ptr %a, ptr %b, <vscale x 4 x i1> %c) {
+; CHECK-LABEL: masked_sload_4i16_4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1sh { z1.s }, p0/z, [x1]
+; CHECK-NEXT:    sunpkhi z2.d, z0.s
+; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    sunpkhi z3.d, z1.s
+; CHECK-NEXT:    sunpklo z1.d, z1.s
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    add z1.d, z2.d, z3.d
+; CHECK-NEXT:    ret
+  %aval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%a, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer)
+  %bval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%b, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer)
+  %aext = sext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
+  %bext = sext <vscale x 4 x i16> %bval to <vscale x 4 x i64>
+  %res = add <vscale x 4 x i64> %aext, %bext
+  ret <vscale x 4 x i64> %res
+}
+
+define <vscale x 8 x i32> @masked_sload_8i8_8i32(ptr %a, ptr %b, <vscale x 8 x i1> %c) {
+; CHECK-LABEL: masked_sload_8i8_8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1sb { z1.h }, p0/z, [x1]
+; CHECK-NEXT:    sunpkhi z2.s, z0.h
+; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    sunpkhi z3.s, z1.h
+; CHECK-NEXT:    sunpklo z1.s, z1.h
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    add z1.s, z2.s, z3.s
+; CHECK-NEXT:    ret
+  %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+  %bval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%b, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+  %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
+  %bext = sext <vscale x 8 x i8> %bval to <vscale x 8 x i32>
+  %res = add <vscale x 8 x i32> %aext, %bext
+  ret <vscale x 8 x i32> %res
+}
+
+define <vscale x 8 x i64> @masked_sload_8i8_8i64(ptr %a, ptr %b, <vscale x 8 x i1> %c) {
+; CHECK-LABEL: masked_sload_8i8_8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1sb { z1.h }, p0/z, [x1]
+; CHECK-NEXT:    sunpkhi z2.s, z0.h
+; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    sunpkhi z3.s, z1.h
+; CHECK-NEXT:    sunpklo z1.s, z1.h
+; CHECK-NEXT:    sunpkhi z4.d, z2.s
+; CHECK-NEXT:    sunpklo z2.d, z2.s
+; CHECK-NEXT:    sunpkhi z5.d, z0.s
+; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    sunpkhi z6.d, z3.s
+; CHECK-NEXT:    sunpklo z3.d, z3.s
+; CHECK-NEXT:    sunpkhi z7.d, z1.s
+; CHECK-NEXT:    sunpklo z1.d, z1.s
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    add z1.d, z5.d, z7.d
+; CHECK-NEXT:    add z2.d, z2.d, z3.d
+; CHECK-NEXT:    add z3.d, z4.d, z6.d
+; CHECK-NEXT:    ret
+  %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+  %bval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%b, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+  %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
+  %bext = sext <vscale x 8 x i8> %bval to <vscale x 8 x i64>
+  %res = add <vscale x 8 x i64> %aext, %bext
+  ret <vscale x 8 x i64> %res
+}
+
+
 declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
 declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
 declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
Index: llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
+++ llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll
@@ -108,6 +108,99 @@
   ret %res
 }
 
+; Extending loads from unpacked to wide illegal types
+
+define <vscale x 4 x i64> @masked_zload_4i8_4i64(ptr %a, ptr %b, <vscale x 4 x i1> %c) {
+; CHECK-LABEL: masked_zload_4i8_4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x1]
+; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    uunpkhi z3.d, z1.s
+; CHECK-NEXT:    uunpklo z1.d, z1.s
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    add z1.d, z2.d, z3.d
+; CHECK-NEXT:    ret
+  %aval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%a, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer)
+  %bval = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8> *%b, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i8> zeroinitializer)
+  %aext = zext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
+  %bext = zext <vscale x 4 x i8> %bval to <vscale x 4 x i64>
+  %res = add <vscale x 4 x i64> %aext, %bext
+  ret <vscale x 4 x i64> %res
+}
+
+define <vscale x 4 x i64> @masked_zload_4i16_4i64(ptr %a, ptr %b, <vscale x 4 x i1> %c) {
+; CHECK-LABEL: masked_zload_4i16_4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1h { z1.s }, p0/z, [x1]
+; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    uunpkhi z3.d, z1.s
+; CHECK-NEXT:    uunpklo z1.d, z1.s
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    add z1.d, z2.d, z3.d
+; CHECK-NEXT:    ret
+  %aval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%a, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer)
+  %bval = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16> *%b, i32 16, <vscale x 4 x i1> %c, <vscale x 4 x i16> zeroinitializer)
+  %aext = zext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
+  %bext = zext <vscale x 4 x i16> %bval to <vscale x 4 x i64>
+  %res = add <vscale x 4 x i64> %aext, %bext
+  ret <vscale x 4 x i64> %res
+}
+
+define <vscale x 8 x i32> @masked_zload_8i8_8i32(ptr %a, ptr %b, <vscale x 8 x i1> %c) {
+; CHECK-LABEL: masked_zload_8i8_8i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1b { z1.h }, p0/z, [x1]
+; CHECK-NEXT:    uunpkhi z2.s, z0.h
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z3.s, z1.h
+; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    add z1.s, z2.s, z3.s
+; CHECK-NEXT:    ret
+  %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+  %bval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%b, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
+  %bext = zext <vscale x 8 x i8> %bval to <vscale x 8 x i32>
+  %res = add <vscale x 8 x i32> %aext, %bext
+  ret <vscale x 8 x i32> %res
+}
+
+define <vscale x 8 x i64> @masked_zload_8i8_8i64(ptr %a, ptr %b, <vscale x 8 x i1> %c) {
+; CHECK-LABEL: masked_zload_8i8_8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1b { z1.h }, p0/z, [x1]
+; CHECK-NEXT:    uunpkhi z2.s, z0.h
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uunpklo z3.s, z1.h
+; CHECK-NEXT:    uunpkhi z1.s, z1.h
+; CHECK-NEXT:    uunpkhi z4.d, z2.s
+; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    uunpkhi z5.d, z0.s
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    uunpklo z6.d, z3.s
+; CHECK-NEXT:    uunpkhi z7.d, z1.s
+; CHECK-NEXT:    uunpklo z24.d, z1.s
+; CHECK-NEXT:    uunpkhi z1.d, z3.s
+; CHECK-NEXT:    add z0.d, z0.d, z6.d
+; CHECK-NEXT:    add z3.d, z4.d, z7.d
+; CHECK-NEXT:    add z1.d, z5.d, z1.d
+; CHECK-NEXT:    add z2.d, z2.d, z24.d
+; CHECK-NEXT:    ret
+  %aval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%a, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+  %bval = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8> *%b, i32 16, <vscale x 8 x i1> %c, <vscale x 8 x i8> zeroinitializer)
+  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
+  %bext = zext <vscale x 8 x i8> %bval to <vscale x 8 x i64>
+  %res = add <vscale x 8 x i64> %aext, %bext
+  ret <vscale x 8 x i64> %res
+}
+
+
 declare <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>*, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
 declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
 declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)