diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -46,166 +46,170 @@ def v512i1 : ValueType<512, 24>; // 512 x i1 vector value def v1024i1 : ValueType<1024, 25>; // 1024 x i1 vector value -def v1i8 : ValueType<8, 26>; // 1 x i8 vector value -def v2i8 : ValueType<16, 27>; // 2 x i8 vector value -def v4i8 : ValueType<32, 28>; // 4 x i8 vector value -def v8i8 : ValueType<64, 29>; // 8 x i8 vector value -def v16i8 : ValueType<128, 30>; // 16 x i8 vector value -def v32i8 : ValueType<256, 31>; // 32 x i8 vector value -def v64i8 : ValueType<512, 32>; // 64 x i8 vector value -def v128i8 : ValueType<1024, 33>; // 128 x i8 vector value -def v256i8 : ValueType<2048, 34>; // 256 x i8 vector value - -def v1i16 : ValueType<16, 35>; // 1 x i16 vector value -def v2i16 : ValueType<32, 36>; // 2 x i16 vector value -def v3i16 : ValueType<48, 37>; // 3 x i16 vector value -def v4i16 : ValueType<64, 38>; // 4 x i16 vector value -def v8i16 : ValueType<128, 39>; // 8 x i16 vector value -def v16i16 : ValueType<256, 40>; // 16 x i16 vector value -def v32i16 : ValueType<512, 41>; // 32 x i16 vector value -def v64i16 : ValueType<1024, 42>; // 64 x i16 vector value -def v128i16 : ValueType<2048, 43>; // 128 x i16 vector value -def v256i16 : ValueType<4096, 44>; // 256 x i16 vector value - -def v1i32 : ValueType<32, 45>; // 1 x i32 vector value -def v2i32 : ValueType<64, 46>; // 2 x i32 vector value -def v3i32 : ValueType<96, 47>; // 3 x i32 vector value -def v4i32 : ValueType<128, 48>; // 4 x i32 vector value -def v5i32 : ValueType<160, 49>; // 5 x i32 vector value -def v8i32 : ValueType<256, 50>; // 8 x i32 vector value -def v16i32 : ValueType<512, 51>; // 16 x i32 vector value -def v32i32 : ValueType<1024, 52>; // 32 x i32 vector value -def v64i32 : ValueType<2048, 53>; // 64 x i32 vector value -def v128i32 : ValueType<4096, 54>; // 128 x i32 vector value -def v256i32 : ValueType<8192, 55>; // 256 x i32 vector value -def v512i32 : ValueType<16384, 56>; // 512 x i32 vector value -def v1024i32 : ValueType<32768, 57>; // 1024 x i32 vector value -def v2048i32 : ValueType<65536, 58>; // 2048 x i32 vector value - -def v1i64 : ValueType<64, 59>; // 1 x i64 vector value -def v2i64 : ValueType<128, 60>; // 2 x i64 vector value -def v4i64 : ValueType<256, 61>; // 4 x i64 vector value -def v8i64 : ValueType<512, 62>; // 8 x i64 vector value -def v16i64 : ValueType<1024, 63>; // 16 x i64 vector value -def v32i64 : ValueType<2048, 64>; // 32 x i64 vector value -def v64i64 : ValueType<4096, 65>; // 64 x i64 vector value -def v128i64 : ValueType<8192, 66>; // 128 x i64 vector value -def v256i64 : ValueType<16384, 67>; // 256 x i64 vector value - -def v1i128 : ValueType<128, 68>; // 1 x i128 vector value - -def v1f16 : ValueType<16, 69>; // 1 x f16 vector value -def v2f16 : ValueType<32, 70>; // 2 x f16 vector value -def v3f16 : ValueType<48, 71>; // 3 x f16 vector value -def v4f16 : ValueType<64, 72>; // 4 x f16 vector value -def v8f16 : ValueType<128, 73>; // 8 x f16 vector value -def v16f16 : ValueType<256, 74>; // 16 x f16 vector value -def v32f16 : ValueType<512, 75>; // 32 x f16 vector value -def v64f16 : ValueType<1024, 76>; // 64 x f16 vector value -def v128f16 : ValueType<2048, 77>; // 128 x f16 vector value -def v256f16 : ValueType<4096, 78>; // 256 x f16 vector value - -def v2bf16 : ValueType<32, 79>; // 2 x bf16 vector value -def v3bf16 : ValueType<48, 80>; // 3 x bf16 vector value 
-def v4bf16 : ValueType<64, 81>; // 4 x bf16 vector value -def v8bf16 : ValueType<128, 82>; // 8 x bf16 vector value -def v16bf16 : ValueType<256, 83>; // 16 x bf16 vector value -def v32bf16 : ValueType<512, 84>; // 32 x bf16 vector value -def v64bf16 : ValueType<1024, 85>; // 64 x bf16 vector value -def v128bf16 : ValueType<2048, 86>; // 128 x bf16 vector value - -def v1f32 : ValueType<32, 87>; // 1 x f32 vector value -def v2f32 : ValueType<64, 88>; // 2 x f32 vector value -def v3f32 : ValueType<96, 89>; // 3 x f32 vector value -def v4f32 : ValueType<128, 90>; // 4 x f32 vector value -def v5f32 : ValueType<160, 91>; // 5 x f32 vector value -def v8f32 : ValueType<256, 92>; // 8 x f32 vector value -def v16f32 : ValueType<512, 93>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 94>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 95>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 96>; // 128 x f32 vector value -def v256f32 : ValueType<8192, 97>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 98>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 99>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 100>; // 2048 x f32 vector value - -def v1f64 : ValueType<64, 101>; // 1 x f64 vector value -def v2f64 : ValueType<128, 102>; // 2 x f64 vector value -def v4f64 : ValueType<256, 103>; // 4 x f64 vector value -def v8f64 : ValueType<512, 104>; // 8 x f64 vector value -def v16f64 : ValueType<1024, 105>; // 16 x f64 vector value -def v32f64 : ValueType<2048, 106>; // 32 x f64 vector value -def v64f64 : ValueType<4096, 107>; // 64 x f64 vector value -def v128f64 : ValueType<8192, 108>; // 128 x f64 vector value -def v256f64 : ValueType<16384, 109>; // 256 x f64 vector value - -def nxv1i1 : ValueType<1, 110>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 111>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 112>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 113>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 114>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 115>; // n x 32 x i1 vector value -def nxv64i1 : ValueType<64, 116>; // n x 64 x i1 vector value - -def nxv1i8 : ValueType<8, 117>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 118>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 119>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 120>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 121>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 122>; // n x 32 x i8 vector value -def nxv64i8 : ValueType<512, 123>; // n x 64 x i8 vector value - -def nxv1i16 : ValueType<16, 124>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 125>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 126>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 127>; // n x 8 x i16 vector value -def nxv16i16 : ValueType<256, 128>; // n x 16 x i16 vector value -def nxv32i16 : ValueType<512, 129>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 130>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 131>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 132>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 133>; // n x 8 x i32 vector value -def nxv16i32 : ValueType<512, 134>; // n x 16 x i32 vector value -def nxv32i32 : ValueType<1024, 135>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 136>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 137>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 138>; 
// n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 139>; // n x 8 x i64 vector value -def nxv16i64 : ValueType<1024, 140>; // n x 16 x i64 vector value -def nxv32i64 : ValueType<2048, 141>; // n x 32 x i64 vector value - -def nxv1f16 : ValueType<16, 142>; // n x 1 x f16 vector value -def nxv2f16 : ValueType<32, 143>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64, 144>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 145>; // n x 8 x f16 vector value -def nxv16f16 : ValueType<256, 146>; // n x 16 x f16 vector value -def nxv32f16 : ValueType<512, 147>; // n x 32 x f16 vector value - -def nxv1bf16 : ValueType<16, 148>; // n x 1 x bf16 vector value -def nxv2bf16 : ValueType<32, 149>; // n x 2 x bf16 vector value -def nxv4bf16 : ValueType<64, 150>; // n x 4 x bf16 vector value -def nxv8bf16 : ValueType<128, 151>; // n x 8 x bf16 vector value - -def nxv1f32 : ValueType<32, 152>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64, 153>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 154>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 155>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 156>; // n x 16 x f32 vector value - -def nxv1f64 : ValueType<64, 157>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 158>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 159>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 160>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64, 161>; // X86 MMX value -def FlagVT : ValueType<0, 162>; // Pre-RA sched glue -def isVoid : ValueType<0, 163>; // Produces no value -def untyped : ValueType<8, 164>; // Produces an untyped value -def funcref : ValueType<0, 165>; // WebAssembly's funcref type -def externref : ValueType<0, 166>; // WebAssembly's externref type -def x86amx : ValueType<8192, 167>; // X86 AMX value +def v1i8 : ValueType<8, 26>; // 1 x i8 vector value +def v2i8 : ValueType<16, 27>; // 2 x i8 vector value +def v4i8 : ValueType<32, 28>; // 4 x i8 vector value +def v8i8 : ValueType<64, 29>; // 8 x i8 vector value +def v16i8 : ValueType<128, 30>; // 16 x i8 vector value +def v32i8 : ValueType<256, 31>; // 32 x i8 vector value +def v64i8 : ValueType<512, 32>; // 64 x i8 vector value +def v128i8 : ValueType<1024, 33>; // 128 x i8 vector value +def v256i8 : ValueType<2048, 34>; // 256 x i8 vector value +def v512i8 : ValueType<4096, 35>; // 512 x i8 vector value +def v1024i8 : ValueType<8192, 36>; // 1024 x i8 vector value + +def v1i16 : ValueType<16, 37>; // 1 x i16 vector value +def v2i16 : ValueType<32, 38>; // 2 x i16 vector value +def v3i16 : ValueType<48, 39>; // 3 x i16 vector value +def v4i16 : ValueType<64, 40>; // 4 x i16 vector value +def v8i16 : ValueType<128, 41>; // 8 x i16 vector value +def v16i16 : ValueType<256, 42>; // 16 x i16 vector value +def v32i16 : ValueType<512, 43>; // 32 x i16 vector value +def v64i16 : ValueType<1024, 44>; // 64 x i16 vector value +def v128i16 : ValueType<2048, 45>; // 128 x i16 vector value +def v256i16 : ValueType<4096, 46>; // 256 x i16 vector value +def v512i16 : ValueType<8192, 47>; // 512 x i16 vector value + +def v1i32 : ValueType<32, 48>; // 1 x i32 vector value +def v2i32 : ValueType<64, 49>; // 2 x i32 vector value +def v3i32 : ValueType<96, 50>; // 3 x i32 vector value +def v4i32 : ValueType<128, 51>; // 4 x i32 vector value +def v5i32 : ValueType<160, 52>; // 5 x i32 vector value +def v8i32 : ValueType<256, 53>; // 8 x i32 vector value +def v16i32 : ValueType<512, 54>; // 16 x i32 vector value +def v32i32 : 
ValueType<1024, 55>; // 32 x i32 vector value +def v64i32 : ValueType<2048, 56>; // 64 x i32 vector value +def v128i32 : ValueType<4096, 57>; // 128 x i32 vector value +def v256i32 : ValueType<8192, 58>; // 256 x i32 vector value +def v512i32 : ValueType<16384, 59>; // 512 x i32 vector value +def v1024i32 : ValueType<32768, 60>; // 1024 x i32 vector value +def v2048i32 : ValueType<65536, 61>; // 2048 x i32 vector value + +def v1i64 : ValueType<64, 62>; // 1 x i64 vector value +def v2i64 : ValueType<128, 63>; // 2 x i64 vector value +def v4i64 : ValueType<256, 64>; // 4 x i64 vector value +def v8i64 : ValueType<512, 65>; // 8 x i64 vector value +def v16i64 : ValueType<1024, 66>; // 16 x i64 vector value +def v32i64 : ValueType<2048, 67>; // 32 x i64 vector value +def v64i64 : ValueType<4096, 68>; // 64 x i64 vector value +def v128i64 : ValueType<8192, 69>; // 128 x i64 vector value +def v256i64 : ValueType<16384, 70>; // 256 x i64 vector value + +def v1i128 : ValueType<128, 71>; // 1 x i128 vector value + +def v1f16 : ValueType<16, 72>; // 1 x f16 vector value +def v2f16 : ValueType<32, 73>; // 2 x f16 vector value +def v3f16 : ValueType<48, 74>; // 3 x f16 vector value +def v4f16 : ValueType<64, 75>; // 4 x f16 vector value +def v8f16 : ValueType<128, 76>; // 8 x f16 vector value +def v16f16 : ValueType<256, 77>; // 16 x f16 vector value +def v32f16 : ValueType<512, 78>; // 32 x f16 vector value +def v64f16 : ValueType<1024, 79>; // 64 x f16 vector value +def v128f16 : ValueType<2048, 80>; // 128 x f16 vector value +def v256f16 : ValueType<4096, 81>; // 256 x f16 vector value +def v512f16 : ValueType<8192, 82>; // 512 x f16 vector value + +def v2bf16 : ValueType<32, 83>; // 2 x bf16 vector value +def v3bf16 : ValueType<48, 84>; // 3 x bf16 vector value +def v4bf16 : ValueType<64, 85>; // 4 x bf16 vector value +def v8bf16 : ValueType<128, 86>; // 8 x bf16 vector value +def v16bf16 : ValueType<256, 87>; // 16 x bf16 vector value +def v32bf16 : ValueType<512, 88>; // 32 x bf16 vector value +def v64bf16 : ValueType<1024, 89>; // 64 x bf16 vector value +def v128bf16 : ValueType<2048, 90>; // 128 x bf16 vector value + +def v1f32 : ValueType<32, 91>; // 1 x f32 vector value +def v2f32 : ValueType<64, 92>; // 2 x f32 vector value +def v3f32 : ValueType<96, 93>; // 3 x f32 vector value +def v4f32 : ValueType<128, 94>; // 4 x f32 vector value +def v5f32 : ValueType<160, 95>; // 5 x f32 vector value +def v8f32 : ValueType<256, 96>; // 8 x f32 vector value +def v16f32 : ValueType<512, 97>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 98>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 99>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 100>; // 128 x f32 vector value +def v256f32 : ValueType<8192, 101>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 102>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 103>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 104>; // 2048 x f32 vector value + +def v1f64 : ValueType<64, 105>; // 1 x f64 vector value +def v2f64 : ValueType<128, 106>; // 2 x f64 vector value +def v4f64 : ValueType<256, 107>; // 4 x f64 vector value +def v8f64 : ValueType<512, 108>; // 8 x f64 vector value +def v16f64 : ValueType<1024, 109>; // 16 x f64 vector value +def v32f64 : ValueType<2048, 110>; // 32 x f64 vector value +def v64f64 : ValueType<4096, 111>; // 64 x f64 vector value +def v128f64 : ValueType<8192, 112>; // 128 x f64 vector value +def v256f64 : ValueType<16384, 113>; // 256 x f64 vector value + +def nxv1i1 : 
ValueType<1, 114>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 115>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 116>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 117>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 118>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 119>; // n x 32 x i1 vector value +def nxv64i1 : ValueType<64, 120>; // n x 64 x i1 vector value + +def nxv1i8 : ValueType<8, 121>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 122>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 123>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 124>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 125>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 126>; // n x 32 x i8 vector value +def nxv64i8 : ValueType<512, 127>; // n x 64 x i8 vector value + +def nxv1i16 : ValueType<16, 128>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 129>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 130>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 131>; // n x 8 x i16 vector value +def nxv16i16 : ValueType<256, 132>; // n x 16 x i16 vector value +def nxv32i16 : ValueType<512, 133>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 134>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 135>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 136>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 137>; // n x 8 x i32 vector value +def nxv16i32 : ValueType<512, 138>; // n x 16 x i32 vector value +def nxv32i32 : ValueType<1024, 139>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 140>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 141>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 142>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 143>; // n x 8 x i64 vector value +def nxv16i64 : ValueType<1024, 144>; // n x 16 x i64 vector value +def nxv32i64 : ValueType<2048, 145>; // n x 32 x i64 vector value + +def nxv1f16 : ValueType<16, 146>; // n x 1 x f16 vector value +def nxv2f16 : ValueType<32, 147>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64, 148>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 149>; // n x 8 x f16 vector value +def nxv16f16 : ValueType<256, 150>; // n x 16 x f16 vector value +def nxv32f16 : ValueType<512, 151>; // n x 32 x f16 vector value + +def nxv1bf16 : ValueType<16, 152>; // n x 1 x bf16 vector value +def nxv2bf16 : ValueType<32, 153>; // n x 2 x bf16 vector value +def nxv4bf16 : ValueType<64, 154>; // n x 4 x bf16 vector value +def nxv8bf16 : ValueType<128, 155>; // n x 8 x bf16 vector value + +def nxv1f32 : ValueType<32, 156>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64, 157>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 158>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 159>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 160>; // n x 16 x f32 vector value + +def nxv1f64 : ValueType<64, 161>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 162>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 163>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 164>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64, 165>; // X86 MMX value +def FlagVT : ValueType<0, 166>; // Pre-RA sched glue +def isVoid : ValueType<0, 167>; // Produces no value +def untyped : ValueType<8, 168>; // Produces an untyped value +def funcref : ValueType<0, 169>; // WebAssembly's funcref type +def externref : 
ValueType<0, 170>; // WebAssembly's externref type +def x86amx : ValueType<8192, 171>; // X86 AMX value def token : ValueType<0, 248>; // TokenTy diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -72,101 +72,105 @@ v512i1 = 24, // 512 x i1 v1024i1 = 25, // 1024 x i1 - v1i8 = 26, // 1 x i8 - v2i8 = 27, // 2 x i8 - v4i8 = 28, // 4 x i8 - v8i8 = 29, // 8 x i8 - v16i8 = 30, // 16 x i8 - v32i8 = 31, // 32 x i8 - v64i8 = 32, // 64 x i8 - v128i8 = 33, // 128 x i8 - v256i8 = 34, // 256 x i8 - - v1i16 = 35, // 1 x i16 - v2i16 = 36, // 2 x i16 - v3i16 = 37, // 3 x i16 - v4i16 = 38, // 4 x i16 - v8i16 = 39, // 8 x i16 - v16i16 = 40, // 16 x i16 - v32i16 = 41, // 32 x i16 - v64i16 = 42, // 64 x i16 - v128i16 = 43, // 128 x i16 - v256i16 = 44, // 256 x i16 - - v1i32 = 45, // 1 x i32 - v2i32 = 46, // 2 x i32 - v3i32 = 47, // 3 x i32 - v4i32 = 48, // 4 x i32 - v5i32 = 49, // 5 x i32 - v8i32 = 50, // 8 x i32 - v16i32 = 51, // 16 x i32 - v32i32 = 52, // 32 x i32 - v64i32 = 53, // 64 x i32 - v128i32 = 54, // 128 x i32 - v256i32 = 55, // 256 x i32 - v512i32 = 56, // 512 x i32 - v1024i32 = 57, // 1024 x i32 - v2048i32 = 58, // 2048 x i32 - - v1i64 = 59, // 1 x i64 - v2i64 = 60, // 2 x i64 - v4i64 = 61, // 4 x i64 - v8i64 = 62, // 8 x i64 - v16i64 = 63, // 16 x i64 - v32i64 = 64, // 32 x i64 - v64i64 = 65, // 64 x i64 - v128i64 = 66, // 128 x i64 - v256i64 = 67, // 256 x i64 - - v1i128 = 68, // 1 x i128 + v1i8 = 26, // 1 x i8 + v2i8 = 27, // 2 x i8 + v4i8 = 28, // 4 x i8 + v8i8 = 29, // 8 x i8 + v16i8 = 30, // 16 x i8 + v32i8 = 31, // 32 x i8 + v64i8 = 32, // 64 x i8 + v128i8 = 33, // 128 x i8 + v256i8 = 34, // 256 x i8 + v512i8 = 35, // 512 x i8 + v1024i8 = 36, // 1024 x i8 + + v1i16 = 37, // 1 x i16 + v2i16 = 38, // 2 x i16 + v3i16 = 39, // 3 x i16 + v4i16 = 40, // 4 x i16 + v8i16 = 41, // 8 x i16 + v16i16 = 42, // 16 x i16 + v32i16 = 43, // 32 x i16 + v64i16 = 44, // 64 x i16 + v128i16 = 45, // 128 x i16 + v256i16 = 46, // 256 x i16 + v512i16 = 47, // 512 x i16 + + v1i32 = 48, // 1 x i32 + v2i32 = 49, // 2 x i32 + v3i32 = 50, // 3 x i32 + v4i32 = 51, // 4 x i32 + v5i32 = 52, // 5 x i32 + v8i32 = 53, // 8 x i32 + v16i32 = 54, // 16 x i32 + v32i32 = 55, // 32 x i32 + v64i32 = 56, // 64 x i32 + v128i32 = 57, // 128 x i32 + v256i32 = 58, // 256 x i32 + v512i32 = 59, // 512 x i32 + v1024i32 = 60, // 1024 x i32 + v2048i32 = 61, // 2048 x i32 + + v1i64 = 62, // 1 x i64 + v2i64 = 63, // 2 x i64 + v4i64 = 64, // 4 x i64 + v8i64 = 65, // 8 x i64 + v16i64 = 66, // 16 x i64 + v32i64 = 67, // 32 x i64 + v64i64 = 68, // 64 x i64 + v128i64 = 69, // 128 x i64 + v256i64 = 70, // 256 x i64 + + v1i128 = 71, // 1 x i128 FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128, - v1f16 = 69, // 1 x f16 - v2f16 = 70, // 2 x f16 - v3f16 = 71, // 3 x f16 - v4f16 = 72, // 4 x f16 - v8f16 = 73, // 8 x f16 - v16f16 = 74, // 16 x f16 - v32f16 = 75, // 32 x f16 - v64f16 = 76, // 64 x f16 - v128f16 = 77, // 128 x f16 - v256f16 = 78, // 256 x f16 - - v2bf16 = 79, // 2 x bf16 - v3bf16 = 80, // 3 x bf16 - v4bf16 = 81, // 4 x bf16 - v8bf16 = 82, // 8 x bf16 - v16bf16 = 83, // 16 x bf16 - v32bf16 = 84, // 32 x bf16 - v64bf16 = 85, // 64 x bf16 - v128bf16 = 86, // 128 x bf16 - - v1f32 = 87, // 1 x f32 - v2f32 = 88, // 2 x f32 - v3f32 = 89, // 3 x f32 - v4f32 = 90, // 4 x f32 - v5f32 = 91, // 5 x f32 - v8f32 = 92, // 8 x f32 - v16f32 = 93, // 16 x 
f32 - v32f32 = 94, // 32 x f32 - v64f32 = 95, // 64 x f32 - v128f32 = 96, // 128 x f32 - v256f32 = 97, // 256 x f32 - v512f32 = 98, // 512 x f32 - v1024f32 = 99, // 1024 x f32 - v2048f32 = 100, // 2048 x f32 - - v1f64 = 101, // 1 x f64 - v2f64 = 102, // 2 x f64 - v4f64 = 103, // 4 x f64 - v8f64 = 104, // 8 x f64 - v16f64 = 105, // 16 x f64 - v32f64 = 106, // 32 x f64 - v64f64 = 107, // 64 x f64 - v128f64 = 108, // 128 x f64 - v256f64 = 109, // 256 x f64 + v1f16 = 72, // 1 x f16 + v2f16 = 73, // 2 x f16 + v3f16 = 74, // 3 x f16 + v4f16 = 75, // 4 x f16 + v8f16 = 76, // 8 x f16 + v16f16 = 77, // 16 x f16 + v32f16 = 78, // 32 x f16 + v64f16 = 79, // 64 x f16 + v128f16 = 80, // 128 x f16 + v256f16 = 81, // 256 x f16 + v512f16 = 82, // 512 x f16 + + v2bf16 = 83, // 2 x bf16 + v3bf16 = 84, // 3 x bf16 + v4bf16 = 85, // 4 x bf16 + v8bf16 = 86, // 8 x bf16 + v16bf16 = 87, // 16 x bf16 + v32bf16 = 88, // 32 x bf16 + v64bf16 = 89, // 64 x bf16 + v128bf16 = 90, // 128 x bf16 + + v1f32 = 91, // 1 x f32 + v2f32 = 92, // 2 x f32 + v3f32 = 93, // 3 x f32 + v4f32 = 94, // 4 x f32 + v5f32 = 95, // 5 x f32 + v8f32 = 96, // 8 x f32 + v16f32 = 97, // 16 x f32 + v32f32 = 98, // 32 x f32 + v64f32 = 99, // 64 x f32 + v128f32 = 100, // 128 x f32 + v256f32 = 101, // 256 x f32 + v512f32 = 102, // 512 x f32 + v1024f32 = 103, // 1024 x f32 + v2048f32 = 104, // 2048 x f32 + + v1f64 = 105, // 1 x f64 + v2f64 = 106, // 2 x f64 + v4f64 = 107, // 4 x f64 + v8f64 = 108, // 8 x f64 + v16f64 = 109, // 16 x f64 + v32f64 = 110, // 32 x f64 + v64f64 = 111, // 64 x f64 + v128f64 = 112, // 128 x f64 + v256f64 = 113, // 256 x f64 FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v1f16, LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64, @@ -174,68 +178,68 @@ FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64, - nxv1i1 = 110, // n x 1 x i1 - nxv2i1 = 111, // n x 2 x i1 - nxv4i1 = 112, // n x 4 x i1 - nxv8i1 = 113, // n x 8 x i1 - nxv16i1 = 114, // n x 16 x i1 - nxv32i1 = 115, // n x 32 x i1 - nxv64i1 = 116, // n x 64 x i1 - - nxv1i8 = 117, // n x 1 x i8 - nxv2i8 = 118, // n x 2 x i8 - nxv4i8 = 119, // n x 4 x i8 - nxv8i8 = 120, // n x 8 x i8 - nxv16i8 = 121, // n x 16 x i8 - nxv32i8 = 122, // n x 32 x i8 - nxv64i8 = 123, // n x 64 x i8 - - nxv1i16 = 124, // n x 1 x i16 - nxv2i16 = 125, // n x 2 x i16 - nxv4i16 = 126, // n x 4 x i16 - nxv8i16 = 127, // n x 8 x i16 - nxv16i16 = 128, // n x 16 x i16 - nxv32i16 = 129, // n x 32 x i16 - - nxv1i32 = 130, // n x 1 x i32 - nxv2i32 = 131, // n x 2 x i32 - nxv4i32 = 132, // n x 4 x i32 - nxv8i32 = 133, // n x 8 x i32 - nxv16i32 = 134, // n x 16 x i32 - nxv32i32 = 135, // n x 32 x i32 - - nxv1i64 = 136, // n x 1 x i64 - nxv2i64 = 137, // n x 2 x i64 - nxv4i64 = 138, // n x 4 x i64 - nxv8i64 = 139, // n x 8 x i64 - nxv16i64 = 140, // n x 16 x i64 - nxv32i64 = 141, // n x 32 x i64 + nxv1i1 = 114, // n x 1 x i1 + nxv2i1 = 115, // n x 2 x i1 + nxv4i1 = 116, // n x 4 x i1 + nxv8i1 = 117, // n x 8 x i1 + nxv16i1 = 118, // n x 16 x i1 + nxv32i1 = 119, // n x 32 x i1 + nxv64i1 = 120, // n x 64 x i1 + + nxv1i8 = 121, // n x 1 x i8 + nxv2i8 = 122, // n x 2 x i8 + nxv4i8 = 123, // n x 4 x i8 + nxv8i8 = 124, // n x 8 x i8 + nxv16i8 = 125, // n x 16 x i8 + nxv32i8 = 126, // n x 32 x i8 + nxv64i8 = 127, // n x 64 x i8 + + nxv1i16 = 128, // n x 1 x i16 + nxv2i16 = 129, // n x 2 x i16 + nxv4i16 = 130, // n x 4 x i16 + nxv8i16 = 131, // n x 8 x i16 + nxv16i16 = 132, // n x 16 x i16 + nxv32i16 = 133, // n x 32 x i16 + + nxv1i32 = 134, // n x 1 x i32 + nxv2i32 = 135, // n x 2 x i32 + nxv4i32 = 136, // 
n x 4 x i32 + nxv8i32 = 137, // n x 8 x i32 + nxv16i32 = 138, // n x 16 x i32 + nxv32i32 = 139, // n x 32 x i32 + + nxv1i64 = 140, // n x 1 x i64 + nxv2i64 = 141, // n x 2 x i64 + nxv4i64 = 142, // n x 4 x i64 + nxv8i64 = 143, // n x 8 x i64 + nxv16i64 = 144, // n x 16 x i64 + nxv32i64 = 145, // n x 32 x i64 FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64, - nxv1f16 = 142, // n x 1 x f16 - nxv2f16 = 143, // n x 2 x f16 - nxv4f16 = 144, // n x 4 x f16 - nxv8f16 = 145, // n x 8 x f16 - nxv16f16 = 146, // n x 16 x f16 - nxv32f16 = 147, // n x 32 x f16 - - nxv1bf16 = 148, // n x 1 x bf16 - nxv2bf16 = 149, // n x 2 x bf16 - nxv4bf16 = 150, // n x 4 x bf16 - nxv8bf16 = 151, // n x 8 x bf16 - - nxv1f32 = 152, // n x 1 x f32 - nxv2f32 = 153, // n x 2 x f32 - nxv4f32 = 154, // n x 4 x f32 - nxv8f32 = 155, // n x 8 x f32 - nxv16f32 = 156, // n x 16 x f32 - - nxv1f64 = 157, // n x 1 x f64 - nxv2f64 = 158, // n x 2 x f64 - nxv4f64 = 159, // n x 4 x f64 - nxv8f64 = 160, // n x 8 x f64 + nxv1f16 = 146, // n x 1 x f16 + nxv2f16 = 147, // n x 2 x f16 + nxv4f16 = 148, // n x 4 x f16 + nxv8f16 = 149, // n x 8 x f16 + nxv16f16 = 150, // n x 16 x f16 + nxv32f16 = 151, // n x 32 x f16 + + nxv1bf16 = 152, // n x 1 x bf16 + nxv2bf16 = 153, // n x 2 x bf16 + nxv4bf16 = 154, // n x 4 x bf16 + nxv8bf16 = 155, // n x 8 x bf16 + + nxv1f32 = 156, // n x 1 x f32 + nxv2f32 = 157, // n x 2 x f32 + nxv4f32 = 158, // n x 4 x f32 + nxv8f32 = 159, // n x 8 x f32 + nxv16f32 = 160, // n x 16 x f32 + + nxv1f64 = 161, // n x 1 x f64 + nxv2f64 = 162, // n x 2 x f64 + nxv4f64 = 163, // n x 4 x f64 + nxv8f64 = 164, // n x 8 x f64 FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16, LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64, @@ -246,19 +250,19 @@ FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 161, // This is an X86 MMX value + x86mmx = 165, // This is an X86 MMX value - Glue = 162, // This glues nodes together during pre-RA sched + Glue = 166, // This glues nodes together during pre-RA sched - isVoid = 163, // This has no value + isVoid = 167, // This has no value - Untyped = 164, // This value takes a register, but has + Untyped = 168, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - funcref = 165, // WebAssembly's funcref type - externref = 166, // WebAssembly's externref type - x86amx = 167, // This is an X86 AMX value + funcref = 169, // WebAssembly's funcref type + externref = 170, // WebAssembly's externref type + x86amx = 171, // This is an X86 AMX value FIRST_VALUETYPE = 1, // This is always the beginning of the list. LAST_VALUETYPE = x86amx, // This always remains at the end of the list. 
@@ -535,6 +539,8 @@ case v64i8: case v128i8: case v256i8: + case v512i8: + case v1024i8: case nxv1i8: case nxv2i8: case nxv4i8: @@ -552,6 +558,7 @@ case v64i16: case v128i16: case v256i16: + case v512i16: case nxv1i16: case nxv2i16: case nxv4i16: @@ -604,6 +611,7 @@ case v64f16: case v128f16: case v256f16: + case v512f16: case nxv1f16: case nxv2f16: case nxv4f16: @@ -665,10 +673,14 @@ case v2048i32: case v2048f32: return 2048; case v1024i1: + case v1024i8: case v1024i32: case v1024f32: return 1024; case v512i1: + case v512i8: + case v512i16: case v512i32: + case v512f16: case v512f32: return 512; case v256i1: case v256i8: @@ -986,14 +998,18 @@ case v64f32: case v32f64: return TypeSize::Fixed(2048); case nxv32i64: return TypeSize::Scalable(2048); + case v512i8: case v256i16: case v128i32: case v64i64: case v256f16: case v128f32: case v64f64: return TypeSize::Fixed(4096); + case v1024i8: + case v512i16: case v256i32: case v128i64: + case v512f16: case v256f32: case x86amx: case v128f64: return TypeSize::Fixed(8192); @@ -1158,6 +1174,8 @@ if (NumElements == 64) return MVT::v64i8; if (NumElements == 128) return MVT::v128i8; if (NumElements == 256) return MVT::v256i8; + if (NumElements == 512) return MVT::v512i8; + if (NumElements == 1024) return MVT::v1024i8; break; case MVT::i16: if (NumElements == 1) return MVT::v1i16; @@ -1170,6 +1188,7 @@ if (NumElements == 64) return MVT::v64i16; if (NumElements == 128) return MVT::v128i16; if (NumElements == 256) return MVT::v256i16; + if (NumElements == 512) return MVT::v512i16; break; case MVT::i32: if (NumElements == 1) return MVT::v1i32; @@ -1212,6 +1231,7 @@ if (NumElements == 64) return MVT::v64f16; if (NumElements == 128) return MVT::v128f16; if (NumElements == 256) return MVT::v256f16; + if (NumElements == 512) return MVT::v512f16; break; case MVT::bf16: if (NumElements == 2) return MVT::v2bf16; diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -238,6 +238,10 @@ return FixedVectorType::get(Type::getInt8Ty(Context), 128); case MVT::v256i8: return FixedVectorType::get(Type::getInt8Ty(Context), 256); + case MVT::v512i8: + return FixedVectorType::get(Type::getInt8Ty(Context), 512); + case MVT::v1024i8: + return FixedVectorType::get(Type::getInt8Ty(Context), 1024); case MVT::v1i16: return FixedVectorType::get(Type::getInt16Ty(Context), 1); case MVT::v2i16: @@ -258,6 +262,8 @@ return FixedVectorType::get(Type::getInt16Ty(Context), 128); case MVT::v256i16: return FixedVectorType::get(Type::getInt16Ty(Context), 256); + case MVT::v512i16: + return FixedVectorType::get(Type::getInt16Ty(Context), 512); case MVT::v1i32: return FixedVectorType::get(Type::getInt32Ty(Context), 1); case MVT::v2i32: @@ -326,6 +332,8 @@ return FixedVectorType::get(Type::getHalfTy(Context), 128); case MVT::v256f16: return FixedVectorType::get(Type::getHalfTy(Context), 256); + case MVT::v512f16: + return FixedVectorType::get(Type::getHalfTy(Context), 512); case MVT::v2bf16: return FixedVectorType::get(Type::getBFloatTy(Context), 2); case MVT::v3bf16: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1171,11 +1171,11 @@ if (!Subtarget.useRVVForFixedLengthVectors()) return false; - // We only support a set of vector types with an equivalent number of - // elements to avoid legalization issues. 
Therefore -- since we don't have - // v512i8/v512i16/etc -- the longest fixed-length vector type we support has - // 256 elements. - if (VT.getVectorNumElements() > 256) + // We only support a set of vector types with a consistent maximum fixed size + // across all supported vector element types to avoid legalization issues. + // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest + // fixed-length vector type we support is 1024 bytes. + if (VT.getFixedSizeInBits() > 1024 * 8) return false; unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits(); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll @@ -1,9 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=256 < %s | FileCheck %s --check-prefix=VLEN256 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=512 < %s | FileCheck %s --check-prefix=VLEN512 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=VLEN1024 -; FIXME: A larger VLEN is producing worse code. -; FIXME: v256i16 is legal so v512i8 should be too since they're the same size. define <512 x i8> @bitcast_1024B(<256 x i16> %a, <512 x i8> %b) { ; VLEN256-LABEL: bitcast_1024B: ; VLEN256: # %bb.0: @@ -18,1188 +17,17 @@ ; ; VLEN512-LABEL: bitcast_1024B: ; VLEN512: # %bb.0: -; VLEN512-NEXT: addi sp, sp, -1024 -; VLEN512-NEXT: .cfi_def_cfa_offset 1024 -; VLEN512-NEXT: sd ra, 1016(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s0, 1008(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s1, 1000(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s2, 992(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s3, 984(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s4, 976(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s5, 968(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s6, 960(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s7, 952(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s8, 944(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s9, 936(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s10, 928(sp) # 8-byte Folded Spill -; VLEN512-NEXT: sd s11, 920(sp) # 8-byte Folded Spill -; VLEN512-NEXT: .cfi_offset ra, -8 -; VLEN512-NEXT: .cfi_offset s0, -16 -; VLEN512-NEXT: .cfi_offset s1, -24 -; VLEN512-NEXT: .cfi_offset s2, -32 -; VLEN512-NEXT: .cfi_offset s3, -40 -; VLEN512-NEXT: .cfi_offset s4, -48 -; VLEN512-NEXT: .cfi_offset s5, -56 -; VLEN512-NEXT: .cfi_offset s6, -64 -; VLEN512-NEXT: .cfi_offset s7, -72 -; VLEN512-NEXT: .cfi_offset s8, -80 -; VLEN512-NEXT: .cfi_offset s9, -88 -; VLEN512-NEXT: .cfi_offset s10, -96 -; VLEN512-NEXT: .cfi_offset s11, -104 -; VLEN512-NEXT: addi s0, sp, 1024 -; VLEN512-NEXT: .cfi_def_cfa s0, 0 -; VLEN512-NEXT: csrr a0, vlenb -; VLEN512-NEXT: slli a0, a0, 3 -; VLEN512-NEXT: sub sp, sp, a0 -; VLEN512-NEXT: andi sp, sp, -256 -; VLEN512-NEXT: addi a0, zero, 32 -; VLEN512-NEXT: vsetivli zero, 1, e64, m8, ta, mu -; VLEN512-NEXT: vslidedown.vx v24, v8, a0 -; VLEN512-NEXT: vmv.x.s a6, v24 -; VLEN512-NEXT: addi a0, zero, 33 -; VLEN512-NEXT: vslidedown.vx v24, v8, a0 -; VLEN512-NEXT: addi a0, sp, 920 -; VLEN512-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; VLEN512-NEXT: addi a0, zero, 
34 -; VLEN512-NEXT: addi a1, zero, 35 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s a7, v0 -; VLEN512-NEXT: addi a1, zero, 36 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s t0, v0 -; VLEN512-NEXT: addi a1, zero, 37 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s t1, v0 -; VLEN512-NEXT: addi a1, zero, 38 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s t2, v0 -; VLEN512-NEXT: addi a1, zero, 39 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s t3, v0 -; VLEN512-NEXT: addi a1, zero, 40 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s t4, v0 -; VLEN512-NEXT: addi a1, zero, 41 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s t5, v0 -; VLEN512-NEXT: addi a1, zero, 42 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s t6, v0 -; VLEN512-NEXT: addi a1, zero, 43 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s s2, v0 -; VLEN512-NEXT: addi a1, zero, 44 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s s3, v0 -; VLEN512-NEXT: addi a1, zero, 45 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s s4, v0 -; VLEN512-NEXT: addi a1, zero, 46 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s s5, v0 -; VLEN512-NEXT: addi a1, zero, 47 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s s6, v0 -; VLEN512-NEXT: addi a1, zero, 48 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s s7, v0 -; VLEN512-NEXT: addi a1, zero, 49 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s s8, v0 -; VLEN512-NEXT: addi a1, zero, 50 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s s9, v0 -; VLEN512-NEXT: addi a1, zero, 51 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s s10, v0 -; VLEN512-NEXT: addi a1, zero, 52 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s s11, v0 -; VLEN512-NEXT: addi a1, zero, 53 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s ra, v0 -; VLEN512-NEXT: addi a1, zero, 54 -; VLEN512-NEXT: vslidedown.vx v0, v8, a1 -; VLEN512-NEXT: vmv.x.s a1, v0 -; VLEN512-NEXT: addi a2, zero, 55 -; VLEN512-NEXT: vslidedown.vx v0, v8, a2 -; VLEN512-NEXT: vmv.x.s a2, v0 -; VLEN512-NEXT: addi a3, zero, 56 -; VLEN512-NEXT: vslidedown.vx v0, v8, a3 -; VLEN512-NEXT: vmv.x.s s1, v0 -; VLEN512-NEXT: addi a3, zero, 57 -; VLEN512-NEXT: vslidedown.vx v0, v8, a3 -; VLEN512-NEXT: vmv.x.s a3, v0 -; VLEN512-NEXT: addi a4, zero, 58 -; VLEN512-NEXT: vslidedown.vx v0, v8, a4 -; VLEN512-NEXT: vmv.x.s a4, v0 -; VLEN512-NEXT: addi a5, zero, 63 -; VLEN512-NEXT: vslidedown.vx v0, v8, a5 -; VLEN512-NEXT: vmv.x.s a5, v0 -; VLEN512-NEXT: vslidedown.vx v0, v8, a0 -; VLEN512-NEXT: srli a0, a5, 56 -; VLEN512-NEXT: sb a0, 511(sp) -; VLEN512-NEXT: srli a0, a5, 48 -; VLEN512-NEXT: sb a0, 510(sp) -; VLEN512-NEXT: srli a0, a5, 40 -; VLEN512-NEXT: sb a0, 509(sp) -; VLEN512-NEXT: srli a0, a5, 32 -; VLEN512-NEXT: sb a0, 508(sp) -; VLEN512-NEXT: srli a0, a5, 24 -; VLEN512-NEXT: sb a0, 507(sp) -; VLEN512-NEXT: srli a0, a5, 16 -; VLEN512-NEXT: sb a0, 506(sp) -; VLEN512-NEXT: addi a0, zero, 62 -; VLEN512-NEXT: vslidedown.vx v24, v8, a0 -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: sb a5, 504(sp) -; VLEN512-NEXT: srli a5, a5, 8 -; VLEN512-NEXT: sb a5, 505(sp) -; VLEN512-NEXT: srli a5, a0, 56 -; VLEN512-NEXT: sb a5, 503(sp) -; VLEN512-NEXT: srli a5, a0, 48 -; VLEN512-NEXT: sb a5, 502(sp) -; VLEN512-NEXT: srli a5, a0, 40 -; VLEN512-NEXT: sb 
a5, 501(sp) -; VLEN512-NEXT: srli a5, a0, 32 -; VLEN512-NEXT: sb a5, 500(sp) -; VLEN512-NEXT: srli a5, a0, 24 -; VLEN512-NEXT: sb a5, 499(sp) -; VLEN512-NEXT: srli a5, a0, 16 -; VLEN512-NEXT: sb a5, 498(sp) -; VLEN512-NEXT: addi a5, zero, 61 -; VLEN512-NEXT: vslidedown.vx v24, v8, a5 -; VLEN512-NEXT: vmv.x.s a5, v24 -; VLEN512-NEXT: sb a0, 496(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 497(sp) -; VLEN512-NEXT: srli a0, a5, 56 -; VLEN512-NEXT: sb a0, 495(sp) -; VLEN512-NEXT: srli a0, a5, 48 -; VLEN512-NEXT: sb a0, 494(sp) -; VLEN512-NEXT: srli a0, a5, 40 -; VLEN512-NEXT: sb a0, 493(sp) -; VLEN512-NEXT: srli a0, a5, 32 -; VLEN512-NEXT: sb a0, 492(sp) -; VLEN512-NEXT: srli a0, a5, 24 -; VLEN512-NEXT: sb a0, 491(sp) -; VLEN512-NEXT: srli a0, a5, 16 -; VLEN512-NEXT: sb a0, 490(sp) -; VLEN512-NEXT: addi a0, zero, 60 -; VLEN512-NEXT: vslidedown.vx v24, v8, a0 -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: sb a5, 488(sp) -; VLEN512-NEXT: srli a5, a5, 8 -; VLEN512-NEXT: sb a5, 489(sp) -; VLEN512-NEXT: srli a5, a0, 56 -; VLEN512-NEXT: sb a5, 487(sp) -; VLEN512-NEXT: srli a5, a0, 48 -; VLEN512-NEXT: sb a5, 486(sp) -; VLEN512-NEXT: srli a5, a0, 40 -; VLEN512-NEXT: sb a5, 485(sp) -; VLEN512-NEXT: srli a5, a0, 32 -; VLEN512-NEXT: sb a5, 484(sp) -; VLEN512-NEXT: srli a5, a0, 24 -; VLEN512-NEXT: sb a5, 483(sp) -; VLEN512-NEXT: srli a5, a0, 16 -; VLEN512-NEXT: sb a5, 482(sp) -; VLEN512-NEXT: addi a5, zero, 59 -; VLEN512-NEXT: vslidedown.vx v24, v8, a5 -; VLEN512-NEXT: vmv.x.s a5, v24 -; VLEN512-NEXT: sb a0, 480(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 481(sp) -; VLEN512-NEXT: srli a0, a5, 56 -; VLEN512-NEXT: sb a0, 479(sp) -; VLEN512-NEXT: srli a0, a5, 48 -; VLEN512-NEXT: sb a0, 478(sp) -; VLEN512-NEXT: srli a0, a5, 40 -; VLEN512-NEXT: sb a0, 477(sp) -; VLEN512-NEXT: srli a0, a5, 32 -; VLEN512-NEXT: sb a0, 476(sp) -; VLEN512-NEXT: srli a0, a5, 24 -; VLEN512-NEXT: sb a0, 475(sp) -; VLEN512-NEXT: srli a0, a5, 16 -; VLEN512-NEXT: sb a0, 474(sp) -; VLEN512-NEXT: sb a5, 472(sp) -; VLEN512-NEXT: srli a0, a5, 8 -; VLEN512-NEXT: sb a0, 473(sp) -; VLEN512-NEXT: srli a0, a4, 56 -; VLEN512-NEXT: sb a0, 471(sp) -; VLEN512-NEXT: srli a0, a4, 48 -; VLEN512-NEXT: sb a0, 470(sp) -; VLEN512-NEXT: srli a0, a4, 40 -; VLEN512-NEXT: sb a0, 469(sp) -; VLEN512-NEXT: srli a0, a4, 32 -; VLEN512-NEXT: sb a0, 468(sp) -; VLEN512-NEXT: srli a0, a4, 24 -; VLEN512-NEXT: sb a0, 467(sp) -; VLEN512-NEXT: srli a0, a4, 16 -; VLEN512-NEXT: sb a0, 466(sp) -; VLEN512-NEXT: sb a4, 464(sp) -; VLEN512-NEXT: srli a0, a4, 8 -; VLEN512-NEXT: sb a0, 465(sp) -; VLEN512-NEXT: srli a0, a3, 56 -; VLEN512-NEXT: sb a0, 463(sp) -; VLEN512-NEXT: srli a0, a3, 48 -; VLEN512-NEXT: sb a0, 462(sp) -; VLEN512-NEXT: srli a0, a3, 40 -; VLEN512-NEXT: sb a0, 461(sp) -; VLEN512-NEXT: srli a0, a3, 32 -; VLEN512-NEXT: sb a0, 460(sp) -; VLEN512-NEXT: srli a0, a3, 24 -; VLEN512-NEXT: sb a0, 459(sp) -; VLEN512-NEXT: srli a0, a3, 16 -; VLEN512-NEXT: sb a0, 458(sp) -; VLEN512-NEXT: sb a3, 456(sp) -; VLEN512-NEXT: srli a0, a3, 8 -; VLEN512-NEXT: sb a0, 457(sp) -; VLEN512-NEXT: srli a0, s1, 56 -; VLEN512-NEXT: sb a0, 455(sp) -; VLEN512-NEXT: srli a0, s1, 48 -; VLEN512-NEXT: sb a0, 454(sp) -; VLEN512-NEXT: srli a0, s1, 40 -; VLEN512-NEXT: sb a0, 453(sp) -; VLEN512-NEXT: srli a0, s1, 32 -; VLEN512-NEXT: sb a0, 452(sp) -; VLEN512-NEXT: srli a0, s1, 24 -; VLEN512-NEXT: sb a0, 451(sp) -; VLEN512-NEXT: srli a0, s1, 16 -; VLEN512-NEXT: sb a0, 450(sp) -; VLEN512-NEXT: sb s1, 448(sp) -; VLEN512-NEXT: srli a0, s1, 8 -; VLEN512-NEXT: sb a0, 449(sp) -; 
VLEN512-NEXT: srli a0, a2, 56 -; VLEN512-NEXT: sb a0, 447(sp) -; VLEN512-NEXT: srli a0, a2, 48 -; VLEN512-NEXT: sb a0, 446(sp) -; VLEN512-NEXT: srli a0, a2, 40 -; VLEN512-NEXT: sb a0, 445(sp) -; VLEN512-NEXT: srli a0, a2, 32 -; VLEN512-NEXT: sb a0, 444(sp) -; VLEN512-NEXT: srli a0, a2, 24 -; VLEN512-NEXT: sb a0, 443(sp) -; VLEN512-NEXT: srli a0, a2, 16 -; VLEN512-NEXT: sb a0, 442(sp) -; VLEN512-NEXT: sb a2, 440(sp) -; VLEN512-NEXT: srli a0, a2, 8 -; VLEN512-NEXT: sb a0, 441(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 439(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 438(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 437(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 436(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 435(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 434(sp) -; VLEN512-NEXT: sb a1, 432(sp) -; VLEN512-NEXT: srli a0, a1, 8 -; VLEN512-NEXT: sb a0, 433(sp) -; VLEN512-NEXT: srli a0, ra, 56 -; VLEN512-NEXT: sb a0, 431(sp) -; VLEN512-NEXT: srli a0, ra, 48 -; VLEN512-NEXT: sb a0, 430(sp) -; VLEN512-NEXT: srli a0, ra, 40 -; VLEN512-NEXT: sb a0, 429(sp) -; VLEN512-NEXT: srli a0, ra, 32 -; VLEN512-NEXT: sb a0, 428(sp) -; VLEN512-NEXT: srli a0, ra, 24 -; VLEN512-NEXT: sb a0, 427(sp) -; VLEN512-NEXT: srli a0, ra, 16 -; VLEN512-NEXT: sb a0, 426(sp) -; VLEN512-NEXT: sb ra, 424(sp) -; VLEN512-NEXT: srli a0, ra, 8 -; VLEN512-NEXT: sb a0, 425(sp) -; VLEN512-NEXT: srli a0, s11, 56 -; VLEN512-NEXT: sb a0, 423(sp) -; VLEN512-NEXT: srli a0, s11, 48 -; VLEN512-NEXT: sb a0, 422(sp) -; VLEN512-NEXT: srli a0, s11, 40 -; VLEN512-NEXT: sb a0, 421(sp) -; VLEN512-NEXT: srli a0, s11, 32 -; VLEN512-NEXT: sb a0, 420(sp) -; VLEN512-NEXT: srli a0, s11, 24 -; VLEN512-NEXT: sb a0, 419(sp) -; VLEN512-NEXT: srli a0, s11, 16 -; VLEN512-NEXT: sb a0, 418(sp) -; VLEN512-NEXT: sb s11, 416(sp) -; VLEN512-NEXT: srli a0, s11, 8 -; VLEN512-NEXT: sb a0, 417(sp) -; VLEN512-NEXT: srli a0, s10, 56 -; VLEN512-NEXT: sb a0, 415(sp) -; VLEN512-NEXT: srli a0, s10, 48 -; VLEN512-NEXT: sb a0, 414(sp) -; VLEN512-NEXT: srli a0, s10, 40 -; VLEN512-NEXT: sb a0, 413(sp) -; VLEN512-NEXT: srli a0, s10, 32 -; VLEN512-NEXT: sb a0, 412(sp) -; VLEN512-NEXT: srli a0, s10, 24 -; VLEN512-NEXT: sb a0, 411(sp) -; VLEN512-NEXT: srli a0, s10, 16 -; VLEN512-NEXT: sb a0, 410(sp) -; VLEN512-NEXT: sb s10, 408(sp) -; VLEN512-NEXT: srli a0, s10, 8 -; VLEN512-NEXT: sb a0, 409(sp) -; VLEN512-NEXT: srli a0, s9, 56 -; VLEN512-NEXT: sb a0, 407(sp) -; VLEN512-NEXT: srli a0, s9, 48 -; VLEN512-NEXT: sb a0, 406(sp) -; VLEN512-NEXT: srli a0, s9, 40 -; VLEN512-NEXT: sb a0, 405(sp) -; VLEN512-NEXT: srli a0, s9, 32 -; VLEN512-NEXT: sb a0, 404(sp) -; VLEN512-NEXT: srli a0, s9, 24 -; VLEN512-NEXT: sb a0, 403(sp) -; VLEN512-NEXT: srli a0, s9, 16 -; VLEN512-NEXT: sb a0, 402(sp) -; VLEN512-NEXT: sb s9, 400(sp) -; VLEN512-NEXT: srli a0, s9, 8 -; VLEN512-NEXT: sb a0, 401(sp) -; VLEN512-NEXT: srli a0, s8, 56 -; VLEN512-NEXT: sb a0, 399(sp) -; VLEN512-NEXT: srli a0, s8, 48 -; VLEN512-NEXT: sb a0, 398(sp) -; VLEN512-NEXT: srli a0, s8, 40 -; VLEN512-NEXT: sb a0, 397(sp) -; VLEN512-NEXT: srli a0, s8, 32 -; VLEN512-NEXT: sb a0, 396(sp) -; VLEN512-NEXT: srli a0, s8, 24 -; VLEN512-NEXT: sb a0, 395(sp) -; VLEN512-NEXT: srli a0, s8, 16 -; VLEN512-NEXT: sb a0, 394(sp) -; VLEN512-NEXT: sb s8, 392(sp) -; VLEN512-NEXT: srli a0, s8, 8 -; VLEN512-NEXT: sb a0, 393(sp) -; VLEN512-NEXT: srli a0, s7, 56 -; VLEN512-NEXT: sb a0, 391(sp) -; VLEN512-NEXT: srli a0, s7, 48 -; VLEN512-NEXT: sb a0, 390(sp) -; 
VLEN512-NEXT: srli a0, s7, 40 -; VLEN512-NEXT: sb a0, 389(sp) -; VLEN512-NEXT: srli a0, s7, 32 -; VLEN512-NEXT: sb a0, 388(sp) -; VLEN512-NEXT: srli a0, s7, 24 -; VLEN512-NEXT: sb a0, 387(sp) -; VLEN512-NEXT: srli a0, s7, 16 -; VLEN512-NEXT: sb a0, 386(sp) -; VLEN512-NEXT: sb s7, 384(sp) -; VLEN512-NEXT: srli a0, s7, 8 -; VLEN512-NEXT: sb a0, 385(sp) -; VLEN512-NEXT: srli a0, s6, 56 -; VLEN512-NEXT: sb a0, 383(sp) -; VLEN512-NEXT: srli a0, s6, 48 -; VLEN512-NEXT: sb a0, 382(sp) -; VLEN512-NEXT: srli a0, s6, 40 -; VLEN512-NEXT: sb a0, 381(sp) -; VLEN512-NEXT: srli a0, s6, 32 -; VLEN512-NEXT: sb a0, 380(sp) -; VLEN512-NEXT: srli a0, s6, 24 -; VLEN512-NEXT: sb a0, 379(sp) -; VLEN512-NEXT: srli a0, s6, 16 -; VLEN512-NEXT: sb a0, 378(sp) -; VLEN512-NEXT: sb s6, 376(sp) -; VLEN512-NEXT: srli a0, s6, 8 -; VLEN512-NEXT: sb a0, 377(sp) -; VLEN512-NEXT: srli a0, s5, 56 -; VLEN512-NEXT: sb a0, 375(sp) -; VLEN512-NEXT: srli a0, s5, 48 -; VLEN512-NEXT: sb a0, 374(sp) -; VLEN512-NEXT: srli a0, s5, 40 -; VLEN512-NEXT: sb a0, 373(sp) -; VLEN512-NEXT: srli a0, s5, 32 -; VLEN512-NEXT: sb a0, 372(sp) -; VLEN512-NEXT: srli a0, s5, 24 -; VLEN512-NEXT: sb a0, 371(sp) -; VLEN512-NEXT: srli a0, s5, 16 -; VLEN512-NEXT: sb a0, 370(sp) -; VLEN512-NEXT: sb s5, 368(sp) -; VLEN512-NEXT: srli a0, s5, 8 -; VLEN512-NEXT: sb a0, 369(sp) -; VLEN512-NEXT: srli a0, s4, 56 -; VLEN512-NEXT: sb a0, 367(sp) -; VLEN512-NEXT: srli a0, s4, 48 -; VLEN512-NEXT: sb a0, 366(sp) -; VLEN512-NEXT: srli a0, s4, 40 -; VLEN512-NEXT: sb a0, 365(sp) -; VLEN512-NEXT: srli a0, s4, 32 -; VLEN512-NEXT: sb a0, 364(sp) -; VLEN512-NEXT: srli a0, s4, 24 -; VLEN512-NEXT: sb a0, 363(sp) -; VLEN512-NEXT: srli a0, s4, 16 -; VLEN512-NEXT: sb a0, 362(sp) -; VLEN512-NEXT: sb s4, 360(sp) -; VLEN512-NEXT: srli a0, s4, 8 -; VLEN512-NEXT: sb a0, 361(sp) -; VLEN512-NEXT: srli a0, s3, 56 -; VLEN512-NEXT: sb a0, 359(sp) -; VLEN512-NEXT: srli a0, s3, 48 -; VLEN512-NEXT: sb a0, 358(sp) -; VLEN512-NEXT: srli a0, s3, 40 -; VLEN512-NEXT: sb a0, 357(sp) -; VLEN512-NEXT: srli a0, s3, 32 -; VLEN512-NEXT: sb a0, 356(sp) -; VLEN512-NEXT: srli a0, s3, 24 -; VLEN512-NEXT: sb a0, 355(sp) -; VLEN512-NEXT: srli a0, s3, 16 -; VLEN512-NEXT: sb a0, 354(sp) -; VLEN512-NEXT: sb s3, 352(sp) -; VLEN512-NEXT: srli a0, s3, 8 -; VLEN512-NEXT: sb a0, 353(sp) -; VLEN512-NEXT: srli a0, s2, 56 -; VLEN512-NEXT: sb a0, 351(sp) -; VLEN512-NEXT: srli a0, s2, 48 -; VLEN512-NEXT: sb a0, 350(sp) -; VLEN512-NEXT: srli a0, s2, 40 -; VLEN512-NEXT: sb a0, 349(sp) -; VLEN512-NEXT: srli a0, s2, 32 -; VLEN512-NEXT: sb a0, 348(sp) -; VLEN512-NEXT: srli a0, s2, 24 -; VLEN512-NEXT: sb a0, 347(sp) -; VLEN512-NEXT: srli a0, s2, 16 -; VLEN512-NEXT: sb a0, 346(sp) -; VLEN512-NEXT: sb s2, 344(sp) -; VLEN512-NEXT: srli a0, s2, 8 -; VLEN512-NEXT: sb a0, 345(sp) -; VLEN512-NEXT: srli a0, t6, 56 -; VLEN512-NEXT: sb a0, 343(sp) -; VLEN512-NEXT: srli a0, t6, 48 -; VLEN512-NEXT: sb a0, 342(sp) -; VLEN512-NEXT: srli a0, t6, 40 -; VLEN512-NEXT: sb a0, 341(sp) -; VLEN512-NEXT: srli a0, t6, 32 -; VLEN512-NEXT: sb a0, 340(sp) -; VLEN512-NEXT: srli a0, t6, 24 -; VLEN512-NEXT: sb a0, 339(sp) -; VLEN512-NEXT: srli a0, t6, 16 -; VLEN512-NEXT: sb a0, 338(sp) -; VLEN512-NEXT: sb t6, 336(sp) -; VLEN512-NEXT: srli a0, t6, 8 -; VLEN512-NEXT: sb a0, 337(sp) -; VLEN512-NEXT: srli a0, t5, 56 -; VLEN512-NEXT: sb a0, 335(sp) -; VLEN512-NEXT: srli a0, t5, 48 -; VLEN512-NEXT: sb a0, 334(sp) -; VLEN512-NEXT: srli a0, t5, 40 -; VLEN512-NEXT: sb a0, 333(sp) -; VLEN512-NEXT: srli a0, t5, 32 -; VLEN512-NEXT: sb a0, 332(sp) -; VLEN512-NEXT: srli a0, 
t5, 24 -; VLEN512-NEXT: sb a0, 331(sp) -; VLEN512-NEXT: srli a0, t5, 16 -; VLEN512-NEXT: sb a0, 330(sp) -; VLEN512-NEXT: sb t5, 328(sp) -; VLEN512-NEXT: srli a0, t5, 8 -; VLEN512-NEXT: sb a0, 329(sp) -; VLEN512-NEXT: srli a0, t4, 56 -; VLEN512-NEXT: sb a0, 327(sp) -; VLEN512-NEXT: srli a0, t4, 48 -; VLEN512-NEXT: sb a0, 326(sp) -; VLEN512-NEXT: srli a0, t4, 40 -; VLEN512-NEXT: sb a0, 325(sp) -; VLEN512-NEXT: srli a0, t4, 32 -; VLEN512-NEXT: sb a0, 324(sp) -; VLEN512-NEXT: srli a0, t4, 24 -; VLEN512-NEXT: sb a0, 323(sp) -; VLEN512-NEXT: srli a0, t4, 16 -; VLEN512-NEXT: sb a0, 322(sp) -; VLEN512-NEXT: sb t4, 320(sp) -; VLEN512-NEXT: srli a0, t4, 8 -; VLEN512-NEXT: sb a0, 321(sp) -; VLEN512-NEXT: srli a0, t3, 56 -; VLEN512-NEXT: sb a0, 319(sp) -; VLEN512-NEXT: srli a0, t3, 48 -; VLEN512-NEXT: sb a0, 318(sp) -; VLEN512-NEXT: srli a0, t3, 40 -; VLEN512-NEXT: sb a0, 317(sp) -; VLEN512-NEXT: srli a0, t3, 32 -; VLEN512-NEXT: sb a0, 316(sp) -; VLEN512-NEXT: srli a0, t3, 24 -; VLEN512-NEXT: sb a0, 315(sp) -; VLEN512-NEXT: srli a0, t3, 16 -; VLEN512-NEXT: sb a0, 314(sp) -; VLEN512-NEXT: sb t3, 312(sp) -; VLEN512-NEXT: srli a0, t3, 8 -; VLEN512-NEXT: sb a0, 313(sp) -; VLEN512-NEXT: srli a0, t2, 56 -; VLEN512-NEXT: sb a0, 311(sp) -; VLEN512-NEXT: srli a0, t2, 48 -; VLEN512-NEXT: sb a0, 310(sp) -; VLEN512-NEXT: srli a0, t2, 40 -; VLEN512-NEXT: sb a0, 309(sp) -; VLEN512-NEXT: srli a0, t2, 32 -; VLEN512-NEXT: sb a0, 308(sp) -; VLEN512-NEXT: srli a0, t2, 24 -; VLEN512-NEXT: sb a0, 307(sp) -; VLEN512-NEXT: srli a0, t2, 16 -; VLEN512-NEXT: sb a0, 306(sp) -; VLEN512-NEXT: sb t2, 304(sp) -; VLEN512-NEXT: srli a0, t2, 8 -; VLEN512-NEXT: sb a0, 305(sp) -; VLEN512-NEXT: srli a0, t1, 56 -; VLEN512-NEXT: sb a0, 303(sp) -; VLEN512-NEXT: srli a0, t1, 48 -; VLEN512-NEXT: sb a0, 302(sp) -; VLEN512-NEXT: srli a0, t1, 40 -; VLEN512-NEXT: sb a0, 301(sp) -; VLEN512-NEXT: srli a0, t1, 32 -; VLEN512-NEXT: sb a0, 300(sp) -; VLEN512-NEXT: srli a0, t1, 24 -; VLEN512-NEXT: sb a0, 299(sp) -; VLEN512-NEXT: srli a0, t1, 16 -; VLEN512-NEXT: sb a0, 298(sp) -; VLEN512-NEXT: sb t1, 296(sp) -; VLEN512-NEXT: srli a0, t1, 8 -; VLEN512-NEXT: sb a0, 297(sp) -; VLEN512-NEXT: srli a0, t0, 56 -; VLEN512-NEXT: sb a0, 295(sp) -; VLEN512-NEXT: srli a0, t0, 48 -; VLEN512-NEXT: sb a0, 294(sp) -; VLEN512-NEXT: srli a0, t0, 40 -; VLEN512-NEXT: sb a0, 293(sp) -; VLEN512-NEXT: srli a0, t0, 32 -; VLEN512-NEXT: sb a0, 292(sp) -; VLEN512-NEXT: srli a0, t0, 24 -; VLEN512-NEXT: sb a0, 291(sp) -; VLEN512-NEXT: srli a0, t0, 16 -; VLEN512-NEXT: sb a0, 290(sp) -; VLEN512-NEXT: sb t0, 288(sp) -; VLEN512-NEXT: srli a0, t0, 8 -; VLEN512-NEXT: sb a0, 289(sp) -; VLEN512-NEXT: srli a0, a7, 56 -; VLEN512-NEXT: sb a0, 287(sp) -; VLEN512-NEXT: srli a0, a7, 48 -; VLEN512-NEXT: sb a0, 286(sp) -; VLEN512-NEXT: srli a0, a7, 40 -; VLEN512-NEXT: sb a0, 285(sp) -; VLEN512-NEXT: srli a0, a7, 32 -; VLEN512-NEXT: sb a0, 284(sp) -; VLEN512-NEXT: srli a0, a7, 24 -; VLEN512-NEXT: sb a0, 283(sp) -; VLEN512-NEXT: srli a0, a7, 16 -; VLEN512-NEXT: sb a0, 282(sp) -; VLEN512-NEXT: vmv.x.s a0, v0 -; VLEN512-NEXT: sb a7, 280(sp) -; VLEN512-NEXT: srli a1, a7, 8 -; VLEN512-NEXT: sb a1, 281(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 279(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 278(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 277(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 276(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 275(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 274(sp) -; VLEN512-NEXT: 
addi a1, sp, 920 -; VLEN512-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: sb a0, 272(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 273(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 271(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 270(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 269(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 268(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 267(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 266(sp) -; VLEN512-NEXT: srli a0, a6, 16 -; VLEN512-NEXT: sb a1, 264(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 265(sp) -; VLEN512-NEXT: srli a1, a6, 56 -; VLEN512-NEXT: sb a1, 263(sp) -; VLEN512-NEXT: srli a1, a6, 48 -; VLEN512-NEXT: sb a1, 262(sp) -; VLEN512-NEXT: srli a1, a6, 40 -; VLEN512-NEXT: sb a1, 261(sp) -; VLEN512-NEXT: srli a1, a6, 32 -; VLEN512-NEXT: sb a1, 260(sp) -; VLEN512-NEXT: srli a1, a6, 24 -; VLEN512-NEXT: sb a1, 259(sp) -; VLEN512-NEXT: vmv.x.s a1, v8 -; VLEN512-NEXT: sb a0, 258(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a6, 256(sp) -; VLEN512-NEXT: srli a2, a6, 8 -; VLEN512-NEXT: sb a2, 257(sp) -; VLEN512-NEXT: vslidedown.vi v24, v8, 31 -; VLEN512-NEXT: sb a0, 519(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 518(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 517(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 516(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 515(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 514(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 30 -; VLEN512-NEXT: sb a1, 512(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 513(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 767(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 766(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 765(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 764(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 763(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 762(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 29 -; VLEN512-NEXT: sb a0, 760(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 761(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 759(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 758(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 757(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 756(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 755(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 754(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 28 -; VLEN512-NEXT: sb a1, 752(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 753(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 751(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 750(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 749(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 748(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 747(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 746(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 27 -; VLEN512-NEXT: sb a0, 744(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 745(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; 
VLEN512-NEXT: sb a0, 743(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 742(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 741(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 740(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 739(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 738(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 26 -; VLEN512-NEXT: sb a1, 736(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 737(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 735(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 734(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 733(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 732(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 731(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 730(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 25 -; VLEN512-NEXT: sb a0, 728(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 729(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 727(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 726(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 725(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 724(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 723(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 722(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 24 -; VLEN512-NEXT: sb a1, 720(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 721(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 719(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 718(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 717(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 716(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 715(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 714(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 23 -; VLEN512-NEXT: sb a0, 712(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 713(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 711(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 710(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 709(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 708(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 707(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 706(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 22 -; VLEN512-NEXT: sb a1, 704(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 705(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 703(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 702(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 701(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 700(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 699(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 698(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 21 -; VLEN512-NEXT: sb a0, 696(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 697(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 695(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 694(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 693(sp) -; VLEN512-NEXT: srli 
a0, a1, 32 -; VLEN512-NEXT: sb a0, 692(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 691(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 690(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 20 -; VLEN512-NEXT: sb a1, 688(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 689(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 687(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 686(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 685(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 684(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 683(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 682(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 19 -; VLEN512-NEXT: sb a0, 680(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 681(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 679(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 678(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 677(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 676(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 675(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 674(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 18 -; VLEN512-NEXT: sb a1, 672(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 673(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 671(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 670(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 669(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 668(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 667(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 666(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 17 -; VLEN512-NEXT: sb a0, 664(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 665(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 663(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 662(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 661(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 660(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 659(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 658(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 16 -; VLEN512-NEXT: sb a1, 656(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 657(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 655(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 654(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 653(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 652(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 651(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 650(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 15 -; VLEN512-NEXT: sb a0, 648(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 649(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 647(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 646(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 645(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 644(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 643(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 642(sp) -; 
VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 14 -; VLEN512-NEXT: sb a1, 640(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 641(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 639(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 638(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 637(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 636(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 635(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 634(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 13 -; VLEN512-NEXT: sb a0, 632(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 633(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 631(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 630(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 629(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 628(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 627(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 626(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 12 -; VLEN512-NEXT: sb a1, 624(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 625(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 623(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 622(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 621(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 620(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 619(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 618(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 11 -; VLEN512-NEXT: sb a0, 616(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 617(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 615(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 614(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 613(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 612(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 611(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 610(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 10 -; VLEN512-NEXT: sb a1, 608(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 609(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 607(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 606(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 605(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 604(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 603(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 602(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 9 -; VLEN512-NEXT: sb a0, 600(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 601(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 599(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 598(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 597(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 596(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 595(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 594(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 8 -; VLEN512-NEXT: sb a1, 592(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 593(sp) -; 
VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 591(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 590(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 589(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 588(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 587(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 586(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 7 -; VLEN512-NEXT: sb a0, 584(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 585(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 583(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 582(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 581(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 580(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 579(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 578(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 6 -; VLEN512-NEXT: sb a1, 576(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 577(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 575(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 574(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 573(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 572(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 571(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 570(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 5 -; VLEN512-NEXT: sb a0, 568(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 569(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 567(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 566(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 565(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 564(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 563(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 562(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 4 -; VLEN512-NEXT: sb a1, 560(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 561(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 559(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 558(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 557(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 556(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 555(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 554(sp) -; VLEN512-NEXT: vmv.x.s a1, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 3 -; VLEN512-NEXT: sb a0, 552(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 553(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 551(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 550(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 549(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 548(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 547(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 546(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: vslidedown.vi v24, v8, 1 -; VLEN512-NEXT: vslidedown.vi v8, v8, 2 -; VLEN512-NEXT: sb a1, 544(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 545(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 543(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 542(sp) -; VLEN512-NEXT: 
srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 541(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 540(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 539(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 538(sp) -; VLEN512-NEXT: vmv.x.s a1, v8 -; VLEN512-NEXT: sb a0, 536(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 537(sp) -; VLEN512-NEXT: srli a0, a1, 56 -; VLEN512-NEXT: sb a0, 535(sp) -; VLEN512-NEXT: srli a0, a1, 48 -; VLEN512-NEXT: sb a0, 534(sp) -; VLEN512-NEXT: srli a0, a1, 40 -; VLEN512-NEXT: sb a0, 533(sp) -; VLEN512-NEXT: srli a0, a1, 32 -; VLEN512-NEXT: sb a0, 532(sp) -; VLEN512-NEXT: srli a0, a1, 24 -; VLEN512-NEXT: sb a0, 531(sp) -; VLEN512-NEXT: srli a0, a1, 16 -; VLEN512-NEXT: sb a0, 530(sp) -; VLEN512-NEXT: vmv.x.s a0, v24 -; VLEN512-NEXT: sb a1, 528(sp) -; VLEN512-NEXT: srli a1, a1, 8 -; VLEN512-NEXT: sb a1, 529(sp) -; VLEN512-NEXT: srli a1, a0, 56 -; VLEN512-NEXT: sb a1, 527(sp) -; VLEN512-NEXT: srli a1, a0, 48 -; VLEN512-NEXT: sb a1, 526(sp) -; VLEN512-NEXT: srli a1, a0, 40 -; VLEN512-NEXT: sb a1, 525(sp) -; VLEN512-NEXT: srli a1, a0, 32 -; VLEN512-NEXT: sb a1, 524(sp) -; VLEN512-NEXT: srli a1, a0, 24 -; VLEN512-NEXT: sb a1, 523(sp) -; VLEN512-NEXT: srli a1, a0, 16 -; VLEN512-NEXT: sb a1, 522(sp) -; VLEN512-NEXT: sb a0, 520(sp) -; VLEN512-NEXT: srli a0, a0, 8 -; VLEN512-NEXT: sb a0, 521(sp) -; VLEN512-NEXT: addi a0, zero, 256 -; VLEN512-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; VLEN512-NEXT: addi a0, sp, 512 -; VLEN512-NEXT: vle8.v v28, (a0) -; VLEN512-NEXT: addi a0, sp, 256 -; VLEN512-NEXT: vle8.v v12, (a0) -; VLEN512-NEXT: vadd.vv v8, v16, v28 -; VLEN512-NEXT: vadd.vv v12, v20, v12 -; VLEN512-NEXT: addi sp, s0, -1024 -; VLEN512-NEXT: ld s11, 920(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld s10, 928(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld s9, 936(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld s8, 944(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld s7, 952(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld s6, 960(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld s5, 968(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld s4, 976(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld s3, 984(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld s2, 992(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld s1, 1000(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld s0, 1008(sp) # 8-byte Folded Reload -; VLEN512-NEXT: ld ra, 1016(sp) # 8-byte Folded Reload -; VLEN512-NEXT: addi sp, sp, 1024 +; VLEN512-NEXT: addi a0, zero, 512 +; VLEN512-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; VLEN512-NEXT: vadd.vv v8, v16, v8 ; VLEN512-NEXT: ret +; +; VLEN1024-LABEL: bitcast_1024B: +; VLEN1024: # %bb.0: +; VLEN1024-NEXT: addi a0, zero, 512 +; VLEN1024-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; VLEN1024-NEXT: vadd.vv v8, v12, v8 +; VLEN1024-NEXT: ret %c = bitcast <256 x i16> %a to <512 x i8> %v = add <512 x i8> %b, %c ret <512 x i8> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll --- a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll @@ -129,849 +129,74 @@ define void @interleave512(<512 x i16>* %agg.result, <256 x i16>* %0, <256 x i16>* %1) local_unnamed_addr { ; RV64-1024-LABEL: interleave512: ; RV64-1024: # %bb.0: # %entry -; RV64-1024-NEXT: addi sp, sp, -2032 -; RV64-1024-NEXT: .cfi_def_cfa_offset 2032 -; RV64-1024-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sd s1, 2008(sp) # 
8-byte Folded Spill -; RV64-1024-NEXT: sd s2, 2000(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sd s3, 1992(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sd s4, 1984(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sd s5, 1976(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sd s6, 1968(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sd s7, 1960(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sd s8, 1952(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sd s9, 1944(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sd s10, 1936(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sd s11, 1928(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: .cfi_offset ra, -8 -; RV64-1024-NEXT: .cfi_offset s0, -16 -; RV64-1024-NEXT: .cfi_offset s1, -24 -; RV64-1024-NEXT: .cfi_offset s2, -32 -; RV64-1024-NEXT: .cfi_offset s3, -40 -; RV64-1024-NEXT: .cfi_offset s4, -48 -; RV64-1024-NEXT: .cfi_offset s5, -56 -; RV64-1024-NEXT: .cfi_offset s6, -64 -; RV64-1024-NEXT: .cfi_offset s7, -72 -; RV64-1024-NEXT: .cfi_offset s8, -80 -; RV64-1024-NEXT: .cfi_offset s9, -88 -; RV64-1024-NEXT: .cfi_offset s10, -96 -; RV64-1024-NEXT: .cfi_offset s11, -104 -; RV64-1024-NEXT: addi s0, sp, 2032 -; RV64-1024-NEXT: .cfi_def_cfa s0, 0 ; RV64-1024-NEXT: addi sp, sp, -16 +; RV64-1024-NEXT: .cfi_def_cfa_offset 16 ; RV64-1024-NEXT: csrr a3, vlenb -; RV64-1024-NEXT: addi a4, zero, 12 +; RV64-1024-NEXT: addi a4, zero, 24 ; RV64-1024-NEXT: mul a3, a3, a4 ; RV64-1024-NEXT: sub sp, sp, a3 -; RV64-1024-NEXT: andi sp, sp, -512 ; RV64-1024-NEXT: addi a3, zero, 256 ; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, ta, mu -; RV64-1024-NEXT: addi a3, zero, 256 -; RV64-1024-NEXT: vle16.v v16, (a1) -; RV64-1024-NEXT: lui a1, %hi(.LCPI1_0) -; RV64-1024-NEXT: addi a1, a1, %lo(.LCPI1_0) -; RV64-1024-NEXT: vle16.v v20, (a1) -; RV64-1024-NEXT: vle16.v v28, (a2) +; RV64-1024-NEXT: vle16.v v24, (a1) +; RV64-1024-NEXT: vle16.v v8, (a2) ; RV64-1024-NEXT: csrr a1, vlenb -; RV64-1024-NEXT: slli a1, a1, 3 +; RV64-1024-NEXT: slli a1, a1, 4 ; RV64-1024-NEXT: add a1, sp, a1 -; RV64-1024-NEXT: addi a1, a1, 1944 -; RV64-1024-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill -; RV64-1024-NEXT: vrgather.vv v0, v16, v20 -; RV64-1024-NEXT: vsetvli zero, zero, e32, m8, ta, mu +; RV64-1024-NEXT: addi a1, a1, 16 +; RV64-1024-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-1024-NEXT: addi a1, zero, 512 +; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; RV64-1024-NEXT: vmv.v.i v8, 0 -; RV64-1024-NEXT: addi a1, zero, 128 -; RV64-1024-NEXT: vsetvli zero, a1, e32, m8, tu, mu -; RV64-1024-NEXT: vslideup.vi v8, v0, 0 +; RV64-1024-NEXT: vsetvli zero, a3, e16, m8, tu, mu +; RV64-1024-NEXT: vmv8r.v v0, v8 +; RV64-1024-NEXT: vslideup.vi v0, v24, 0 +; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, ta, mu +; RV64-1024-NEXT: vmv.v.i v16, 0 +; RV64-1024-NEXT: addi a2, sp, 16 +; RV64-1024-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; RV64-1024-NEXT: vslideup.vx v0, v16, a3 +; RV64-1024-NEXT: lui a2, %hi(.LCPI1_0) +; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI1_0) +; RV64-1024-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; RV64-1024-NEXT: vle16.v v16, (a2) +; RV64-1024-NEXT: vrgather.vv v24, v0, v16 +; RV64-1024-NEXT: csrr a2, vlenb +; RV64-1024-NEXT: slli a2, a2, 3 +; RV64-1024-NEXT: add a2, sp, a2 +; RV64-1024-NEXT: addi a2, a2, 16 +; RV64-1024-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV64-1024-NEXT: vsetvli zero, a3, e16, m8, tu, mu +; RV64-1024-NEXT: csrr a2, vlenb +; RV64-1024-NEXT: slli a2, a2, 4 +; RV64-1024-NEXT: add a2, sp, a2 +; 
RV64-1024-NEXT: addi a2, a2, 16 +; RV64-1024-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload +; RV64-1024-NEXT: vslideup.vi v8, v16, 0 +; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; RV64-1024-NEXT: addi a2, sp, 16 +; RV64-1024-NEXT: vl8re8.v v16, (a2) # Unknown-size Folded Reload +; RV64-1024-NEXT: vslideup.vx v8, v16, a3 ; RV64-1024-NEXT: lui a2, %hi(.LCPI1_1) ; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI1_1) -; RV64-1024-NEXT: vsetvli zero, a3, e16, m4, ta, mu +; RV64-1024-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; RV64-1024-NEXT: vle16.v v24, (a2) -; RV64-1024-NEXT: vrgather.vv v0, v16, v24 -; RV64-1024-NEXT: vrgather.vv v24, v0, v20 -; RV64-1024-NEXT: vsetvli zero, zero, e32, m8, tu, mu -; RV64-1024-NEXT: vslideup.vx v8, v24, a1 -; RV64-1024-NEXT: addi a1, zero, 127 -; RV64-1024-NEXT: vsetivli zero, 1, e64, m8, ta, mu -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s t5, v16 -; RV64-1024-NEXT: addi a1, zero, 126 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s t6, v16 -; RV64-1024-NEXT: addi a1, zero, 125 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s2, v16 -; RV64-1024-NEXT: addi a1, zero, 124 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s3, v16 -; RV64-1024-NEXT: addi a1, zero, 123 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s4, v16 -; RV64-1024-NEXT: addi a1, zero, 122 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s5, v16 -; RV64-1024-NEXT: addi a1, zero, 121 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s6, v16 -; RV64-1024-NEXT: addi a1, zero, 120 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s7, v16 -; RV64-1024-NEXT: addi a1, zero, 119 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s8, v16 -; RV64-1024-NEXT: addi a1, zero, 118 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s9, v16 -; RV64-1024-NEXT: addi a1, zero, 117 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s10, v16 -; RV64-1024-NEXT: addi a1, zero, 116 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s11, v16 -; RV64-1024-NEXT: addi a1, zero, 115 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s t4, v16 -; RV64-1024-NEXT: addi a1, zero, 114 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s ra, v16 -; RV64-1024-NEXT: addi a1, zero, 113 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a6, v16 -; RV64-1024-NEXT: addi a1, zero, 112 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a7, v16 -; RV64-1024-NEXT: addi a1, zero, 111 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s t0, v16 -; RV64-1024-NEXT: addi a1, zero, 110 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s t1, v16 -; RV64-1024-NEXT: addi a1, zero, 109 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a5, v16 -; RV64-1024-NEXT: addi a1, zero, 108 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a3, v16 -; RV64-1024-NEXT: addi a1, zero, 107 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: addi a2, zero, 106 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a2 -; RV64-1024-NEXT: vmv.x.s a2, v16 -; RV64-1024-NEXT: sd a2, 504(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: addi a4, zero, 105 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a4 -; 
RV64-1024-NEXT: vmv.x.s a2, v16 -; RV64-1024-NEXT: addi s1, zero, 104 -; RV64-1024-NEXT: vslidedown.vx v16, v8, s1 -; RV64-1024-NEXT: vmv.x.s a4, v16 -; RV64-1024-NEXT: addi s1, zero, 103 -; RV64-1024-NEXT: vslidedown.vx v24, v8, s1 -; RV64-1024-NEXT: addi s1, zero, 102 -; RV64-1024-NEXT: vslidedown.vx v16, v8, s1 -; RV64-1024-NEXT: addi s1, zero, 101 -; RV64-1024-NEXT: vslidedown.vx v0, v8, s1 -; RV64-1024-NEXT: vmv.x.s s1, v24 -; RV64-1024-NEXT: sd s1, 496(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sh t5, 1016(sp) -; RV64-1024-NEXT: srli s1, t5, 32 -; RV64-1024-NEXT: sh s1, 1020(sp) -; RV64-1024-NEXT: addi s1, zero, 100 -; RV64-1024-NEXT: vslidedown.vx v24, v8, s1 -; RV64-1024-NEXT: vmv.x.s t5, v16 -; RV64-1024-NEXT: sh t6, 1008(sp) -; RV64-1024-NEXT: srli s1, t6, 32 -; RV64-1024-NEXT: sh s1, 1012(sp) -; RV64-1024-NEXT: addi s1, zero, 99 -; RV64-1024-NEXT: vslidedown.vx v16, v8, s1 -; RV64-1024-NEXT: vmv.x.s t6, v0 -; RV64-1024-NEXT: sh s2, 1000(sp) -; RV64-1024-NEXT: srli s1, s2, 32 -; RV64-1024-NEXT: sh s1, 1004(sp) -; RV64-1024-NEXT: addi s1, zero, 98 -; RV64-1024-NEXT: vslidedown.vx v0, v8, s1 -; RV64-1024-NEXT: vmv.x.s s2, v24 -; RV64-1024-NEXT: sh s3, 992(sp) -; RV64-1024-NEXT: srli s1, s3, 32 -; RV64-1024-NEXT: sh s1, 996(sp) -; RV64-1024-NEXT: addi s1, zero, 97 -; RV64-1024-NEXT: vslidedown.vx v24, v8, s1 -; RV64-1024-NEXT: vmv.x.s s3, v16 -; RV64-1024-NEXT: sh s4, 984(sp) -; RV64-1024-NEXT: srli s1, s4, 32 -; RV64-1024-NEXT: sh s1, 988(sp) -; RV64-1024-NEXT: addi s1, zero, 96 -; RV64-1024-NEXT: vslidedown.vx v16, v8, s1 -; RV64-1024-NEXT: vmv.x.s s4, v0 -; RV64-1024-NEXT: sh s5, 976(sp) -; RV64-1024-NEXT: srli s1, s5, 32 -; RV64-1024-NEXT: sh s1, 980(sp) -; RV64-1024-NEXT: addi s1, zero, 95 -; RV64-1024-NEXT: vslidedown.vx v0, v8, s1 -; RV64-1024-NEXT: vmv.x.s s5, v24 -; RV64-1024-NEXT: sh s6, 968(sp) -; RV64-1024-NEXT: srli s1, s6, 32 -; RV64-1024-NEXT: sh s1, 972(sp) -; RV64-1024-NEXT: addi s1, zero, 94 -; RV64-1024-NEXT: vslidedown.vx v24, v8, s1 -; RV64-1024-NEXT: vmv.x.s s6, v16 -; RV64-1024-NEXT: sh s7, 960(sp) -; RV64-1024-NEXT: srli s1, s7, 32 -; RV64-1024-NEXT: sh s1, 964(sp) -; RV64-1024-NEXT: addi s1, zero, 93 -; RV64-1024-NEXT: vslidedown.vx v16, v8, s1 -; RV64-1024-NEXT: vmv.x.s s7, v0 -; RV64-1024-NEXT: sh s8, 952(sp) -; RV64-1024-NEXT: srli s1, s8, 32 -; RV64-1024-NEXT: sh s1, 956(sp) -; RV64-1024-NEXT: addi s1, zero, 92 -; RV64-1024-NEXT: vslidedown.vx v0, v8, s1 -; RV64-1024-NEXT: vmv.x.s s8, v24 -; RV64-1024-NEXT: sh s9, 944(sp) -; RV64-1024-NEXT: srli s1, s9, 32 -; RV64-1024-NEXT: sh s1, 948(sp) -; RV64-1024-NEXT: addi s1, zero, 91 -; RV64-1024-NEXT: vslidedown.vx v24, v8, s1 -; RV64-1024-NEXT: vmv.x.s s9, v16 -; RV64-1024-NEXT: sh s10, 936(sp) -; RV64-1024-NEXT: srli s1, s10, 32 -; RV64-1024-NEXT: sh s1, 940(sp) -; RV64-1024-NEXT: addi s1, zero, 90 -; RV64-1024-NEXT: vslidedown.vx v16, v8, s1 -; RV64-1024-NEXT: vmv.x.s s10, v0 -; RV64-1024-NEXT: sh s11, 928(sp) -; RV64-1024-NEXT: srli s1, s11, 32 -; RV64-1024-NEXT: sh s1, 932(sp) -; RV64-1024-NEXT: addi s1, zero, 89 -; RV64-1024-NEXT: vslidedown.vx v0, v8, s1 -; RV64-1024-NEXT: vmv.x.s s11, v24 -; RV64-1024-NEXT: sh t4, 920(sp) -; RV64-1024-NEXT: srli s1, t4, 32 -; RV64-1024-NEXT: sh s1, 924(sp) -; RV64-1024-NEXT: addi s1, zero, 88 -; RV64-1024-NEXT: vslidedown.vx v24, v8, s1 -; RV64-1024-NEXT: vmv.x.s s1, v16 -; RV64-1024-NEXT: sd s1, 488(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sh ra, 912(sp) -; RV64-1024-NEXT: srli s1, ra, 32 -; RV64-1024-NEXT: sh s1, 916(sp) -; RV64-1024-NEXT: addi s1, zero, 87 -; 
RV64-1024-NEXT: vslidedown.vx v16, v8, s1 -; RV64-1024-NEXT: vmv.x.s s1, v0 -; RV64-1024-NEXT: sd s1, 480(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sh a6, 904(sp) -; RV64-1024-NEXT: srli s1, a6, 32 -; RV64-1024-NEXT: sh s1, 908(sp) -; RV64-1024-NEXT: addi s1, zero, 86 -; RV64-1024-NEXT: vslidedown.vx v0, v8, s1 -; RV64-1024-NEXT: vmv.x.s s1, v24 -; RV64-1024-NEXT: sd s1, 472(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sh a7, 896(sp) -; RV64-1024-NEXT: srli s1, a7, 32 -; RV64-1024-NEXT: sh s1, 900(sp) -; RV64-1024-NEXT: addi s1, zero, 85 -; RV64-1024-NEXT: vslidedown.vx v24, v8, s1 -; RV64-1024-NEXT: vmv.x.s s1, v16 -; RV64-1024-NEXT: sd s1, 464(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sh t0, 888(sp) -; RV64-1024-NEXT: srli s1, t0, 32 -; RV64-1024-NEXT: sh s1, 892(sp) -; RV64-1024-NEXT: addi s1, zero, 84 -; RV64-1024-NEXT: vslidedown.vx v16, v8, s1 -; RV64-1024-NEXT: vmv.x.s s1, v0 -; RV64-1024-NEXT: sd s1, 456(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: sh t1, 880(sp) -; RV64-1024-NEXT: srli s1, t1, 32 -; RV64-1024-NEXT: sh s1, 884(sp) -; RV64-1024-NEXT: addi s1, zero, 83 -; RV64-1024-NEXT: vslidedown.vx v0, v8, s1 -; RV64-1024-NEXT: vmv.x.s t1, v24 -; RV64-1024-NEXT: sh a5, 872(sp) -; RV64-1024-NEXT: srli a5, a5, 32 -; RV64-1024-NEXT: sh a5, 876(sp) -; RV64-1024-NEXT: addi a5, zero, 82 -; RV64-1024-NEXT: vslidedown.vx v24, v8, a5 -; RV64-1024-NEXT: vmv.x.s t2, v16 -; RV64-1024-NEXT: sh a3, 864(sp) -; RV64-1024-NEXT: srli a3, a3, 32 -; RV64-1024-NEXT: sh a3, 868(sp) -; RV64-1024-NEXT: addi a3, zero, 81 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a3 -; RV64-1024-NEXT: vmv.x.s t3, v0 -; RV64-1024-NEXT: sh a1, 856(sp) -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 860(sp) -; RV64-1024-NEXT: addi a1, zero, 80 -; RV64-1024-NEXT: vslidedown.vx v0, v8, a1 -; RV64-1024-NEXT: vmv.x.s t4, v24 -; RV64-1024-NEXT: ld a1, 504(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh a1, 848(sp) -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 852(sp) -; RV64-1024-NEXT: addi a1, zero, 79 -; RV64-1024-NEXT: vslidedown.vx v24, v8, a1 -; RV64-1024-NEXT: vmv.x.s ra, v16 -; RV64-1024-NEXT: sh a2, 840(sp) -; RV64-1024-NEXT: srli a2, a2, 32 -; RV64-1024-NEXT: sh a2, 844(sp) -; RV64-1024-NEXT: addi a2, zero, 78 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a2 -; RV64-1024-NEXT: vmv.x.s a6, v0 -; RV64-1024-NEXT: sh a4, 832(sp) -; RV64-1024-NEXT: srli a4, a4, 32 -; RV64-1024-NEXT: sh a4, 836(sp) -; RV64-1024-NEXT: addi a4, zero, 77 -; RV64-1024-NEXT: vslidedown.vx v0, v8, a4 -; RV64-1024-NEXT: vmv.x.s a7, v24 -; RV64-1024-NEXT: ld a1, 496(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh a1, 824(sp) -; RV64-1024-NEXT: srli s1, a1, 32 -; RV64-1024-NEXT: sh s1, 828(sp) -; RV64-1024-NEXT: addi s1, zero, 76 -; RV64-1024-NEXT: vslidedown.vx v24, v8, s1 -; RV64-1024-NEXT: vmv.x.s t0, v16 -; RV64-1024-NEXT: sh t5, 816(sp) -; RV64-1024-NEXT: srli a5, t5, 32 -; RV64-1024-NEXT: sh a5, 820(sp) -; RV64-1024-NEXT: addi a5, zero, 75 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a5 -; RV64-1024-NEXT: vmv.x.s t5, v0 -; RV64-1024-NEXT: sh t6, 808(sp) -; RV64-1024-NEXT: srli a3, t6, 32 -; RV64-1024-NEXT: sh a3, 812(sp) -; RV64-1024-NEXT: addi a3, zero, 74 -; RV64-1024-NEXT: vslidedown.vx v0, v8, a3 -; RV64-1024-NEXT: vmv.x.s t6, v24 -; RV64-1024-NEXT: sh s2, 800(sp) -; RV64-1024-NEXT: srli a1, s2, 32 -; RV64-1024-NEXT: sh a1, 804(sp) -; RV64-1024-NEXT: addi a1, zero, 73 -; RV64-1024-NEXT: vslidedown.vx v24, v8, a1 -; RV64-1024-NEXT: vmv.x.s s2, v16 -; RV64-1024-NEXT: sh s3, 792(sp) -; RV64-1024-NEXT: srli a2, s3, 32 
-; RV64-1024-NEXT: sh a2, 796(sp) -; RV64-1024-NEXT: addi a2, zero, 72 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a2 -; RV64-1024-NEXT: vmv.x.s s3, v0 -; RV64-1024-NEXT: sh s4, 784(sp) -; RV64-1024-NEXT: srli a4, s4, 32 -; RV64-1024-NEXT: sh a4, 788(sp) -; RV64-1024-NEXT: addi a4, zero, 71 -; RV64-1024-NEXT: vslidedown.vx v0, v8, a4 -; RV64-1024-NEXT: vmv.x.s s4, v24 -; RV64-1024-NEXT: sh s5, 776(sp) -; RV64-1024-NEXT: srli s1, s5, 32 -; RV64-1024-NEXT: sh s1, 780(sp) -; RV64-1024-NEXT: addi s1, zero, 70 -; RV64-1024-NEXT: vslidedown.vx v24, v8, s1 -; RV64-1024-NEXT: vmv.x.s s5, v16 -; RV64-1024-NEXT: sh s6, 768(sp) -; RV64-1024-NEXT: srli a5, s6, 32 -; RV64-1024-NEXT: sh a5, 772(sp) -; RV64-1024-NEXT: addi a5, zero, 69 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a5 -; RV64-1024-NEXT: vmv.x.s s6, v0 -; RV64-1024-NEXT: sh s7, 760(sp) -; RV64-1024-NEXT: srli a3, s7, 32 -; RV64-1024-NEXT: sh a3, 764(sp) -; RV64-1024-NEXT: addi a3, zero, 68 -; RV64-1024-NEXT: vslidedown.vx v0, v8, a3 -; RV64-1024-NEXT: vmv.x.s s7, v24 -; RV64-1024-NEXT: sh s8, 752(sp) -; RV64-1024-NEXT: srli a1, s8, 32 -; RV64-1024-NEXT: sh a1, 756(sp) -; RV64-1024-NEXT: addi a1, zero, 67 -; RV64-1024-NEXT: vslidedown.vx v24, v8, a1 -; RV64-1024-NEXT: vmv.x.s s8, v16 -; RV64-1024-NEXT: sh s9, 744(sp) -; RV64-1024-NEXT: srli a2, s9, 32 -; RV64-1024-NEXT: sh a2, 748(sp) -; RV64-1024-NEXT: addi a2, zero, 66 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a2 -; RV64-1024-NEXT: vmv.x.s s9, v0 -; RV64-1024-NEXT: sh s10, 736(sp) -; RV64-1024-NEXT: srli a4, s10, 32 -; RV64-1024-NEXT: sh a4, 740(sp) -; RV64-1024-NEXT: addi a4, zero, 65 -; RV64-1024-NEXT: vslidedown.vx v0, v8, a4 -; RV64-1024-NEXT: addi a1, sp, 1944 -; RV64-1024-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill -; RV64-1024-NEXT: vmv.x.s s10, v24 -; RV64-1024-NEXT: sh s11, 728(sp) -; RV64-1024-NEXT: srli s1, s11, 32 -; RV64-1024-NEXT: sh s1, 732(sp) -; RV64-1024-NEXT: addi s1, zero, 64 -; RV64-1024-NEXT: vslidedown.vx v0, v8, s1 -; RV64-1024-NEXT: vmv.x.s s11, v16 -; RV64-1024-NEXT: ld a1, 488(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh a1, 720(sp) -; RV64-1024-NEXT: ld a3, 480(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh a3, 712(sp) -; RV64-1024-NEXT: ld a2, 472(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh a2, 704(sp) -; RV64-1024-NEXT: ld a4, 464(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh a4, 696(sp) -; RV64-1024-NEXT: ld s1, 456(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh s1, 688(sp) -; RV64-1024-NEXT: sh t1, 680(sp) -; RV64-1024-NEXT: sh t2, 672(sp) -; RV64-1024-NEXT: sh t3, 664(sp) -; RV64-1024-NEXT: sh t4, 656(sp) -; RV64-1024-NEXT: sh ra, 648(sp) -; RV64-1024-NEXT: sh a6, 640(sp) -; RV64-1024-NEXT: sh a7, 632(sp) -; RV64-1024-NEXT: sh t0, 624(sp) -; RV64-1024-NEXT: sh t5, 616(sp) -; RV64-1024-NEXT: sh t6, 608(sp) -; RV64-1024-NEXT: sh s2, 600(sp) -; RV64-1024-NEXT: sh s3, 592(sp) -; RV64-1024-NEXT: sh s4, 584(sp) -; RV64-1024-NEXT: sh s5, 576(sp) -; RV64-1024-NEXT: sh s6, 568(sp) -; RV64-1024-NEXT: sh s7, 560(sp) -; RV64-1024-NEXT: sh s8, 552(sp) -; RV64-1024-NEXT: sh s9, 544(sp) -; RV64-1024-NEXT: sh s10, 536(sp) -; RV64-1024-NEXT: sh s11, 528(sp) -; RV64-1024-NEXT: srli a5, a1, 32 -; RV64-1024-NEXT: sh a5, 724(sp) -; RV64-1024-NEXT: addi a1, sp, 1944 -; RV64-1024-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload -; RV64-1024-NEXT: vmv.x.s a5, v16 -; RV64-1024-NEXT: sh a5, 520(sp) -; RV64-1024-NEXT: srli a3, a3, 32 -; RV64-1024-NEXT: sh a3, 716(sp) -; RV64-1024-NEXT: vmv.x.s a3, v0 -; RV64-1024-NEXT: sh a3, 512(sp) -; RV64-1024-NEXT: srli a1, 
a2, 32 -; RV64-1024-NEXT: sh a1, 708(sp) -; RV64-1024-NEXT: srli a1, a4, 32 -; RV64-1024-NEXT: sh a1, 700(sp) -; RV64-1024-NEXT: srli a1, s1, 32 -; RV64-1024-NEXT: sh a1, 692(sp) -; RV64-1024-NEXT: srli a1, t1, 32 -; RV64-1024-NEXT: sh a1, 684(sp) -; RV64-1024-NEXT: srli a1, t2, 32 -; RV64-1024-NEXT: sh a1, 676(sp) -; RV64-1024-NEXT: srli a1, t3, 32 -; RV64-1024-NEXT: sh a1, 668(sp) -; RV64-1024-NEXT: srli a1, t4, 32 -; RV64-1024-NEXT: sh a1, 660(sp) -; RV64-1024-NEXT: srli a1, ra, 32 -; RV64-1024-NEXT: sh a1, 652(sp) -; RV64-1024-NEXT: srli a1, a6, 32 -; RV64-1024-NEXT: sh a1, 644(sp) -; RV64-1024-NEXT: srli a1, a7, 32 -; RV64-1024-NEXT: sh a1, 636(sp) -; RV64-1024-NEXT: srli a1, t0, 32 -; RV64-1024-NEXT: sh a1, 628(sp) -; RV64-1024-NEXT: srli a1, t5, 32 -; RV64-1024-NEXT: sh a1, 620(sp) -; RV64-1024-NEXT: srli a1, t6, 32 -; RV64-1024-NEXT: sh a1, 612(sp) -; RV64-1024-NEXT: srli a1, s2, 32 -; RV64-1024-NEXT: sh a1, 604(sp) -; RV64-1024-NEXT: srli a1, s3, 32 -; RV64-1024-NEXT: sh a1, 596(sp) -; RV64-1024-NEXT: srli a1, s4, 32 -; RV64-1024-NEXT: sh a1, 588(sp) -; RV64-1024-NEXT: srli a1, s5, 32 -; RV64-1024-NEXT: sh a1, 580(sp) -; RV64-1024-NEXT: srli a1, s6, 32 -; RV64-1024-NEXT: sh a1, 572(sp) -; RV64-1024-NEXT: srli a1, s7, 32 -; RV64-1024-NEXT: sh a1, 564(sp) -; RV64-1024-NEXT: srli a1, s8, 32 -; RV64-1024-NEXT: sh a1, 556(sp) -; RV64-1024-NEXT: srli a1, s9, 32 -; RV64-1024-NEXT: sh a1, 548(sp) -; RV64-1024-NEXT: srli a1, s10, 32 -; RV64-1024-NEXT: sh a1, 540(sp) -; RV64-1024-NEXT: srli a1, s11, 32 -; RV64-1024-NEXT: sh a1, 532(sp) -; RV64-1024-NEXT: srli a1, a5, 32 -; RV64-1024-NEXT: sh a1, 524(sp) -; RV64-1024-NEXT: srli a1, a3, 32 -; RV64-1024-NEXT: sh a1, 516(sp) -; RV64-1024-NEXT: addi a1, zero, 63 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s5, v16 -; RV64-1024-NEXT: addi a1, zero, 62 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s4, v16 -; RV64-1024-NEXT: addi a1, zero, 61 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s2, v16 -; RV64-1024-NEXT: addi a1, zero, 60 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s t5, v16 -; RV64-1024-NEXT: addi a1, zero, 59 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sd a1, 488(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: addi a1, zero, 58 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sd a1, 504(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: addi a1, zero, 57 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sd a1, 496(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: addi a1, zero, 56 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sd a1, 480(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: addi a1, zero, 55 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sd a1, 472(sp) # 8-byte Folded Spill -; RV64-1024-NEXT: addi a1, zero, 54 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s t4, v16 -; RV64-1024-NEXT: addi a1, zero, 53 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s t6, v16 -; RV64-1024-NEXT: addi a1, zero, 52 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s3, v16 -; RV64-1024-NEXT: addi a1, zero, 51 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s6, v16 -; RV64-1024-NEXT: addi a1, zero, 50 -; 
RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s7, v16 -; RV64-1024-NEXT: addi a1, zero, 49 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s8, v16 -; RV64-1024-NEXT: addi a1, zero, 48 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s9, v16 -; RV64-1024-NEXT: addi a1, zero, 47 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s10, v16 -; RV64-1024-NEXT: addi a1, zero, 46 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s s11, v16 -; RV64-1024-NEXT: addi a1, zero, 45 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s ra, v16 -; RV64-1024-NEXT: addi a1, zero, 44 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a6, v16 -; RV64-1024-NEXT: addi a1, zero, 43 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s a7, v16 -; RV64-1024-NEXT: addi a1, zero, 42 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s t0, v16 -; RV64-1024-NEXT: addi a1, zero, 41 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s t1, v16 -; RV64-1024-NEXT: addi a1, zero, 40 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a1 -; RV64-1024-NEXT: vmv.x.s t2, v16 -; RV64-1024-NEXT: addi s1, zero, 39 -; RV64-1024-NEXT: vslidedown.vx v16, v8, s1 -; RV64-1024-NEXT: addi s1, zero, 38 -; RV64-1024-NEXT: vslidedown.vx v0, v8, s1 -; RV64-1024-NEXT: vmv.x.s s1, v8 -; RV64-1024-NEXT: sh s1, 1024(sp) -; RV64-1024-NEXT: srli s1, s1, 32 -; RV64-1024-NEXT: sh s1, 1028(sp) -; RV64-1024-NEXT: addi s1, zero, 37 -; RV64-1024-NEXT: vslidedown.vx v24, v8, s1 -; RV64-1024-NEXT: vmv.x.s t3, v16 -; RV64-1024-NEXT: sh s5, 1528(sp) -; RV64-1024-NEXT: srli a2, s5, 32 -; RV64-1024-NEXT: sh a2, 1532(sp) -; RV64-1024-NEXT: addi a2, zero, 36 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a2 -; RV64-1024-NEXT: vmv.x.s a2, v0 -; RV64-1024-NEXT: sh s4, 1520(sp) -; RV64-1024-NEXT: srli a3, s4, 32 -; RV64-1024-NEXT: sh a3, 1524(sp) -; RV64-1024-NEXT: addi a3, zero, 35 -; RV64-1024-NEXT: vslidedown.vx v0, v8, a3 -; RV64-1024-NEXT: vmv.x.s a3, v24 -; RV64-1024-NEXT: sh s2, 1512(sp) -; RV64-1024-NEXT: srli a4, s2, 32 -; RV64-1024-NEXT: sh a4, 1516(sp) -; RV64-1024-NEXT: addi a4, zero, 34 -; RV64-1024-NEXT: vslidedown.vx v24, v8, a4 -; RV64-1024-NEXT: vmv.x.s a4, v16 -; RV64-1024-NEXT: sh t5, 1504(sp) -; RV64-1024-NEXT: srli a5, t5, 32 -; RV64-1024-NEXT: sh a5, 1508(sp) -; RV64-1024-NEXT: addi a5, zero, 33 -; RV64-1024-NEXT: vslidedown.vx v16, v8, a5 -; RV64-1024-NEXT: vmv.x.s a5, v0 -; RV64-1024-NEXT: ld a1, 488(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh a1, 1496(sp) -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1500(sp) -; RV64-1024-NEXT: addi a1, zero, 32 -; RV64-1024-NEXT: vslidedown.vx v0, v8, a1 -; RV64-1024-NEXT: vmv.x.s a1, v24 -; RV64-1024-NEXT: ld s1, 504(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh s1, 1488(sp) -; RV64-1024-NEXT: srli s1, s1, 32 -; RV64-1024-NEXT: sh s1, 1492(sp) -; RV64-1024-NEXT: ld s1, 496(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh s1, 1480(sp) -; RV64-1024-NEXT: srli s1, s1, 32 -; RV64-1024-NEXT: sh s1, 1484(sp) -; RV64-1024-NEXT: ld s1, 480(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh s1, 1472(sp) -; RV64-1024-NEXT: srli s1, s1, 32 -; RV64-1024-NEXT: sh s1, 1476(sp) -; RV64-1024-NEXT: ld s1, 472(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: sh s1, 1464(sp) -; RV64-1024-NEXT: srli s1, s1, 32 -; RV64-1024-NEXT: sh s1, 1468(sp) -; RV64-1024-NEXT: sh t4, 1456(sp) -; RV64-1024-NEXT: srli s1, t4, 32 -; 
RV64-1024-NEXT: sh s1, 1460(sp) -; RV64-1024-NEXT: sh t6, 1448(sp) -; RV64-1024-NEXT: srli s1, t6, 32 -; RV64-1024-NEXT: sh s1, 1452(sp) -; RV64-1024-NEXT: sh s3, 1440(sp) -; RV64-1024-NEXT: srli s1, s3, 32 -; RV64-1024-NEXT: sh s1, 1444(sp) -; RV64-1024-NEXT: sh s6, 1432(sp) -; RV64-1024-NEXT: srli s1, s6, 32 -; RV64-1024-NEXT: sh s1, 1436(sp) -; RV64-1024-NEXT: sh s7, 1424(sp) -; RV64-1024-NEXT: srli s1, s7, 32 -; RV64-1024-NEXT: sh s1, 1428(sp) -; RV64-1024-NEXT: sh s8, 1416(sp) -; RV64-1024-NEXT: srli s1, s8, 32 -; RV64-1024-NEXT: sh s1, 1420(sp) -; RV64-1024-NEXT: sh s9, 1408(sp) -; RV64-1024-NEXT: srli s1, s9, 32 -; RV64-1024-NEXT: sh s1, 1412(sp) -; RV64-1024-NEXT: sh s10, 1400(sp) -; RV64-1024-NEXT: srli s1, s10, 32 -; RV64-1024-NEXT: sh s1, 1404(sp) -; RV64-1024-NEXT: sh s11, 1392(sp) -; RV64-1024-NEXT: srli s1, s11, 32 -; RV64-1024-NEXT: sh s1, 1396(sp) -; RV64-1024-NEXT: sh ra, 1384(sp) -; RV64-1024-NEXT: srli s1, ra, 32 -; RV64-1024-NEXT: sh s1, 1388(sp) -; RV64-1024-NEXT: sh a6, 1376(sp) -; RV64-1024-NEXT: srli s1, a6, 32 -; RV64-1024-NEXT: sh s1, 1380(sp) -; RV64-1024-NEXT: sh a7, 1368(sp) -; RV64-1024-NEXT: srli s1, a7, 32 -; RV64-1024-NEXT: sh s1, 1372(sp) -; RV64-1024-NEXT: sh t0, 1360(sp) -; RV64-1024-NEXT: srli s1, t0, 32 -; RV64-1024-NEXT: sh s1, 1364(sp) -; RV64-1024-NEXT: sh t1, 1352(sp) -; RV64-1024-NEXT: srli s1, t1, 32 -; RV64-1024-NEXT: sh s1, 1356(sp) -; RV64-1024-NEXT: sh t2, 1344(sp) -; RV64-1024-NEXT: srli s1, t2, 32 -; RV64-1024-NEXT: sh s1, 1348(sp) -; RV64-1024-NEXT: sh t3, 1336(sp) -; RV64-1024-NEXT: srli s1, t3, 32 -; RV64-1024-NEXT: sh s1, 1340(sp) -; RV64-1024-NEXT: sh a2, 1328(sp) -; RV64-1024-NEXT: srli a2, a2, 32 -; RV64-1024-NEXT: sh a2, 1332(sp) -; RV64-1024-NEXT: sh a3, 1320(sp) -; RV64-1024-NEXT: srli a2, a3, 32 -; RV64-1024-NEXT: sh a2, 1324(sp) -; RV64-1024-NEXT: sh a4, 1312(sp) -; RV64-1024-NEXT: srli a2, a4, 32 -; RV64-1024-NEXT: sh a2, 1316(sp) -; RV64-1024-NEXT: sh a5, 1304(sp) -; RV64-1024-NEXT: srli a2, a5, 32 -; RV64-1024-NEXT: sh a2, 1308(sp) -; RV64-1024-NEXT: sh a1, 1296(sp) -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1300(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1288(sp) -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1292(sp) -; RV64-1024-NEXT: vmv.x.s a1, v0 -; RV64-1024-NEXT: sh a1, 1280(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 31 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1284(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1272(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 30 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1276(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1264(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 29 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1268(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1256(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 28 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1260(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1248(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 27 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1252(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1240(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 26 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1244(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1232(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 25 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: 
sh a1, 1236(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1224(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 24 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1228(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1216(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 23 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1220(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1208(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 22 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1212(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1200(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 21 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1204(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1192(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 20 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1196(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1184(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 19 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1188(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1176(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 18 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1180(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1168(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 17 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1172(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1160(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 16 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1164(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1152(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 15 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1156(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1144(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 14 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1148(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1136(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 13 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1140(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1128(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 12 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1132(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1120(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 11 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1124(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1112(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 10 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1116(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1104(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 9 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1108(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1096(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 8 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1100(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1088(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 7 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1092(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1080(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 6 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1084(sp) -; RV64-1024-NEXT: vmv.x.s 
a1, v16 -; RV64-1024-NEXT: sh a1, 1072(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 5 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1076(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1064(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 4 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1068(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1056(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 3 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1060(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1048(sp) -; RV64-1024-NEXT: vslidedown.vi v16, v8, 2 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1052(sp) -; RV64-1024-NEXT: vmv.x.s a1, v16 -; RV64-1024-NEXT: sh a1, 1040(sp) -; RV64-1024-NEXT: vslidedown.vi v8, v8, 1 -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1044(sp) -; RV64-1024-NEXT: vmv.x.s a1, v8 -; RV64-1024-NEXT: sh a1, 1032(sp) -; RV64-1024-NEXT: srli a1, a1, 32 -; RV64-1024-NEXT: sh a1, 1036(sp) -; RV64-1024-NEXT: addi a2, zero, 256 -; RV64-1024-NEXT: vsetvli zero, a2, e16, m4, ta, mu -; RV64-1024-NEXT: addi a1, sp, 512 -; RV64-1024-NEXT: vle16.v v8, (a1) -; RV64-1024-NEXT: addi a1, sp, 1024 -; RV64-1024-NEXT: vle16.v v28, (a1) -; RV64-1024-NEXT: lui a1, 1026731 -; RV64-1024-NEXT: addiw a1, a1, -1365 -; RV64-1024-NEXT: slli a1, a1, 12 -; RV64-1024-NEXT: addi a1, a1, -1365 -; RV64-1024-NEXT: slli a1, a1, 12 -; RV64-1024-NEXT: addi a1, a1, -1365 -; RV64-1024-NEXT: slli a1, a1, 12 -; RV64-1024-NEXT: addi a1, a1, -1366 -; RV64-1024-NEXT: vsetivli zero, 4, e64, m1, ta, mu -; RV64-1024-NEXT: vmv.s.x v25, a1 +; RV64-1024-NEXT: csrr a2, vlenb +; RV64-1024-NEXT: slli a2, a2, 3 +; RV64-1024-NEXT: add a2, sp, a2 +; RV64-1024-NEXT: addi a2, a2, 16 +; RV64-1024-NEXT: vl8re8.v v0, (a2) # Unknown-size Folded Reload +; RV64-1024-NEXT: vrgather.vv v16, v0, v24 +; RV64-1024-NEXT: lui a2, 1026731 +; RV64-1024-NEXT: addiw a2, a2, -1365 +; RV64-1024-NEXT: slli a2, a2, 12 +; RV64-1024-NEXT: addi a2, a2, -1365 +; RV64-1024-NEXT: slli a2, a2, 12 +; RV64-1024-NEXT: addi a2, a2, -1365 +; RV64-1024-NEXT: slli a2, a2, 12 +; RV64-1024-NEXT: addi a2, a2, -1366 +; RV64-1024-NEXT: vsetivli zero, 8, e64, m1, ta, mu +; RV64-1024-NEXT: vmv.s.x v25, a2 ; RV64-1024-NEXT: vsetivli zero, 2, e64, m1, tu, mu ; RV64-1024-NEXT: vmv1r.v v0, v25 ; RV64-1024-NEXT: vslideup.vi v0, v25, 1 @@ -979,893 +204,69 @@ ; RV64-1024-NEXT: vslideup.vi v0, v25, 2 ; RV64-1024-NEXT: vsetivli zero, 4, e64, m1, tu, mu ; RV64-1024-NEXT: vslideup.vi v0, v25, 3 -; RV64-1024-NEXT: vsetvli zero, a2, e16, m4, ta, mu -; RV64-1024-NEXT: lui a1, %hi(.LCPI1_2) -; RV64-1024-NEXT: addi a1, a1, %lo(.LCPI1_2) -; RV64-1024-NEXT: vle16.v v12, (a1) -; RV64-1024-NEXT: lui a1, %hi(.LCPI1_3) -; RV64-1024-NEXT: addi a1, a1, %lo(.LCPI1_3) -; RV64-1024-NEXT: vle16.v v16, (a1) -; RV64-1024-NEXT: vrgather.vv v20, v28, v12 -; RV64-1024-NEXT: vsetvli zero, zero, e16, m4, tu, mu -; RV64-1024-NEXT: csrr a1, vlenb -; RV64-1024-NEXT: slli a1, a1, 3 -; RV64-1024-NEXT: add a1, sp, a1 -; RV64-1024-NEXT: addi a1, a1, 1944 -; RV64-1024-NEXT: vl4re8.v v24, (a1) # Unknown-size Folded Reload -; RV64-1024-NEXT: vrgather.vv v20, v24, v16, v0.t -; RV64-1024-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; RV64-1024-NEXT: lui a1, %hi(.LCPI1_4) -; RV64-1024-NEXT: addi a1, a1, %lo(.LCPI1_4) -; RV64-1024-NEXT: vle16.v v28, (a1) -; RV64-1024-NEXT: vrgather.vv v16, v8, v12 -; RV64-1024-NEXT: vsetvli zero, zero, e16, m4, tu, mu -; RV64-1024-NEXT: vrgather.vv v16, v24, v28, v0.t -; 
RV64-1024-NEXT: addi a1, a0, 512 -; RV64-1024-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; RV64-1024-NEXT: vse16.v v16, (a1) -; RV64-1024-NEXT: vse16.v v20, (a0) -; RV64-1024-NEXT: addi sp, s0, -2048 +; RV64-1024-NEXT: vsetivli zero, 5, e64, m1, tu, mu +; RV64-1024-NEXT: vslideup.vi v0, v25, 4 +; RV64-1024-NEXT: vsetivli zero, 6, e64, m1, tu, mu +; RV64-1024-NEXT: vslideup.vi v0, v25, 5 +; RV64-1024-NEXT: vsetivli zero, 7, e64, m1, tu, mu +; RV64-1024-NEXT: vslideup.vi v0, v25, 6 +; RV64-1024-NEXT: vsetivli zero, 8, e64, m1, tu, mu +; RV64-1024-NEXT: vslideup.vi v0, v25, 7 +; RV64-1024-NEXT: lui a2, %hi(.LCPI1_2) +; RV64-1024-NEXT: addi a2, a2, %lo(.LCPI1_2) +; RV64-1024-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; RV64-1024-NEXT: vle16.v v24, (a2) +; RV64-1024-NEXT: vsetvli zero, zero, e16, m8, tu, mu +; RV64-1024-NEXT: vrgather.vv v16, v8, v24, v0.t +; RV64-1024-NEXT: vsetvli zero, zero, e16, m8, ta, mu +; RV64-1024-NEXT: vse16.v v16, (a0) +; RV64-1024-NEXT: csrr a0, vlenb +; RV64-1024-NEXT: addi a1, zero, 24 +; RV64-1024-NEXT: mul a0, a0, a1 +; RV64-1024-NEXT: add sp, sp, a0 ; RV64-1024-NEXT: addi sp, sp, 16 -; RV64-1024-NEXT: ld s11, 1928(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld s10, 1936(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld s9, 1944(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld s8, 1952(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld s7, 1960(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld s6, 1968(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld s5, 1976(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld s4, 1984(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld s3, 1992(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld s2, 2000(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld s1, 2008(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload -; RV64-1024-NEXT: addi sp, sp, 2032 ; RV64-1024-NEXT: ret ; ; RV64-2048-LABEL: interleave512: ; RV64-2048: # %bb.0: # %entry -; RV64-2048-NEXT: addi sp, sp, -2032 -; RV64-2048-NEXT: .cfi_def_cfa_offset 2032 -; RV64-2048-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s1, 2008(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s2, 2000(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s3, 1992(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s4, 1984(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s5, 1976(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s6, 1968(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s7, 1960(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s8, 1952(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s9, 1944(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s10, 1936(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sd s11, 1928(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: .cfi_offset ra, -8 -; RV64-2048-NEXT: .cfi_offset s0, -16 -; RV64-2048-NEXT: .cfi_offset s1, -24 -; RV64-2048-NEXT: .cfi_offset s2, -32 -; RV64-2048-NEXT: .cfi_offset s3, -40 -; RV64-2048-NEXT: .cfi_offset s4, -48 -; RV64-2048-NEXT: .cfi_offset s5, -56 -; RV64-2048-NEXT: .cfi_offset s6, -64 -; RV64-2048-NEXT: .cfi_offset s7, -72 -; RV64-2048-NEXT: .cfi_offset s8, -80 -; RV64-2048-NEXT: .cfi_offset s9, -88 -; RV64-2048-NEXT: .cfi_offset s10, -96 -; RV64-2048-NEXT: .cfi_offset s11, -104 -; RV64-2048-NEXT: addi s0, sp, 2032 -; RV64-2048-NEXT: .cfi_def_cfa s0, 0 -; RV64-2048-NEXT: addi sp, sp, -16 -; RV64-2048-NEXT: csrr a3, vlenb -; RV64-2048-NEXT: addi a4, zero, 6 -; RV64-2048-NEXT: mul a3, a3, 
a4 -; RV64-2048-NEXT: sub sp, sp, a3 -; RV64-2048-NEXT: andi sp, sp, -512 ; RV64-2048-NEXT: addi a3, zero, 256 ; RV64-2048-NEXT: vsetvli zero, a3, e16, m2, ta, mu -; RV64-2048-NEXT: addi a3, zero, 256 -; RV64-2048-NEXT: vle16.v v26, (a1) -; RV64-2048-NEXT: lui a1, %hi(.LCPI1_0) -; RV64-2048-NEXT: addi a1, a1, %lo(.LCPI1_0) -; RV64-2048-NEXT: vle16.v v8, (a1) -; RV64-2048-NEXT: vle16.v v28, (a2) -; RV64-2048-NEXT: csrr a1, vlenb -; RV64-2048-NEXT: slli a1, a1, 2 -; RV64-2048-NEXT: add a1, sp, a1 -; RV64-2048-NEXT: addi a1, a1, 1944 -; RV64-2048-NEXT: vs2r.v v28, (a1) # Unknown-size Folded Spill -; RV64-2048-NEXT: vrgather.vv v12, v26, v8 -; RV64-2048-NEXT: vsetvli zero, zero, e32, m4, ta, mu +; RV64-2048-NEXT: vle16.v v12, (a1) +; RV64-2048-NEXT: vle16.v v8, (a2) +; RV64-2048-NEXT: addi a1, zero, 512 +; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; RV64-2048-NEXT: vmv.v.i v28, 0 -; RV64-2048-NEXT: addi a1, zero, 128 -; RV64-2048-NEXT: vsetvli zero, a1, e32, m4, tu, mu -; RV64-2048-NEXT: vslideup.vi v28, v12, 0 +; RV64-2048-NEXT: vsetvli zero, a3, e16, m4, tu, mu +; RV64-2048-NEXT: vmv4r.v v16, v28 +; RV64-2048-NEXT: vslideup.vi v16, v12, 0 +; RV64-2048-NEXT: vsetvli zero, a3, e16, m2, ta, mu +; RV64-2048-NEXT: vmv.v.i v12, 0 +; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; RV64-2048-NEXT: vslideup.vx v16, v12, a3 +; RV64-2048-NEXT: lui a2, %hi(.LCPI1_0) +; RV64-2048-NEXT: addi a2, a2, %lo(.LCPI1_0) +; RV64-2048-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; RV64-2048-NEXT: vle16.v v20, (a2) +; RV64-2048-NEXT: vrgather.vv v24, v16, v20 +; RV64-2048-NEXT: vsetvli zero, a3, e16, m4, tu, mu +; RV64-2048-NEXT: vslideup.vi v28, v8, 0 +; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; RV64-2048-NEXT: vslideup.vx v28, v12, a3 ; RV64-2048-NEXT: lui a2, %hi(.LCPI1_1) ; RV64-2048-NEXT: addi a2, a2, %lo(.LCPI1_1) -; RV64-2048-NEXT: vsetvli zero, a3, e16, m2, ta, mu -; RV64-2048-NEXT: vle16.v v10, (a2) -; RV64-2048-NEXT: vrgather.vv v12, v26, v10 -; RV64-2048-NEXT: vrgather.vv v16, v12, v8 -; RV64-2048-NEXT: vsetvli zero, zero, e32, m4, tu, mu -; RV64-2048-NEXT: vslideup.vx v28, v16, a1 -; RV64-2048-NEXT: addi a1, zero, 127 -; RV64-2048-NEXT: vsetivli zero, 1, e64, m4, ta, mu -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s a6, v8 -; RV64-2048-NEXT: addi a1, zero, 126 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s3, v8 -; RV64-2048-NEXT: addi a1, zero, 125 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s4, v8 -; RV64-2048-NEXT: addi a1, zero, 124 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s5, v8 -; RV64-2048-NEXT: addi a1, zero, 123 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s6, v8 -; RV64-2048-NEXT: addi a1, zero, 122 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s7, v8 -; RV64-2048-NEXT: addi a1, zero, 121 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s8, v8 -; RV64-2048-NEXT: addi a1, zero, 120 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s9, v8 -; RV64-2048-NEXT: addi a1, zero, 119 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t4, v8 -; RV64-2048-NEXT: addi a1, zero, 118 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t5, v8 -; RV64-2048-NEXT: addi a1, zero, 117 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t6, v8 -; RV64-2048-NEXT: addi a1, zero, 116 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; 
RV64-2048-NEXT: vmv.x.s s10, v8 -; RV64-2048-NEXT: addi a1, zero, 115 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s11, v8 -; RV64-2048-NEXT: addi a1, zero, 114 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t0, v8 -; RV64-2048-NEXT: addi a1, zero, 113 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t1, v8 -; RV64-2048-NEXT: addi a1, zero, 112 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t2, v8 -; RV64-2048-NEXT: addi a1, zero, 111 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s a3, v8 -; RV64-2048-NEXT: addi a1, zero, 110 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: addi a2, zero, 109 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a2 -; RV64-2048-NEXT: vmv.x.s a2, v8 -; RV64-2048-NEXT: addi a4, zero, 108 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a4 -; RV64-2048-NEXT: vmv.x.s a4, v8 -; RV64-2048-NEXT: addi s1, zero, 107 -; RV64-2048-NEXT: vslidedown.vx v8, v28, s1 -; RV64-2048-NEXT: vmv.x.s s2, v8 -; RV64-2048-NEXT: addi a5, zero, 106 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a5 -; RV64-2048-NEXT: vmv.x.s a5, v8 -; RV64-2048-NEXT: sd a5, 504(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: addi a5, zero, 105 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a5 -; RV64-2048-NEXT: vmv.x.s a5, v8 -; RV64-2048-NEXT: sd a5, 496(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: addi a5, zero, 104 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a5 -; RV64-2048-NEXT: vmv.x.s a5, v8 -; RV64-2048-NEXT: sd a5, 488(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: addi a5, zero, 103 -; RV64-2048-NEXT: vslidedown.vx v24, v28, a5 -; RV64-2048-NEXT: addi a5, zero, 102 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a5 -; RV64-2048-NEXT: addi a5, zero, 101 -; RV64-2048-NEXT: vslidedown.vx v12, v28, a5 -; RV64-2048-NEXT: addi a5, zero, 100 -; RV64-2048-NEXT: vslidedown.vx v16, v28, a5 -; RV64-2048-NEXT: addi a5, zero, 99 -; RV64-2048-NEXT: vslidedown.vx v20, v28, a5 -; RV64-2048-NEXT: addi a5, zero, 98 -; RV64-2048-NEXT: vslidedown.vx v0, v28, a5 -; RV64-2048-NEXT: addi a5, zero, 97 -; RV64-2048-NEXT: vslidedown.vx v4, v28, a5 -; RV64-2048-NEXT: vmv.x.s a5, v24 -; RV64-2048-NEXT: sd a5, 480(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sh a6, 1016(sp) -; RV64-2048-NEXT: srli a5, a6, 32 -; RV64-2048-NEXT: sh a5, 1020(sp) -; RV64-2048-NEXT: addi a5, zero, 96 -; RV64-2048-NEXT: vslidedown.vx v24, v28, a5 -; RV64-2048-NEXT: vmv.x.s s1, v8 -; RV64-2048-NEXT: sh s3, 1008(sp) -; RV64-2048-NEXT: srli a5, s3, 32 -; RV64-2048-NEXT: sh a5, 1012(sp) -; RV64-2048-NEXT: addi a5, zero, 95 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a5 -; RV64-2048-NEXT: vmv.x.s ra, v12 -; RV64-2048-NEXT: sh s4, 1000(sp) -; RV64-2048-NEXT: srli a5, s4, 32 -; RV64-2048-NEXT: sh a5, 1004(sp) -; RV64-2048-NEXT: addi a5, zero, 94 -; RV64-2048-NEXT: vslidedown.vx v12, v28, a5 -; RV64-2048-NEXT: vmv.x.s a6, v16 -; RV64-2048-NEXT: sh s5, 992(sp) -; RV64-2048-NEXT: srli a5, s5, 32 -; RV64-2048-NEXT: sh a5, 996(sp) -; RV64-2048-NEXT: addi a5, zero, 93 -; RV64-2048-NEXT: vslidedown.vx v16, v28, a5 -; RV64-2048-NEXT: vmv.x.s s5, v20 -; RV64-2048-NEXT: sh s6, 984(sp) -; RV64-2048-NEXT: srli a5, s6, 32 -; RV64-2048-NEXT: sh a5, 988(sp) -; RV64-2048-NEXT: addi a5, zero, 92 -; RV64-2048-NEXT: vslidedown.vx v20, v28, a5 -; RV64-2048-NEXT: vmv.x.s s6, v0 -; RV64-2048-NEXT: sh s7, 976(sp) -; RV64-2048-NEXT: srli a5, s7, 32 -; RV64-2048-NEXT: sh a5, 980(sp) -; RV64-2048-NEXT: addi a5, zero, 91 -; RV64-2048-NEXT: 
vslidedown.vx v0, v28, a5 -; RV64-2048-NEXT: vmv.x.s s7, v4 -; RV64-2048-NEXT: sh s8, 968(sp) -; RV64-2048-NEXT: srli a5, s8, 32 -; RV64-2048-NEXT: sh a5, 972(sp) -; RV64-2048-NEXT: addi a5, zero, 90 -; RV64-2048-NEXT: vslidedown.vx v4, v28, a5 -; RV64-2048-NEXT: vmv.x.s s8, v24 -; RV64-2048-NEXT: sh s9, 960(sp) -; RV64-2048-NEXT: srli a5, s9, 32 -; RV64-2048-NEXT: sh a5, 964(sp) -; RV64-2048-NEXT: addi a5, zero, 89 -; RV64-2048-NEXT: vslidedown.vx v24, v28, a5 -; RV64-2048-NEXT: vmv.x.s s9, v8 -; RV64-2048-NEXT: sh t4, 952(sp) -; RV64-2048-NEXT: srli a5, t4, 32 -; RV64-2048-NEXT: sh a5, 956(sp) -; RV64-2048-NEXT: addi a5, zero, 88 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a5 -; RV64-2048-NEXT: vmv.x.s a5, v12 -; RV64-2048-NEXT: sd a5, 440(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sh t5, 944(sp) -; RV64-2048-NEXT: srli a5, t5, 32 -; RV64-2048-NEXT: sh a5, 948(sp) -; RV64-2048-NEXT: addi a5, zero, 87 -; RV64-2048-NEXT: vslidedown.vx v12, v28, a5 -; RV64-2048-NEXT: vmv.x.s a5, v16 -; RV64-2048-NEXT: sd a5, 472(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sh t6, 936(sp) -; RV64-2048-NEXT: srli a5, t6, 32 -; RV64-2048-NEXT: sh a5, 940(sp) -; RV64-2048-NEXT: addi a5, zero, 86 -; RV64-2048-NEXT: vslidedown.vx v16, v28, a5 -; RV64-2048-NEXT: vmv.x.s a5, v20 -; RV64-2048-NEXT: sd a5, 464(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sh s10, 928(sp) -; RV64-2048-NEXT: srli a5, s10, 32 -; RV64-2048-NEXT: sh a5, 932(sp) -; RV64-2048-NEXT: addi a5, zero, 85 -; RV64-2048-NEXT: vslidedown.vx v20, v28, a5 -; RV64-2048-NEXT: vmv.x.s a5, v0 -; RV64-2048-NEXT: sd a5, 456(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sh s11, 920(sp) -; RV64-2048-NEXT: srli a5, s11, 32 -; RV64-2048-NEXT: sh a5, 924(sp) -; RV64-2048-NEXT: addi a5, zero, 84 -; RV64-2048-NEXT: vslidedown.vx v0, v28, a5 -; RV64-2048-NEXT: vmv.x.s a5, v4 -; RV64-2048-NEXT: sd a5, 448(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sh t0, 912(sp) -; RV64-2048-NEXT: srli a5, t0, 32 -; RV64-2048-NEXT: sh a5, 916(sp) -; RV64-2048-NEXT: addi a5, zero, 83 -; RV64-2048-NEXT: vslidedown.vx v4, v28, a5 -; RV64-2048-NEXT: vmv.x.s a5, v24 -; RV64-2048-NEXT: sd a5, 432(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sh t1, 904(sp) -; RV64-2048-NEXT: srli a5, t1, 32 -; RV64-2048-NEXT: sh a5, 908(sp) -; RV64-2048-NEXT: addi a5, zero, 82 -; RV64-2048-NEXT: vslidedown.vx v24, v28, a5 -; RV64-2048-NEXT: vmv.x.s a7, v8 -; RV64-2048-NEXT: sh t2, 896(sp) -; RV64-2048-NEXT: srli a5, t2, 32 -; RV64-2048-NEXT: sh a5, 900(sp) -; RV64-2048-NEXT: addi a5, zero, 81 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a5 -; RV64-2048-NEXT: vmv.x.s t0, v12 -; RV64-2048-NEXT: sh a3, 888(sp) -; RV64-2048-NEXT: srli a3, a3, 32 -; RV64-2048-NEXT: sh a3, 892(sp) -; RV64-2048-NEXT: addi a3, zero, 80 -; RV64-2048-NEXT: vslidedown.vx v12, v28, a3 -; RV64-2048-NEXT: vmv.x.s t1, v16 -; RV64-2048-NEXT: sh a1, 880(sp) -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 884(sp) -; RV64-2048-NEXT: addi a1, zero, 79 -; RV64-2048-NEXT: vslidedown.vx v16, v28, a1 -; RV64-2048-NEXT: vmv.x.s t2, v20 -; RV64-2048-NEXT: sh a2, 872(sp) -; RV64-2048-NEXT: srli a2, a2, 32 -; RV64-2048-NEXT: sh a2, 876(sp) -; RV64-2048-NEXT: addi a2, zero, 78 -; RV64-2048-NEXT: vslidedown.vx v20, v28, a2 -; RV64-2048-NEXT: vmv.x.s t3, v0 -; RV64-2048-NEXT: sh a4, 864(sp) -; RV64-2048-NEXT: srli a4, a4, 32 -; RV64-2048-NEXT: sh a4, 868(sp) -; RV64-2048-NEXT: addi a4, zero, 77 -; RV64-2048-NEXT: vslidedown.vx v0, v28, a4 -; RV64-2048-NEXT: vmv.x.s t4, v4 -; RV64-2048-NEXT: sh s2, 856(sp) -; RV64-2048-NEXT: srli a5, s2, 32 -; 
RV64-2048-NEXT: sh a5, 860(sp) -; RV64-2048-NEXT: addi a5, zero, 76 -; RV64-2048-NEXT: vslidedown.vx v4, v28, a5 -; RV64-2048-NEXT: vmv.x.s t5, v24 -; RV64-2048-NEXT: ld a1, 504(sp) # 8-byte Folded Reload -; RV64-2048-NEXT: sh a1, 848(sp) -; RV64-2048-NEXT: srli a5, a1, 32 -; RV64-2048-NEXT: sh a5, 852(sp) -; RV64-2048-NEXT: addi a5, zero, 75 -; RV64-2048-NEXT: vslidedown.vx v24, v28, a5 -; RV64-2048-NEXT: vmv.x.s t6, v8 -; RV64-2048-NEXT: ld a1, 496(sp) # 8-byte Folded Reload -; RV64-2048-NEXT: sh a1, 840(sp) -; RV64-2048-NEXT: srli a3, a1, 32 -; RV64-2048-NEXT: sh a3, 844(sp) -; RV64-2048-NEXT: addi a3, zero, 74 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a3 -; RV64-2048-NEXT: vmv.x.s s10, v12 -; RV64-2048-NEXT: ld a1, 488(sp) # 8-byte Folded Reload -; RV64-2048-NEXT: sh a1, 832(sp) -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 836(sp) -; RV64-2048-NEXT: addi a1, zero, 73 -; RV64-2048-NEXT: vslidedown.vx v12, v28, a1 -; RV64-2048-NEXT: vmv.x.s s11, v16 -; RV64-2048-NEXT: ld a1, 480(sp) # 8-byte Folded Reload -; RV64-2048-NEXT: sh a1, 824(sp) -; RV64-2048-NEXT: srli a2, a1, 32 -; RV64-2048-NEXT: sh a2, 828(sp) -; RV64-2048-NEXT: addi a2, zero, 72 -; RV64-2048-NEXT: vslidedown.vx v16, v28, a2 -; RV64-2048-NEXT: vmv.x.s s2, v20 -; RV64-2048-NEXT: sh s1, 816(sp) -; RV64-2048-NEXT: srli a4, s1, 32 -; RV64-2048-NEXT: sh a4, 820(sp) -; RV64-2048-NEXT: addi a4, zero, 71 -; RV64-2048-NEXT: vslidedown.vx v20, v28, a4 -; RV64-2048-NEXT: vmv.x.s s3, v0 -; RV64-2048-NEXT: sh ra, 808(sp) -; RV64-2048-NEXT: srli s1, ra, 32 -; RV64-2048-NEXT: sh s1, 812(sp) -; RV64-2048-NEXT: addi s1, zero, 70 -; RV64-2048-NEXT: vslidedown.vx v0, v28, s1 -; RV64-2048-NEXT: vmv.x.s s4, v4 -; RV64-2048-NEXT: sh a6, 800(sp) -; RV64-2048-NEXT: srli a5, a6, 32 -; RV64-2048-NEXT: sh a5, 804(sp) -; RV64-2048-NEXT: addi a5, zero, 69 -; RV64-2048-NEXT: vslidedown.vx v4, v28, a5 -; RV64-2048-NEXT: addi a1, sp, 1944 -; RV64-2048-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill -; RV64-2048-NEXT: vmv.x.s a5, v24 -; RV64-2048-NEXT: sd a5, 504(sp) # 8-byte Folded Spill -; RV64-2048-NEXT: sh s5, 792(sp) -; RV64-2048-NEXT: srli a3, s5, 32 -; RV64-2048-NEXT: sh a3, 796(sp) -; RV64-2048-NEXT: addi a3, zero, 68 -; RV64-2048-NEXT: vslidedown.vx v4, v28, a3 -; RV64-2048-NEXT: vmv.x.s s5, v8 -; RV64-2048-NEXT: sh s6, 784(sp) -; RV64-2048-NEXT: srli a1, s6, 32 -; RV64-2048-NEXT: sh a1, 788(sp) -; RV64-2048-NEXT: addi a1, zero, 67 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s6, v12 -; RV64-2048-NEXT: sh s7, 776(sp) -; RV64-2048-NEXT: srli a2, s7, 32 -; RV64-2048-NEXT: sh a2, 780(sp) -; RV64-2048-NEXT: addi a2, zero, 66 -; RV64-2048-NEXT: vslidedown.vx v12, v28, a2 -; RV64-2048-NEXT: vmv.x.s s7, v16 -; RV64-2048-NEXT: sh s8, 768(sp) -; RV64-2048-NEXT: srli a4, s8, 32 -; RV64-2048-NEXT: sh a4, 772(sp) -; RV64-2048-NEXT: addi a4, zero, 65 -; RV64-2048-NEXT: vslidedown.vx v24, v28, a4 -; RV64-2048-NEXT: vmv.x.s s8, v20 -; RV64-2048-NEXT: sh s9, 760(sp) -; RV64-2048-NEXT: srli s1, s9, 32 -; RV64-2048-NEXT: sh s1, 764(sp) -; RV64-2048-NEXT: addi s1, zero, 64 -; RV64-2048-NEXT: vslidedown.vx v16, v28, s1 -; RV64-2048-NEXT: vmv.x.s s9, v0 -; RV64-2048-NEXT: ld ra, 440(sp) # 8-byte Folded Reload -; RV64-2048-NEXT: sh ra, 752(sp) -; RV64-2048-NEXT: ld a1, 472(sp) # 8-byte Folded Reload -; RV64-2048-NEXT: sh a1, 744(sp) -; RV64-2048-NEXT: ld a2, 464(sp) # 8-byte Folded Reload -; RV64-2048-NEXT: sh a2, 736(sp) -; RV64-2048-NEXT: ld a4, 456(sp) # 8-byte Folded Reload -; RV64-2048-NEXT: sh a4, 728(sp) -; 
RV64-2048-NEXT: ld s1, 448(sp) # 8-byte Folded Reload -; RV64-2048-NEXT: sh s1, 720(sp) -; RV64-2048-NEXT: ld a6, 432(sp) # 8-byte Folded Reload -; RV64-2048-NEXT: sh a6, 712(sp) -; RV64-2048-NEXT: sh a7, 704(sp) -; RV64-2048-NEXT: sh t0, 696(sp) -; RV64-2048-NEXT: sh t1, 688(sp) -; RV64-2048-NEXT: sh t2, 680(sp) -; RV64-2048-NEXT: sh t3, 672(sp) -; RV64-2048-NEXT: sh t4, 664(sp) -; RV64-2048-NEXT: sh t5, 656(sp) -; RV64-2048-NEXT: sh t6, 648(sp) -; RV64-2048-NEXT: sh s10, 640(sp) -; RV64-2048-NEXT: sh s11, 632(sp) -; RV64-2048-NEXT: sh s2, 624(sp) -; RV64-2048-NEXT: sh s3, 616(sp) -; RV64-2048-NEXT: sh s4, 608(sp) -; RV64-2048-NEXT: sh a5, 600(sp) -; RV64-2048-NEXT: sh s5, 592(sp) -; RV64-2048-NEXT: sh s6, 584(sp) -; RV64-2048-NEXT: sh s7, 576(sp) -; RV64-2048-NEXT: sh s8, 568(sp) -; RV64-2048-NEXT: sh s9, 560(sp) -; RV64-2048-NEXT: srli a5, ra, 32 -; RV64-2048-NEXT: sh a5, 756(sp) -; RV64-2048-NEXT: addi a3, sp, 1944 -; RV64-2048-NEXT: vl4re8.v v20, (a3) # Unknown-size Folded Reload -; RV64-2048-NEXT: vmv.x.s ra, v20 -; RV64-2048-NEXT: sh ra, 552(sp) -; RV64-2048-NEXT: srli a3, a1, 32 -; RV64-2048-NEXT: sh a3, 748(sp) -; RV64-2048-NEXT: vmv.x.s a3, v4 -; RV64-2048-NEXT: sh a3, 544(sp) -; RV64-2048-NEXT: srli a1, a2, 32 -; RV64-2048-NEXT: sh a1, 740(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 536(sp) -; RV64-2048-NEXT: srli a2, a4, 32 -; RV64-2048-NEXT: sh a2, 732(sp) -; RV64-2048-NEXT: vmv.x.s a2, v12 -; RV64-2048-NEXT: sh a2, 528(sp) -; RV64-2048-NEXT: srli a4, s1, 32 -; RV64-2048-NEXT: sh a4, 724(sp) -; RV64-2048-NEXT: vmv.x.s a4, v24 -; RV64-2048-NEXT: sh a4, 520(sp) -; RV64-2048-NEXT: srli s1, a6, 32 -; RV64-2048-NEXT: sh s1, 716(sp) -; RV64-2048-NEXT: vmv.x.s s1, v16 -; RV64-2048-NEXT: sh s1, 512(sp) -; RV64-2048-NEXT: srli a5, a7, 32 -; RV64-2048-NEXT: sh a5, 708(sp) -; RV64-2048-NEXT: srli a5, t0, 32 -; RV64-2048-NEXT: sh a5, 700(sp) -; RV64-2048-NEXT: srli a5, t1, 32 -; RV64-2048-NEXT: sh a5, 692(sp) -; RV64-2048-NEXT: srli a5, t2, 32 -; RV64-2048-NEXT: sh a5, 684(sp) -; RV64-2048-NEXT: srli a5, t3, 32 -; RV64-2048-NEXT: sh a5, 676(sp) -; RV64-2048-NEXT: srli a5, t4, 32 -; RV64-2048-NEXT: sh a5, 668(sp) -; RV64-2048-NEXT: srli a5, t5, 32 -; RV64-2048-NEXT: sh a5, 660(sp) -; RV64-2048-NEXT: srli a5, t6, 32 -; RV64-2048-NEXT: sh a5, 652(sp) -; RV64-2048-NEXT: srli a5, s10, 32 -; RV64-2048-NEXT: sh a5, 644(sp) -; RV64-2048-NEXT: srli a5, s11, 32 -; RV64-2048-NEXT: sh a5, 636(sp) -; RV64-2048-NEXT: srli a5, s2, 32 -; RV64-2048-NEXT: sh a5, 628(sp) -; RV64-2048-NEXT: srli a5, s3, 32 -; RV64-2048-NEXT: sh a5, 620(sp) -; RV64-2048-NEXT: srli a5, s4, 32 -; RV64-2048-NEXT: sh a5, 612(sp) -; RV64-2048-NEXT: ld a5, 504(sp) # 8-byte Folded Reload -; RV64-2048-NEXT: srli a5, a5, 32 -; RV64-2048-NEXT: sh a5, 604(sp) -; RV64-2048-NEXT: srli a5, s5, 32 -; RV64-2048-NEXT: sh a5, 596(sp) -; RV64-2048-NEXT: srli a5, s6, 32 -; RV64-2048-NEXT: sh a5, 588(sp) -; RV64-2048-NEXT: srli a5, s7, 32 -; RV64-2048-NEXT: sh a5, 580(sp) -; RV64-2048-NEXT: srli a5, s8, 32 -; RV64-2048-NEXT: sh a5, 572(sp) -; RV64-2048-NEXT: srli a5, s9, 32 -; RV64-2048-NEXT: sh a5, 564(sp) -; RV64-2048-NEXT: srli a5, ra, 32 -; RV64-2048-NEXT: sh a5, 556(sp) -; RV64-2048-NEXT: srli a3, a3, 32 -; RV64-2048-NEXT: sh a3, 548(sp) -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 540(sp) -; RV64-2048-NEXT: srli a1, a2, 32 -; RV64-2048-NEXT: sh a1, 532(sp) -; RV64-2048-NEXT: srli a1, a4, 32 -; RV64-2048-NEXT: sh a1, 524(sp) -; RV64-2048-NEXT: srli a1, s1, 32 -; RV64-2048-NEXT: sh a1, 516(sp) -; 
RV64-2048-NEXT: addi a1, zero, 63 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t1, v8 -; RV64-2048-NEXT: addi a1, zero, 62 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s a4, v8 -; RV64-2048-NEXT: addi a1, zero, 61 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t0, v8 -; RV64-2048-NEXT: addi a1, zero, 60 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t2, v8 -; RV64-2048-NEXT: addi a1, zero, 59 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t3, v8 -; RV64-2048-NEXT: addi a1, zero, 58 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s3, v8 -; RV64-2048-NEXT: addi a1, zero, 57 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t4, v8 -; RV64-2048-NEXT: addi a1, zero, 56 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t5, v8 -; RV64-2048-NEXT: addi a1, zero, 55 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s t6, v8 -; RV64-2048-NEXT: addi a1, zero, 54 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s2, v8 -; RV64-2048-NEXT: addi a1, zero, 53 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s4, v8 -; RV64-2048-NEXT: addi a1, zero, 52 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s5, v8 -; RV64-2048-NEXT: addi a1, zero, 51 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s6, v8 -; RV64-2048-NEXT: addi a1, zero, 50 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s7, v8 -; RV64-2048-NEXT: addi a1, zero, 49 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s8, v8 -; RV64-2048-NEXT: addi a1, zero, 48 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s9, v8 -; RV64-2048-NEXT: addi a1, zero, 47 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s10, v8 -; RV64-2048-NEXT: addi a1, zero, 46 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s11, v8 -; RV64-2048-NEXT: addi a1, zero, 45 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s ra, v8 -; RV64-2048-NEXT: addi a1, zero, 44 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s a6, v8 -; RV64-2048-NEXT: addi a1, zero, 43 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s a7, v8 -; RV64-2048-NEXT: addi a1, zero, 42 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s s1, v8 -; RV64-2048-NEXT: addi a1, zero, 41 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s a5, v8 -; RV64-2048-NEXT: addi a1, zero, 40 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a1 -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: addi a2, zero, 39 -; RV64-2048-NEXT: vslidedown.vx v0, v28, a2 -; RV64-2048-NEXT: addi a2, zero, 38 -; RV64-2048-NEXT: vslidedown.vx v4, v28, a2 -; RV64-2048-NEXT: addi a2, zero, 37 -; RV64-2048-NEXT: vslidedown.vx v8, v28, a2 -; RV64-2048-NEXT: addi a2, zero, 36 -; RV64-2048-NEXT: vslidedown.vx v12, v28, a2 -; RV64-2048-NEXT: addi a2, zero, 35 -; RV64-2048-NEXT: vslidedown.vx v16, v28, a2 -; RV64-2048-NEXT: addi a2, zero, 34 -; RV64-2048-NEXT: vslidedown.vx v20, v28, a2 -; RV64-2048-NEXT: vmv.x.s a2, v28 -; RV64-2048-NEXT: sh a2, 1024(sp) -; RV64-2048-NEXT: srli a2, a2, 32 -; RV64-2048-NEXT: sh a2, 1028(sp) -; RV64-2048-NEXT: addi a2, zero, 33 -; RV64-2048-NEXT: vslidedown.vx v24, v28, a2 -; RV64-2048-NEXT: vmv.x.s a2, v0 -; RV64-2048-NEXT: 
sh t1, 1528(sp) -; RV64-2048-NEXT: srli a3, t1, 32 -; RV64-2048-NEXT: sh a3, 1532(sp) -; RV64-2048-NEXT: addi a3, zero, 32 -; RV64-2048-NEXT: vslidedown.vx v0, v28, a3 -; RV64-2048-NEXT: vmv.x.s a3, v4 -; RV64-2048-NEXT: sh a4, 1520(sp) -; RV64-2048-NEXT: srli a4, a4, 32 -; RV64-2048-NEXT: sh a4, 1524(sp) -; RV64-2048-NEXT: sh t0, 1512(sp) -; RV64-2048-NEXT: srli a4, t0, 32 -; RV64-2048-NEXT: sh a4, 1516(sp) -; RV64-2048-NEXT: sh t2, 1504(sp) -; RV64-2048-NEXT: srli a4, t2, 32 -; RV64-2048-NEXT: sh a4, 1508(sp) -; RV64-2048-NEXT: sh t3, 1496(sp) -; RV64-2048-NEXT: srli a4, t3, 32 -; RV64-2048-NEXT: sh a4, 1500(sp) -; RV64-2048-NEXT: sh s3, 1488(sp) -; RV64-2048-NEXT: srli a4, s3, 32 -; RV64-2048-NEXT: sh a4, 1492(sp) -; RV64-2048-NEXT: sh t4, 1480(sp) -; RV64-2048-NEXT: srli a4, t4, 32 -; RV64-2048-NEXT: sh a4, 1484(sp) -; RV64-2048-NEXT: sh t5, 1472(sp) -; RV64-2048-NEXT: srli a4, t5, 32 -; RV64-2048-NEXT: sh a4, 1476(sp) -; RV64-2048-NEXT: sh t6, 1464(sp) -; RV64-2048-NEXT: srli a4, t6, 32 -; RV64-2048-NEXT: sh a4, 1468(sp) -; RV64-2048-NEXT: sh s2, 1456(sp) -; RV64-2048-NEXT: srli a4, s2, 32 -; RV64-2048-NEXT: sh a4, 1460(sp) -; RV64-2048-NEXT: sh s4, 1448(sp) -; RV64-2048-NEXT: srli a4, s4, 32 -; RV64-2048-NEXT: sh a4, 1452(sp) -; RV64-2048-NEXT: sh s5, 1440(sp) -; RV64-2048-NEXT: srli a4, s5, 32 -; RV64-2048-NEXT: sh a4, 1444(sp) -; RV64-2048-NEXT: sh s6, 1432(sp) -; RV64-2048-NEXT: srli a4, s6, 32 -; RV64-2048-NEXT: sh a4, 1436(sp) -; RV64-2048-NEXT: sh s7, 1424(sp) -; RV64-2048-NEXT: srli a4, s7, 32 -; RV64-2048-NEXT: sh a4, 1428(sp) -; RV64-2048-NEXT: sh s8, 1416(sp) -; RV64-2048-NEXT: srli a4, s8, 32 -; RV64-2048-NEXT: sh a4, 1420(sp) -; RV64-2048-NEXT: sh s9, 1408(sp) -; RV64-2048-NEXT: srli a4, s9, 32 -; RV64-2048-NEXT: sh a4, 1412(sp) -; RV64-2048-NEXT: sh s10, 1400(sp) -; RV64-2048-NEXT: srli a4, s10, 32 -; RV64-2048-NEXT: sh a4, 1404(sp) -; RV64-2048-NEXT: sh s11, 1392(sp) -; RV64-2048-NEXT: srli a4, s11, 32 -; RV64-2048-NEXT: sh a4, 1396(sp) -; RV64-2048-NEXT: sh ra, 1384(sp) -; RV64-2048-NEXT: srli a4, ra, 32 -; RV64-2048-NEXT: sh a4, 1388(sp) -; RV64-2048-NEXT: sh a6, 1376(sp) -; RV64-2048-NEXT: srli a4, a6, 32 -; RV64-2048-NEXT: sh a4, 1380(sp) -; RV64-2048-NEXT: sh a7, 1368(sp) -; RV64-2048-NEXT: srli a4, a7, 32 -; RV64-2048-NEXT: sh a4, 1372(sp) -; RV64-2048-NEXT: sh s1, 1360(sp) -; RV64-2048-NEXT: srli a4, s1, 32 -; RV64-2048-NEXT: sh a4, 1364(sp) -; RV64-2048-NEXT: sh a5, 1352(sp) -; RV64-2048-NEXT: srli a4, a5, 32 -; RV64-2048-NEXT: sh a4, 1356(sp) -; RV64-2048-NEXT: sh a1, 1344(sp) -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1348(sp) -; RV64-2048-NEXT: sh a2, 1336(sp) -; RV64-2048-NEXT: srli a1, a2, 32 -; RV64-2048-NEXT: sh a1, 1340(sp) -; RV64-2048-NEXT: sh a3, 1328(sp) -; RV64-2048-NEXT: srli a1, a3, 32 -; RV64-2048-NEXT: sh a1, 1332(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1320(sp) -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1324(sp) -; RV64-2048-NEXT: vmv.x.s a1, v12 -; RV64-2048-NEXT: sh a1, 1312(sp) -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1316(sp) -; RV64-2048-NEXT: vmv.x.s a1, v16 -; RV64-2048-NEXT: sh a1, 1304(sp) -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1308(sp) -; RV64-2048-NEXT: vmv.x.s a1, v20 -; RV64-2048-NEXT: sh a1, 1296(sp) -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1300(sp) -; RV64-2048-NEXT: vmv.x.s a1, v24 -; RV64-2048-NEXT: sh a1, 1288(sp) -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1292(sp) -; RV64-2048-NEXT: 
vmv.x.s a1, v0 -; RV64-2048-NEXT: sh a1, 1280(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 31 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1284(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1272(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 30 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1276(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1264(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 29 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1268(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1256(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 28 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1260(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1248(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 27 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1252(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1240(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 26 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1244(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1232(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 25 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1236(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1224(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 24 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1228(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1216(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 23 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1220(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1208(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 22 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1212(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1200(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 21 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1204(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1192(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 20 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1196(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1184(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 19 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1188(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1176(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 18 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1180(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1168(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 17 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1172(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1160(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 16 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1164(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1152(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 15 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1156(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1144(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 14 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1148(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1136(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 13 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1140(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1128(sp) -; 
RV64-2048-NEXT: vslidedown.vi v8, v28, 12 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1132(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1120(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 11 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1124(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1112(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 10 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1116(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1104(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 9 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1108(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1096(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 8 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1100(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1088(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 7 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1092(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1080(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 6 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1084(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1072(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 5 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1076(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1064(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 4 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1068(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1056(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 3 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1060(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1048(sp) -; RV64-2048-NEXT: vslidedown.vi v8, v28, 2 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1052(sp) -; RV64-2048-NEXT: vmv.x.s a1, v8 -; RV64-2048-NEXT: sh a1, 1040(sp) -; RV64-2048-NEXT: vslidedown.vi v28, v28, 1 -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1044(sp) -; RV64-2048-NEXT: vmv.x.s a1, v28 -; RV64-2048-NEXT: sh a1, 1032(sp) -; RV64-2048-NEXT: srli a1, a1, 32 -; RV64-2048-NEXT: sh a1, 1036(sp) -; RV64-2048-NEXT: addi a2, zero, 256 -; RV64-2048-NEXT: vsetvli zero, a2, e16, m2, ta, mu -; RV64-2048-NEXT: addi a1, sp, 512 -; RV64-2048-NEXT: vle16.v v26, (a1) -; RV64-2048-NEXT: addi a1, sp, 1024 -; RV64-2048-NEXT: vle16.v v28, (a1) -; RV64-2048-NEXT: lui a1, 1026731 -; RV64-2048-NEXT: addiw a1, a1, -1365 -; RV64-2048-NEXT: slli a1, a1, 12 -; RV64-2048-NEXT: addi a1, a1, -1365 -; RV64-2048-NEXT: slli a1, a1, 12 -; RV64-2048-NEXT: addi a1, a1, -1365 -; RV64-2048-NEXT: slli a1, a1, 12 -; RV64-2048-NEXT: addi a1, a1, -1366 -; RV64-2048-NEXT: vsetivli zero, 4, e64, m1, ta, mu -; RV64-2048-NEXT: vmv.s.x v25, a1 +; RV64-2048-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; RV64-2048-NEXT: vle16.v v12, (a2) +; RV64-2048-NEXT: vrgather.vv v8, v24, v12 +; RV64-2048-NEXT: lui a2, 1026731 +; RV64-2048-NEXT: addiw a2, a2, -1365 +; RV64-2048-NEXT: slli a2, a2, 12 +; RV64-2048-NEXT: addi a2, a2, -1365 +; RV64-2048-NEXT: slli a2, a2, 12 +; RV64-2048-NEXT: addi a2, a2, -1365 +; RV64-2048-NEXT: slli a2, a2, 12 +; RV64-2048-NEXT: addi a2, a2, -1366 +; RV64-2048-NEXT: vsetivli zero, 8, e64, m1, ta, mu +; RV64-2048-NEXT: vmv.s.x v25, a2 ; RV64-2048-NEXT: vsetivli zero, 2, e64, m1, tu, mu ; RV64-2048-NEXT: vmv1r.v v0, v25 ; RV64-2048-NEXT: vslideup.vi v0, v25, 1 @@ -1873,48 +274,22 @@ 
 ; RV64-2048-NEXT: vslideup.vi v0, v25, 2
 ; RV64-2048-NEXT: vsetivli zero, 4, e64, m1, tu, mu
 ; RV64-2048-NEXT: vslideup.vi v0, v25, 3
-; RV64-2048-NEXT: vsetvli zero, a2, e16, m2, ta, mu
-; RV64-2048-NEXT: lui a1, %hi(.LCPI1_2)
-; RV64-2048-NEXT: addi a1, a1, %lo(.LCPI1_2)
-; RV64-2048-NEXT: vle16.v v30, (a1)
-; RV64-2048-NEXT: lui a1, %hi(.LCPI1_3)
-; RV64-2048-NEXT: addi a1, a1, %lo(.LCPI1_3)
-; RV64-2048-NEXT: vle16.v v8, (a1)
-; RV64-2048-NEXT: vrgather.vv v10, v28, v30
-; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, tu, mu
-; RV64-2048-NEXT: csrr a1, vlenb
-; RV64-2048-NEXT: slli a1, a1, 2
-; RV64-2048-NEXT: add a1, sp, a1
-; RV64-2048-NEXT: addi a1, a1, 1944
-; RV64-2048-NEXT: vl2re8.v v12, (a1) # Unknown-size Folded Reload
-; RV64-2048-NEXT: vrgather.vv v10, v12, v8, v0.t
-; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV64-2048-NEXT: lui a1, %hi(.LCPI1_4)
-; RV64-2048-NEXT: addi a1, a1, %lo(.LCPI1_4)
-; RV64-2048-NEXT: vle16.v v28, (a1)
-; RV64-2048-NEXT: vrgather.vv v8, v26, v30
-; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, tu, mu
-; RV64-2048-NEXT: vrgather.vv v8, v12, v28, v0.t
-; RV64-2048-NEXT: addi a1, a0, 512
-; RV64-2048-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV64-2048-NEXT: vse16.v v8, (a1)
-; RV64-2048-NEXT: vse16.v v10, (a0)
-; RV64-2048-NEXT: addi sp, s0, -2048
-; RV64-2048-NEXT: addi sp, sp, 16
-; RV64-2048-NEXT: ld s11, 1928(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld s10, 1936(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld s9, 1944(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld s8, 1952(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld s7, 1960(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld s6, 1968(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld s5, 1976(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld s4, 1984(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld s3, 1992(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld s2, 2000(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld s1, 2008(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload
-; RV64-2048-NEXT: addi sp, sp, 2032
+; RV64-2048-NEXT: vsetivli zero, 5, e64, m1, tu, mu
+; RV64-2048-NEXT: vslideup.vi v0, v25, 4
+; RV64-2048-NEXT: vsetivli zero, 6, e64, m1, tu, mu
+; RV64-2048-NEXT: vslideup.vi v0, v25, 5
+; RV64-2048-NEXT: vsetivli zero, 7, e64, m1, tu, mu
+; RV64-2048-NEXT: vslideup.vi v0, v25, 6
+; RV64-2048-NEXT: vsetivli zero, 8, e64, m1, tu, mu
+; RV64-2048-NEXT: vslideup.vi v0, v25, 7
+; RV64-2048-NEXT: lui a2, %hi(.LCPI1_2)
+; RV64-2048-NEXT: addi a2, a2, %lo(.LCPI1_2)
+; RV64-2048-NEXT: vsetvli zero, a1, e16, m4, ta, mu
+; RV64-2048-NEXT: vle16.v v12, (a2)
+; RV64-2048-NEXT: vsetvli zero, zero, e16, m4, tu, mu
+; RV64-2048-NEXT: vrgather.vv v8, v28, v12, v0.t
+; RV64-2048-NEXT: vsetvli zero, zero, e16, m4, ta, mu
+; RV64-2048-NEXT: vse16.v v8, (a0)
 ; RV64-2048-NEXT: ret
 entry:
   %ve = load <256 x i16>, <256 x i16>* %0, align 512
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -99,6 +99,8 @@
   case MVT::v64i8: return "MVT::v64i8";
   case MVT::v128i8: return "MVT::v128i8";
   case MVT::v256i8: return "MVT::v256i8";
+  case MVT::v512i8: return "MVT::v512i8";
+  case MVT::v1024i8: return "MVT::v1024i8";
   case MVT::v1i16: return "MVT::v1i16";
   case MVT::v2i16: return "MVT::v2i16";
   case MVT::v3i16: return "MVT::v3i16";
@@ -109,6 +111,7 @@
   case MVT::v64i16: return "MVT::v64i16";
   case MVT::v128i16: return "MVT::v128i16";
   case MVT::v256i16: return "MVT::v256i16";
+  case MVT::v512i16: return "MVT::v512i16";
   case MVT::v1i32: return "MVT::v1i32";
   case MVT::v2i32: return "MVT::v2i32";
   case MVT::v3i32: return "MVT::v3i32";
@@ -143,6 +146,7 @@
   case MVT::v64f16: return "MVT::v64f16";
   case MVT::v128f16: return "MVT::v128f16";
   case MVT::v256f16: return "MVT::v256f16";
+  case MVT::v512f16: return "MVT::v512f16";
   case MVT::v2bf16: return "MVT::v2bf16";
   case MVT::v3bf16: return "MVT::v3bf16";
   case MVT::v4bf16: return "MVT::v4bf16";