diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -2322,8 +2322,10 @@
   // Expand mask indices to byte indices and materialize them as operands
   for (int M : Mask) {
     for (size_t J = 0; J < LaneBytes; ++J) {
-      // Lower undefs (represented by -1 in mask) to zero
-      uint64_t ByteIndex = M == -1 ? 0 : (uint64_t)M * LaneBytes + J;
+      // Lower undefs (represented by -1 in the mask) to byte index J, so the
+      // undef bytes cover a whole lane of the input. This lets the VM match,
+      // e.g., an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
+      uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
       Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
     }
   }
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -31,7 +31,7 @@
 ; CHECK-NEXT: i64x2.gt_s
 ; CHECK-NEXT: v128.bitselect
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <2 x double> %x to <2 x i64>
@@ -75,7 +75,7 @@
 ; CHECK-NEXT: i64x2.replace_lane 1
 ; CHECK-NEXT: v128.bitselect
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptoui <2 x double> %x to <2 x i64>
@@ -113,7 +113,7 @@
 ; CHECK-NEXT: i64x2.gt_s
 ; CHECK-NEXT: v128.bitselect
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <2 x double> %x to <2 x i64>
@@ -302,7 +302,7 @@
 ; CHECK-NEXT: v128.const -32768, -32768, 0, 0
 ; CHECK-NEXT: i32x4.max_s
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
@@ -329,7 +329,7 @@
 ; CHECK-NEXT: v128.const 65535, 65535, 0, 0
 ; CHECK-NEXT: i32x4.min_u
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptoui <2 x double> %x to <2 x i32>
@@ -356,7 +356,7 @@
 ; CHECK-NEXT: v128.const 0, 0, 0, 0
 ; CHECK-NEXT: i32x4.max_s
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
@@ -379,7 +379,7 @@
 ; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
 ; CHECK-NEXT: i32x4.max_s
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
@@ -400,7 +400,7 @@
 ; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
 ; CHECK-NEXT: i32x4.min_u
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptoui <4 x float> %x to <4 x i32>
@@ -421,7 +421,7 @@
 ; CHECK-NEXT: v128.const 0, 0, 0, 0
 ; CHECK-NEXT: i32x4.max_s
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
@@ -1539,7 +1539,7 @@
 ; CHECK-NEXT: i64x2.gt_s
 ; CHECK-NEXT: v128.bitselect
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <2 x double> %x to <2 x i64>
@@ -1581,7 +1581,7 @@
 ; CHECK-NEXT: i64x2.replace_lane 1
 ; CHECK-NEXT: v128.bitselect
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptoui <2 x double> %x to <2 x i64>
@@ -1618,7 +1618,7 @@
 ; CHECK-NEXT: i64x2.gt_s
 ; CHECK-NEXT: v128.bitselect
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <2 x double> %x to <2 x i64>
@@ -1795,7 +1795,7 @@
 ; CHECK-NEXT: v128.const -32768, -32768, 0, 0
 ; CHECK-NEXT: i32x4.max_s
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
@@ -1820,7 +1820,7 @@
 ; CHECK-NEXT: v128.const 65535, 65535, 0, 0
 ; CHECK-NEXT: i32x4.min_u
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptoui <2 x double> %x to <2 x i32>
@@ -1846,7 +1846,7 @@
 ; CHECK-NEXT: v128.const 0, 0, 0, 0
 ; CHECK-NEXT: i32x4.max_s
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
@@ -1867,7 +1867,7 @@
 ; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
 ; CHECK-NEXT: i32x4.max_s
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
@@ -1886,7 +1886,7 @@
 ; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
 ; CHECK-NEXT: i32x4.min_u
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptoui <4 x float> %x to <4 x i32>
@@ -1906,7 +1906,7 @@
 ; CHECK-NEXT: v128.const 0, 0, 0, 0
 ; CHECK-NEXT: i32x4.max_s
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
@@ -190,7 +190,7 @@
 ; CHECK-LABEL: half_shuffle_i32x4:
 ; CHECK: .functype half_shuffle_i32x4 (v128) -> (v128)
 ; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: i8x16.shuffle $push0=, $0, $0, 0, 0, 0, 0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle $push0=, $0, $0, 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: i32.const $push1=, 0
 ; CHECK-NEXT: i32x4.replace_lane $push2=, $pop0, 0, $pop1
 ; CHECK-NEXT: i32.const $push3=, 3
diff --git a/llvm/test/CodeGen/WebAssembly/simd-concat.ll b/llvm/test/CodeGen/WebAssembly/simd-concat.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-concat.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-concat.ll
@@ -59,7 +59,7 @@
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 16, 17, 18, 19, 0, 1, 0, 1, 0, 1, 0, 1
 ; CHECK-NEXT: # fallthrough-return
   %v = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32>
   ret <4 x i16> %v
diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -314,7 +314,7 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: local.get 1
 ; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: f64x2.convert_low_i32x4_s
 ; CHECK-NEXT: v128.store 16
 ; CHECK-NEXT: local.get 0
@@ -334,7 +334,7 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: local.get 1
 ; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: f64x2.convert_low_i32x4_u
 ; CHECK-NEXT: v128.store 16
 ; CHECK-NEXT: local.get 0
@@ -355,7 +355,7 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: local.get 1
 ; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: f64x2.convert_low_i32x4_s
 ; CHECK-NEXT: v128.store 16
 ; CHECK-NEXT: local.get 0
@@ -375,7 +375,7 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: local.get 1
 ; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: f64x2.convert_low_i32x4_u
 ; CHECK-NEXT: v128.store 16
 ; CHECK-NEXT: local.get 0
@@ -395,7 +395,7 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: local.get 1
 ; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: f64x2.promote_low_f32x4
 ; CHECK-NEXT: v128.store 16
 ; CHECK-NEXT: local.get 0
@@ -415,7 +415,7 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: local.get 1
 ; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: f64x2.promote_low_f32x4
 ; CHECK-NEXT: v128.store 16
 ; CHECK-NEXT: local.get 0
@@ -434,7 +434,7 @@
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: local.get 1
-; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT: i8x16.shuffle 8, 9, 10, 11, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
 ; CHECK-NEXT: f64x2.promote_low_f32x4
 ; CHECK-NEXT: # fallthrough-return
   %v = shufflevector <4 x float> %x, <4 x float> %y, <2 x i32>
diff --git a/llvm/test/CodeGen/WebAssembly/simd-extending.ll b/llvm/test/CodeGen/WebAssembly/simd-extending.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-extending.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-extending.ll
@@ -188,7 +188,7 @@
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: i8x16.shuffle 2, 3, 0, 0, 4, 5, 0, 0, 6, 7, 0, 0, 8, 9, 0, 0
+; CHECK-NEXT: i8x16.shuffle 2, 3, 0, 1, 4, 5, 0, 1, 6, 7, 0, 1, 8, 9, 0, 1
 ; CHECK-NEXT: i32.const 16
 ; CHECK-NEXT: i32x4.shl
 ; CHECK-NEXT: i32.const 16
diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll
--- a/llvm/test/CodeGen/WebAssembly/simd.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd.ll
@@ -483,7 +483,7 @@
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shuffle_undef_v8i16 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
-; SIMD128-SAME: 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-SAME: 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) {
   %res = shufflevector <8 x i16> %x, <8 x i16> %y,
@@ -644,7 +644,7 @@
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shuffle_undef_v4i32 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
-; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-SAME: 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) {
   %res = shufflevector <4 x i32> %x, <4 x i32> %y,
@@ -793,7 +793,7 @@
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shuffle_undef_v2i64 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
-; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x i64> @shuffle_undef_v2i64(<2 x i64> %x, <2 x i64> %y) {
   %res = shufflevector <2 x i64> %x, <2 x i64> %y,
@@ -942,7 +942,7 @@
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shuffle_undef_v4f32 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
-; SIMD128-SAME: 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-SAME: 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) {
   %res = shufflevector <4 x float> %x, <4 x float> %y,
@@ -1092,7 +1092,7 @@
 ; NO-SIMD128-NOT: i8x16
 ; SIMD128-NEXT: .functype shuffle_undef_v2f64 (v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $0, $0,
-; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0{{$}}
+; SIMD128-SAME: 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
 define <2 x double> @shuffle_undef_v2f64(<2 x double> %x, <2 x double> %y) {
   %res = shufflevector <2 x double> %x, <2 x double> %y,