Index: test/CodeGen/WebAssembly/simd-arith.ll =================================================================== --- test/CodeGen/WebAssembly/simd-arith.ll +++ test/CodeGen/WebAssembly/simd-arith.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-SLOW -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128,SIMD128-FAST -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128-VM -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes CHECK,SIMD128-VM -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,NO-SIMD128 -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes CHECK,NO-SIMD128 +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes=CHECK,SIMD128,SIMD128-SLOW,SIMD128-UNIMPL,SIMD128-UNIMPL-SLOW +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 -fast-isel | FileCheck 
%s --check-prefixes=CHECK,SIMD128,SIMD128-FAST,SIMD128-UNIMPL,SIMD128-UNIMPL-FAST +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes=CHECK,SIMD128,SIMD128-SLOW,NO-SIMD64,NO-SIMD64-SLOW,SIMD128-VM,SIMD128-VM-SLOW +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 -fast-isel | FileCheck %s --check-prefixes=CHECK,SIMD128,SIMD128-FAST,NO-SIMD64,NO-SIMD64-FAST,SIMD128-VM,SIMD128-VM-FAST +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes=CHECK,NO-SIMD64,NO-SIMD64-SLOW,NO-SIMD128,NO-SIMD128-SLOW +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel | FileCheck %s --check-prefixes=CHECK,NO-SIMD64,NO-SIMD64-FAST,NO-SIMD128,NO-SIMD128-FAST ; check that a non-test run (including explicit locals pass) at least finishes ; RUN: llc < %s -O0 -mattr=+unimplemented-simd128 @@ -17,54 +17,878 @@ ; ============================================================================== ; 16 x i8 ; ============================================================================== -; CHECK-LABEL: add_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype add_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} + define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: add_v16i8: +; SIMD128: .functype add_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: i8x16.add $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: add_v16i8: +; NO-SIMD128-SLOW: .functype add_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.add $push0=, $16, $32 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $15, $31 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $14, $30 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push10=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-SLOW-NEXT: i32.add $push9=, $13, $29 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push13=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $12, $28 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push16=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-SLOW-NEXT: i32.add $push15=, $11, $27 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.add $push18=, $10, $26 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-SLOW-NEXT: i32.add $push21=, $9, $25 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop21 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.add $push22=, $8, $24 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push26=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-SLOW-NEXT: 
i32.add $push25=, $7, $23 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop27), $pop25 +; NO-SIMD128-SLOW-NEXT: i32.const $push29=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.add $push28=, $6, $22 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-SLOW-NEXT: i32.add $push31=, $5, $21 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop31 +; NO-SIMD128-SLOW-NEXT: i32.const $push33=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.add $push32=, $4, $20 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.add $push35=, $3, $19 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop35 +; NO-SIMD128-SLOW-NEXT: i32.add $push36=, $2, $18 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.add $push37=, $1, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop37 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: add_v16i8: +; NO-SIMD128-FAST: .functype add_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.add $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.add $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; 
NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.add $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a 
= add <16 x i8> %x, %y ret <16 x i8> %a } -; CHECK-LABEL: sub_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype sub_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: sub_v16i8: +; SIMD128: .functype sub_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: i8x16.sub $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: sub_v16i8: +; NO-SIMD128-SLOW: .functype sub_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.sub $push0=, $16, $32 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.sub $push3=, $15, $31 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.sub $push6=, $14, $30 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push10=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-SLOW-NEXT: i32.sub $push9=, $13, $29 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push13=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-SLOW-NEXT: i32.sub $push12=, $12, $28 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push16=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-SLOW-NEXT: i32.sub $push15=, $11, $27 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, 9 
+; NO-SIMD128-SLOW-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.sub $push18=, $10, $26 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-SLOW-NEXT: i32.sub $push21=, $9, $25 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop21 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.sub $push22=, $8, $24 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push26=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-SLOW-NEXT: i32.sub $push25=, $7, $23 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop27), $pop25 +; NO-SIMD128-SLOW-NEXT: i32.const $push29=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.sub $push28=, $6, $22 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-SLOW-NEXT: i32.sub $push31=, $5, $21 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop31 +; NO-SIMD128-SLOW-NEXT: i32.const $push33=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.sub $push32=, $4, $20 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.sub $push35=, $3, $19 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop35 +; NO-SIMD128-SLOW-NEXT: i32.sub $push36=, $2, $18 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.sub $push37=, $1, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop37 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sub_v16i8: +; NO-SIMD128-FAST: .functype sub_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.sub $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: 
i32.sub $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.sub $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.sub $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.sub $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add 
$push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.sub $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.sub $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.sub $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> %x, %y ret <16 x i8> %a } -; CHECK-LABEL: mul_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype mul_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.mul $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: mul_v16i8: +; SIMD128: .functype mul_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: i8x16.mul $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: mul_v16i8: +; NO-SIMD128-SLOW: .functype mul_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.mul $push0=, $16, $32 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.mul $push3=, $15, $31 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.mul $push6=, $14, $30 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push10=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push11=, 
$0, $pop10 +; NO-SIMD128-SLOW-NEXT: i32.mul $push9=, $13, $29 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push13=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-SLOW-NEXT: i32.mul $push12=, $12, $28 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push16=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-SLOW-NEXT: i32.mul $push15=, $11, $27 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.mul $push18=, $10, $26 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-SLOW-NEXT: i32.mul $push21=, $9, $25 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop21 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.mul $push22=, $8, $24 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push26=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-SLOW-NEXT: i32.mul $push25=, $7, $23 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop27), $pop25 +; NO-SIMD128-SLOW-NEXT: i32.const $push29=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.mul $push28=, $6, $22 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-SLOW-NEXT: i32.mul $push31=, $5, $21 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop31 +; NO-SIMD128-SLOW-NEXT: i32.const $push33=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.mul $push32=, $4, $20 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.mul $push35=, $3, $19 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop35 +; NO-SIMD128-SLOW-NEXT: i32.mul $push36=, $2, $18 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.mul 
$push37=, $1, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop37 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: mul_v16i8: +; NO-SIMD128-FAST: .functype mul_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.mul $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.mul $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.mul $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: 
i32.mul $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.mul $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.mul $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.mul $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = mul <16 x i8> %x, %y ret <16 x i8> %a } -; CHECK-LABEL: neg_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype neg_v16i8 (v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @neg_v16i8(<16 x i8> %x) { +; SIMD128-LABEL: neg_v16i8: +; SIMD128: .functype neg_v16i8 (v128) -> (v128) +; SIMD128-NEXT: i8x16.neg $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: neg_v16i8: +; NO-SIMD128-SLOW: .functype neg_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push1=, $pop0, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 
0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push53=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push4=, $pop53, $15 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop6), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push8=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-SLOW-NEXT: i32.const $push52=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push7=, $pop52, $14 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop9), $pop7 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.const $push51=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push10=, $pop51, $13 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push14=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push50=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push13=, $pop50, $12 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop15), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.const $push17=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-SLOW-NEXT: i32.const $push49=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push16=, $pop49, $11 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-SLOW-NEXT: i32.const $push20=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-SLOW-NEXT: i32.const $push48=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push19=, $pop48, $10 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop21), $pop19 +; NO-SIMD128-SLOW-NEXT: i32.const $push47=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push22=, $pop47, $9 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push24=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-SLOW-NEXT: i32.const $push46=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push23=, $pop46, $8 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-SLOW-NEXT: i32.const 
$push27=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-SLOW-NEXT: i32.const $push45=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push26=, $pop45, $7 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-SLOW-NEXT: i32.const $push30=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-SLOW-NEXT: i32.const $push44=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push29=, $pop44, $6 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop31), $pop29 +; NO-SIMD128-SLOW-NEXT: i32.const $push43=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push32=, $pop43, $5 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.const $push34=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-SLOW-NEXT: i32.const $push42=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push33=, $pop42, $4 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-SLOW-NEXT: i32.const $push41=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push36=, $pop41, $3 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.const $push40=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push37=, $pop40, $2 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop37 +; NO-SIMD128-SLOW-NEXT: i32.const $push39=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push38=, $pop39, $1 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop38 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: neg_v16i8: +; NO-SIMD128-FAST: .functype neg_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop53, $2 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop52, $3 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 +; 
NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop51, $4 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop50, $5 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop49, $6 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop48, $7 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop47, $8 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop46, $9 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop45, $10 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push23=, $pop44, $11 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 0 +; NO-SIMD128-FAST-NEXT: 
i32.sub $push26=, $pop43, $12 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push29=, $pop42, $13 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push32=, $pop41, $14 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push35=, $pop40, $15 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push38=, $pop39, $16 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, %x ret <16 x i8> %a } -; CHECK-LABEL: shl_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shl_v16i8 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) { +; SIMD128-UNIMPL-LABEL: shl_v16i8: +; SIMD128-UNIMPL: .functype shl_v16i8 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i8x16.shl $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shl_v16i8: +; SIMD128-VM-SLOW: .functype shl_v16i8 (v128, i32) -> (v128) +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push2=, $1 +; SIMD128-VM-SLOW-NEXT: i32.const $push0=, 7 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push1=, $pop0 +; SIMD128-VM-SLOW-NEXT: v128.and $push100=, 
$pop2, $pop1 +; SIMD128-VM-SLOW-NEXT: local.tee $push99=, $2=, $pop100 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push7=, $pop99, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push98=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push8=, $pop7, $pop98 +; SIMD128-VM-SLOW-NEXT: i32.shl $push10=, $pop9, $pop8 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push11=, $pop10 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push5=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push3=, $2, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push97=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push4=, $pop3, $pop97 +; SIMD128-VM-SLOW-NEXT: i32.shl $push6=, $pop5, $pop4 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push12=, $pop11, 1, $pop6 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push15=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push13=, $2, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push96=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push14=, $pop13, $pop96 +; SIMD128-VM-SLOW-NEXT: i32.shl $push16=, $pop15, $pop14 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push17=, $pop12, 2, $pop16 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push20=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push18=, $2, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push95=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push19=, $pop18, $pop95 +; SIMD128-VM-SLOW-NEXT: i32.shl $push21=, $pop20, $pop19 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push22=, $pop17, 3, $pop21 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push25=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push23=, $2, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push94=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push24=, $pop23, $pop94 +; SIMD128-VM-SLOW-NEXT: i32.shl $push26=, $pop25, $pop24 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push27=, $pop22, 4, $pop26 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push30=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push28=, $2, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push93=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push29=, $pop28, 
$pop93 +; SIMD128-VM-SLOW-NEXT: i32.shl $push31=, $pop30, $pop29 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push32=, $pop27, 5, $pop31 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push35=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push33=, $2, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push92=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push34=, $pop33, $pop92 +; SIMD128-VM-SLOW-NEXT: i32.shl $push36=, $pop35, $pop34 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push37=, $pop32, 6, $pop36 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push40=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push38=, $2, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push91=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push39=, $pop38, $pop91 +; SIMD128-VM-SLOW-NEXT: i32.shl $push41=, $pop40, $pop39 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push42=, $pop37, 7, $pop41 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push45=, $0, 8 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push43=, $2, 8 +; SIMD128-VM-SLOW-NEXT: i32.const $push90=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push44=, $pop43, $pop90 +; SIMD128-VM-SLOW-NEXT: i32.shl $push46=, $pop45, $pop44 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push47=, $pop42, 8, $pop46 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push50=, $0, 9 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push48=, $2, 9 +; SIMD128-VM-SLOW-NEXT: i32.const $push89=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push49=, $pop48, $pop89 +; SIMD128-VM-SLOW-NEXT: i32.shl $push51=, $pop50, $pop49 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push52=, $pop47, 9, $pop51 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push55=, $0, 10 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push53=, $2, 10 +; SIMD128-VM-SLOW-NEXT: i32.const $push88=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push54=, $pop53, $pop88 +; SIMD128-VM-SLOW-NEXT: i32.shl $push56=, $pop55, $pop54 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push57=, $pop52, 10, $pop56 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push60=, $0, 11 
+; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push58=, $2, 11 +; SIMD128-VM-SLOW-NEXT: i32.const $push87=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push59=, $pop58, $pop87 +; SIMD128-VM-SLOW-NEXT: i32.shl $push61=, $pop60, $pop59 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push62=, $pop57, 11, $pop61 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push65=, $0, 12 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push63=, $2, 12 +; SIMD128-VM-SLOW-NEXT: i32.const $push86=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push64=, $pop63, $pop86 +; SIMD128-VM-SLOW-NEXT: i32.shl $push66=, $pop65, $pop64 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push67=, $pop62, 12, $pop66 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push70=, $0, 13 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push68=, $2, 13 +; SIMD128-VM-SLOW-NEXT: i32.const $push85=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push69=, $pop68, $pop85 +; SIMD128-VM-SLOW-NEXT: i32.shl $push71=, $pop70, $pop69 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push72=, $pop67, 13, $pop71 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push75=, $0, 14 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push73=, $2, 14 +; SIMD128-VM-SLOW-NEXT: i32.const $push84=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push74=, $pop73, $pop84 +; SIMD128-VM-SLOW-NEXT: i32.shl $push76=, $pop75, $pop74 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push77=, $pop72, 14, $pop76 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push80=, $0, 15 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push78=, $2, 15 +; SIMD128-VM-SLOW-NEXT: i32.const $push83=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push79=, $pop78, $pop83 +; SIMD128-VM-SLOW-NEXT: i32.shl $push81=, $pop80, $pop79 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push82=, $pop77, 15, $pop81 +; SIMD128-VM-SLOW-NEXT: return $pop82 +; +; SIMD128-VM-FAST-LABEL: shl_v16i8: +; SIMD128-VM-FAST: .functype shl_v16i8 (v128, i32) -> (v128) +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push10=, $0, 0 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push1=, $1 
+; SIMD128-VM-FAST-NEXT: i32.const $push2=, 7 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push3=, $pop2 +; SIMD128-VM-FAST-NEXT: v128.and $push100=, $pop1, $pop3 +; SIMD128-VM-FAST-NEXT: local.tee $push99=, $2=, $pop100 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push8=, $pop99, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push98=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push9=, $pop8, $pop98 +; SIMD128-VM-FAST-NEXT: i32.shl $push11=, $pop10, $pop9 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push12=, $pop11 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push6=, $0, 1 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push4=, $2, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push97=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push5=, $pop4, $pop97 +; SIMD128-VM-FAST-NEXT: i32.shl $push7=, $pop6, $pop5 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push13=, $pop12, 1, $pop7 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push16=, $0, 2 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push14=, $2, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push96=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push15=, $pop14, $pop96 +; SIMD128-VM-FAST-NEXT: i32.shl $push17=, $pop16, $pop15 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push18=, $pop13, 2, $pop17 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push21=, $0, 3 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push19=, $2, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push95=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push20=, $pop19, $pop95 +; SIMD128-VM-FAST-NEXT: i32.shl $push22=, $pop21, $pop20 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push23=, $pop18, 3, $pop22 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push26=, $0, 4 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push24=, $2, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push94=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push25=, $pop24, $pop94 +; SIMD128-VM-FAST-NEXT: i32.shl $push27=, $pop26, $pop25 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push28=, $pop23, 4, $pop27 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push31=, $0, 5 +; 
SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push29=, $2, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push93=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push30=, $pop29, $pop93 +; SIMD128-VM-FAST-NEXT: i32.shl $push32=, $pop31, $pop30 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push33=, $pop28, 5, $pop32 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push36=, $0, 6 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push34=, $2, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push92=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push35=, $pop34, $pop92 +; SIMD128-VM-FAST-NEXT: i32.shl $push37=, $pop36, $pop35 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push38=, $pop33, 6, $pop37 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push41=, $0, 7 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push39=, $2, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push91=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push40=, $pop39, $pop91 +; SIMD128-VM-FAST-NEXT: i32.shl $push42=, $pop41, $pop40 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push43=, $pop38, 7, $pop42 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push46=, $0, 8 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push44=, $2, 8 +; SIMD128-VM-FAST-NEXT: i32.const $push90=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push45=, $pop44, $pop90 +; SIMD128-VM-FAST-NEXT: i32.shl $push47=, $pop46, $pop45 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push48=, $pop43, 8, $pop47 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push51=, $0, 9 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push49=, $2, 9 +; SIMD128-VM-FAST-NEXT: i32.const $push89=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push50=, $pop49, $pop89 +; SIMD128-VM-FAST-NEXT: i32.shl $push52=, $pop51, $pop50 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push53=, $pop48, 9, $pop52 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push56=, $0, 10 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push54=, $2, 10 +; SIMD128-VM-FAST-NEXT: i32.const $push88=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push55=, $pop54, $pop88 +; SIMD128-VM-FAST-NEXT: i32.shl 
$push57=, $pop56, $pop55 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push58=, $pop53, 10, $pop57 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push61=, $0, 11 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push59=, $2, 11 +; SIMD128-VM-FAST-NEXT: i32.const $push87=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push60=, $pop59, $pop87 +; SIMD128-VM-FAST-NEXT: i32.shl $push62=, $pop61, $pop60 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push63=, $pop58, 11, $pop62 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push66=, $0, 12 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push64=, $2, 12 +; SIMD128-VM-FAST-NEXT: i32.const $push86=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push65=, $pop64, $pop86 +; SIMD128-VM-FAST-NEXT: i32.shl $push67=, $pop66, $pop65 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push68=, $pop63, 12, $pop67 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push71=, $0, 13 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push69=, $2, 13 +; SIMD128-VM-FAST-NEXT: i32.const $push85=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push70=, $pop69, $pop85 +; SIMD128-VM-FAST-NEXT: i32.shl $push72=, $pop71, $pop70 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push73=, $pop68, 13, $pop72 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push76=, $0, 14 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push74=, $2, 14 +; SIMD128-VM-FAST-NEXT: i32.const $push84=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push75=, $pop74, $pop84 +; SIMD128-VM-FAST-NEXT: i32.shl $push77=, $pop76, $pop75 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push78=, $pop73, 14, $pop77 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push81=, $0, 15 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push79=, $2, 15 +; SIMD128-VM-FAST-NEXT: i32.const $push83=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push80=, $pop79, $pop83 +; SIMD128-VM-FAST-NEXT: i32.shl $push82=, $pop81, $pop80 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push0=, $pop78, 15, $pop82 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shl_v16i8: +; 
NO-SIMD128-SLOW: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push40=, $17, $pop2 +; NO-SIMD128-SLOW-NEXT: local.tee $push39=, $17=, $pop40 +; NO-SIMD128-SLOW-NEXT: i32.shl $push3=, $16, $pop39 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop1), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.shl $push6=, $15, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop5), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.shl $push9=, $14, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push10=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-SLOW-NEXT: i32.shl $push12=, $13, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push13=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-SLOW-NEXT: i32.shl $push15=, $12, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push16=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push18=, $11, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop17), $pop18 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.shl $push21=, $10, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop20), $pop21 +; NO-SIMD128-SLOW-NEXT: i32.shl $push22=, $9, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.shl $push25=, $8, $17 +; 
NO-SIMD128-SLOW-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-SLOW-NEXT: i32.const $push26=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-SLOW-NEXT: i32.shl $push28=, $7, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-SLOW-NEXT: i32.const $push29=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.shl $push31=, $6, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-SLOW-NEXT: i32.shl $push32=, $5, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.const $push33=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.shl $push35=, $4, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop34), $pop35 +; NO-SIMD128-SLOW-NEXT: i32.shl $push36=, $3, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.shl $push37=, $2, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop37 +; NO-SIMD128-SLOW-NEXT: i32.shl $push38=, $1, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop38 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_v16i8: +; NO-SIMD128-FAST: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push39=, $17=, $pop40 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $17 +; 
NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.shl $push17=, $9, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $10, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.shl $push23=, $11, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $12, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.shl $push29=, $13, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-FAST-NEXT: i32.shl $push32=, $14, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shl $push35=, $15, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 
0($pop34), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $16, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, <16 x i32> %a } -; CHECK-LABEL: shl_const_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shl_const_v16i8 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5 -; SIMD128-NEXT: i8x16.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shl_const_v16i8(<16 x i8> %v) { +; SIMD128-UNIMPL-SLOW-LABEL: shl_const_v16i8: +; SIMD128-UNIMPL-SLOW: .functype shl_const_v16i8 (v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.shl $push1=, $0, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop1 +; +; SIMD128-UNIMPL-FAST-LABEL: shl_const_v16i8: +; SIMD128-UNIMPL-FAST: .functype shl_const_v16i8 (v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.shl $push0=, $0, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shl_const_v16i8: +; SIMD128-VM-SLOW: .functype shl_const_v16i8 (v128) -> (v128) +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push3=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push1=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push4=, $pop3, $pop1 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push5=, $pop4 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push0=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push63=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push2=, $pop0, $pop63 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push6=, $pop5, 1, $pop2 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push7=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push62=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push8=, $pop7, $pop62 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane 
$push9=, $pop6, 2, $pop8 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push10=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push61=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push11=, $pop10, $pop61 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push12=, $pop9, 3, $pop11 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push13=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push60=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push14=, $pop13, $pop60 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push15=, $pop12, 4, $pop14 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push16=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push59=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push17=, $pop16, $pop59 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push18=, $pop15, 5, $pop17 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push19=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push58=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push20=, $pop19, $pop58 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push21=, $pop18, 6, $pop20 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push22=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push57=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push23=, $pop22, $pop57 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push24=, $pop21, 7, $pop23 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push25=, $0, 8 +; SIMD128-VM-SLOW-NEXT: i32.const $push56=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push26=, $pop25, $pop56 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push27=, $pop24, 8, $pop26 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push28=, $0, 9 +; SIMD128-VM-SLOW-NEXT: i32.const $push55=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push29=, $pop28, $pop55 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push30=, $pop27, 9, $pop29 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push31=, $0, 10 +; SIMD128-VM-SLOW-NEXT: i32.const $push54=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push32=, $pop31, $pop54 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push33=, $pop30, 10, $pop32 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push34=, $0, 
11 +; SIMD128-VM-SLOW-NEXT: i32.const $push53=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push35=, $pop34, $pop53 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push36=, $pop33, 11, $pop35 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push37=, $0, 12 +; SIMD128-VM-SLOW-NEXT: i32.const $push52=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push38=, $pop37, $pop52 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push39=, $pop36, 12, $pop38 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push40=, $0, 13 +; SIMD128-VM-SLOW-NEXT: i32.const $push51=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push41=, $pop40, $pop51 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push42=, $pop39, 13, $pop41 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push43=, $0, 14 +; SIMD128-VM-SLOW-NEXT: i32.const $push50=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push44=, $pop43, $pop50 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push45=, $pop42, 14, $pop44 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push46=, $0, 15 +; SIMD128-VM-SLOW-NEXT: i32.const $push49=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push47=, $pop46, $pop49 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push48=, $pop45, 15, $pop47 +; SIMD128-VM-SLOW-NEXT: return $pop48 +; +; SIMD128-VM-FAST-LABEL: shl_const_v16i8: +; SIMD128-VM-FAST: .functype shl_const_v16i8 (v128) -> (v128) +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push4=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push2=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push5=, $pop4, $pop2 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push6=, $pop5 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push1=, $0, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push63=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push3=, $pop1, $pop63 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push7=, $pop6, 1, $pop3 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push8=, $0, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push62=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push9=, $pop8, $pop62 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push10=, $pop7, 2, $pop9 +; SIMD128-VM-FAST-NEXT: 
i8x16.extract_lane_s $push11=, $0, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push61=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push12=, $pop11, $pop61 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push13=, $pop10, 3, $pop12 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push14=, $0, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push60=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push15=, $pop14, $pop60 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push16=, $pop13, 4, $pop15 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push17=, $0, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push59=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push18=, $pop17, $pop59 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push19=, $pop16, 5, $pop18 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push20=, $0, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push58=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push21=, $pop20, $pop58 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push22=, $pop19, 6, $pop21 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push23=, $0, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push57=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push24=, $pop23, $pop57 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push25=, $pop22, 7, $pop24 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push26=, $0, 8 +; SIMD128-VM-FAST-NEXT: i32.const $push56=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push27=, $pop26, $pop56 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push28=, $pop25, 8, $pop27 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push29=, $0, 9 +; SIMD128-VM-FAST-NEXT: i32.const $push55=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push30=, $pop29, $pop55 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push31=, $pop28, 9, $pop30 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push32=, $0, 10 +; SIMD128-VM-FAST-NEXT: i32.const $push54=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push33=, $pop32, $pop54 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push34=, $pop31, 10, $pop33 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push35=, $0, 11 +; SIMD128-VM-FAST-NEXT: i32.const $push53=, 5 
+; SIMD128-VM-FAST-NEXT: i32.shl $push36=, $pop35, $pop53 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push37=, $pop34, 11, $pop36 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push38=, $0, 12 +; SIMD128-VM-FAST-NEXT: i32.const $push52=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push39=, $pop38, $pop52 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push40=, $pop37, 12, $pop39 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push41=, $0, 13 +; SIMD128-VM-FAST-NEXT: i32.const $push51=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push42=, $pop41, $pop51 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push43=, $pop40, 13, $pop42 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push44=, $0, 14 +; SIMD128-VM-FAST-NEXT: i32.const $push50=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push45=, $pop44, $pop50 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push46=, $pop43, 14, $pop45 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push47=, $0, 15 +; SIMD128-VM-FAST-NEXT: i32.const $push49=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push48=, $pop47, $pop49 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push0=, $pop46, 15, $pop48 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shl_const_v16i8: +; NO-SIMD128-SLOW: .functype shl_const_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push1=, $16, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push53=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push4=, $15, $pop53 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop6), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push8=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-SLOW-NEXT: i32.const $push52=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl 
$push7=, $14, $pop52 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop9), $pop7 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.const $push51=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push10=, $13, $pop51 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push14=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push50=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push13=, $12, $pop50 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop15), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.const $push17=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-SLOW-NEXT: i32.const $push49=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push16=, $11, $pop49 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-SLOW-NEXT: i32.const $push20=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-SLOW-NEXT: i32.const $push48=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push19=, $10, $pop48 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop21), $pop19 +; NO-SIMD128-SLOW-NEXT: i32.const $push47=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push22=, $9, $pop47 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push24=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-SLOW-NEXT: i32.const $push46=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push23=, $8, $pop46 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-SLOW-NEXT: i32.const $push27=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-SLOW-NEXT: i32.const $push45=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push26=, $7, $pop45 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-SLOW-NEXT: i32.const $push44=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push30=, $0, $pop44 +; NO-SIMD128-SLOW-NEXT: i32.const $push43=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push29=, $6, $pop43 +; NO-SIMD128-SLOW-NEXT: 
i32.store8 0($pop30), $pop29 +; NO-SIMD128-SLOW-NEXT: i32.const $push42=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push31=, $5, $pop42 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop31 +; NO-SIMD128-SLOW-NEXT: i32.const $push33=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.const $push41=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push32=, $4, $pop41 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.const $push40=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push35=, $3, $pop40 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop35 +; NO-SIMD128-SLOW-NEXT: i32.const $push39=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push36=, $2, $pop39 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.const $push38=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push37=, $1, $pop38 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop37 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_const_v16i8: +; NO-SIMD128-FAST: .functype shl_const_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $pop50 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 5 +; 
NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $6, $pop48 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $7, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $8, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $9, $pop45 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push19=, $10, $pop44 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push22=, $11, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push25=, $12, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push28=, $13, $pop41 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 
+; NO-SIMD128-FAST-NEXT: i32.const $push40=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push31=, $14, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $15, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push37=, $16, $pop38 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = shl <16 x i8> %v, ret <16 x i8> %a } -; CHECK-LABEL: shl_vec_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shl_vec_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}} -; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}} -; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}} -; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 14 lanes -; SIMD128: i8x16.extract_lane_s $push[[L4:[0-9]+]]=, $0, 15{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}} -; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shl_vec_v16i8: +; SIMD128-UNIMPL-SLOW: .functype shl_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: 
i8x16.extract_lane_s $push6=, $0, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: v128.and $push67=, $1, $pop1 +; SIMD128-UNIMPL-SLOW-NEXT: local.tee $push66=, $1=, $pop67 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push5=, $pop66, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push7=, $pop6, $pop5 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.splat $push8=, $pop7 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push3=, $0, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push2=, $1, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push4=, $pop3, $pop2 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push9=, $pop8, 1, $pop4 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push11=, $0, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push10=, $1, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push12=, $pop11, $pop10 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push13=, $pop9, 2, $pop12 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push15=, $0, 3 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push14=, $1, 3 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push16=, $pop15, $pop14 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push17=, $pop13, 3, $pop16 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push19=, $0, 4 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push18=, $1, 4 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push20=, $pop19, $pop18 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push21=, $pop17, 4, $pop20 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push23=, $0, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push22=, $1, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push24=, $pop23, $pop22 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push25=, $pop21, 5, $pop24 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push27=, $0, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push26=, $1, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push28=, $pop27, $pop26 +; 
SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push29=, $pop25, 6, $pop28 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push31=, $0, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push30=, $1, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push32=, $pop31, $pop30 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push33=, $pop29, 7, $pop32 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push35=, $0, 8 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push34=, $1, 8 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push36=, $pop35, $pop34 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push37=, $pop33, 8, $pop36 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push39=, $0, 9 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push38=, $1, 9 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push40=, $pop39, $pop38 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push41=, $pop37, 9, $pop40 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push43=, $0, 10 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push42=, $1, 10 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push44=, $pop43, $pop42 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push45=, $pop41, 10, $pop44 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push47=, $0, 11 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push46=, $1, 11 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push48=, $pop47, $pop46 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push49=, $pop45, 11, $pop48 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push51=, $0, 12 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push50=, $1, 12 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push52=, $pop51, $pop50 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push53=, $pop49, 12, $pop52 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push55=, $0, 13 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push54=, $1, 13 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push56=, $pop55, $pop54 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push57=, $pop53, 13, $pop56 +; 
SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push59=, $0, 14 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push58=, $1, 14 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push60=, $pop59, $pop58 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push61=, $pop57, 14, $pop60 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push63=, $0, 15 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push62=, $1, 15 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push64=, $pop63, $pop62 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push65=, $pop61, 15, $pop64 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop65 +; +; SIMD128-UNIMPL-FAST-LABEL: shl_vec_v16i8: +; SIMD128-UNIMPL-FAST: .functype shl_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push7=, $0, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 7 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.splat $push2=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: v128.and $push67=, $1, $pop2 +; SIMD128-UNIMPL-FAST-NEXT: local.tee $push66=, $1=, $pop67 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push6=, $pop66, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push8=, $pop7, $pop6 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.splat $push9=, $pop8 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push4=, $0, 1 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push3=, $1, 1 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push5=, $pop4, $pop3 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push10=, $pop9, 1, $pop5 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push12=, $0, 2 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push11=, $1, 2 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push13=, $pop12, $pop11 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push14=, $pop10, 2, $pop13 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push16=, $0, 3 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push15=, $1, 3 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push17=, $pop16, $pop15 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push18=, $pop14, 3, $pop17 +; 
SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push20=, $0, 4 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push19=, $1, 4 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push21=, $pop20, $pop19 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push22=, $pop18, 4, $pop21 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push24=, $0, 5 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push23=, $1, 5 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push25=, $pop24, $pop23 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push26=, $pop22, 5, $pop25 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push28=, $0, 6 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push27=, $1, 6 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push29=, $pop28, $pop27 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push30=, $pop26, 6, $pop29 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push32=, $0, 7 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push31=, $1, 7 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push33=, $pop32, $pop31 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push34=, $pop30, 7, $pop33 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push36=, $0, 8 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push35=, $1, 8 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push37=, $pop36, $pop35 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push38=, $pop34, 8, $pop37 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push40=, $0, 9 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push39=, $1, 9 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push41=, $pop40, $pop39 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push42=, $pop38, 9, $pop41 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push44=, $0, 10 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push43=, $1, 10 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push45=, $pop44, $pop43 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push46=, $pop42, 10, $pop45 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push48=, $0, 11 +; SIMD128-UNIMPL-FAST-NEXT: 
i8x16.extract_lane_u $push47=, $1, 11 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push49=, $pop48, $pop47 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push50=, $pop46, 11, $pop49 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push52=, $0, 12 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push51=, $1, 12 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push53=, $pop52, $pop51 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push54=, $pop50, 12, $pop53 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push56=, $0, 13 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push55=, $1, 13 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push57=, $pop56, $pop55 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push58=, $pop54, 13, $pop57 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push60=, $0, 14 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push59=, $1, 14 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push61=, $pop60, $pop59 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push62=, $pop58, 14, $pop61 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push64=, $0, 15 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push63=, $1, 15 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push65=, $pop64, $pop63 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push0=, $pop62, 15, $pop65 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shl_vec_v16i8: +; SIMD128-VM-SLOW: .functype shl_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push8=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push0=, 7 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push1=, $pop0 +; SIMD128-VM-SLOW-NEXT: v128.and $push99=, $1, $pop1 +; SIMD128-VM-SLOW-NEXT: local.tee $push98=, $1=, $pop99 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push6=, $pop98, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push97=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push7=, $pop6, $pop97 +; SIMD128-VM-SLOW-NEXT: i32.shl $push9=, $pop8, $pop7 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push10=, $pop9 +; SIMD128-VM-SLOW-NEXT: 
i8x16.extract_lane_s $push4=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push2=, $1, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push96=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push3=, $pop2, $pop96 +; SIMD128-VM-SLOW-NEXT: i32.shl $push5=, $pop4, $pop3 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push11=, $pop10, 1, $pop5 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push14=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push12=, $1, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push95=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push13=, $pop12, $pop95 +; SIMD128-VM-SLOW-NEXT: i32.shl $push15=, $pop14, $pop13 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push16=, $pop11, 2, $pop15 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push19=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push17=, $1, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push94=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push18=, $pop17, $pop94 +; SIMD128-VM-SLOW-NEXT: i32.shl $push20=, $pop19, $pop18 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push21=, $pop16, 3, $pop20 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push24=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push22=, $1, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push93=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push23=, $pop22, $pop93 +; SIMD128-VM-SLOW-NEXT: i32.shl $push25=, $pop24, $pop23 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push26=, $pop21, 4, $pop25 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push29=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push27=, $1, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push92=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push28=, $pop27, $pop92 +; SIMD128-VM-SLOW-NEXT: i32.shl $push30=, $pop29, $pop28 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push31=, $pop26, 5, $pop30 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push34=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push32=, $1, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push91=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push33=, $pop32, $pop91 +; 
SIMD128-VM-SLOW-NEXT: i32.shl $push35=, $pop34, $pop33 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push36=, $pop31, 6, $pop35 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push39=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push37=, $1, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push90=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push38=, $pop37, $pop90 +; SIMD128-VM-SLOW-NEXT: i32.shl $push40=, $pop39, $pop38 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push41=, $pop36, 7, $pop40 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push44=, $0, 8 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push42=, $1, 8 +; SIMD128-VM-SLOW-NEXT: i32.const $push89=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push43=, $pop42, $pop89 +; SIMD128-VM-SLOW-NEXT: i32.shl $push45=, $pop44, $pop43 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push46=, $pop41, 8, $pop45 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push49=, $0, 9 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push47=, $1, 9 +; SIMD128-VM-SLOW-NEXT: i32.const $push88=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push48=, $pop47, $pop88 +; SIMD128-VM-SLOW-NEXT: i32.shl $push50=, $pop49, $pop48 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push51=, $pop46, 9, $pop50 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push54=, $0, 10 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push52=, $1, 10 +; SIMD128-VM-SLOW-NEXT: i32.const $push87=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push53=, $pop52, $pop87 +; SIMD128-VM-SLOW-NEXT: i32.shl $push55=, $pop54, $pop53 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push56=, $pop51, 10, $pop55 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push59=, $0, 11 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push57=, $1, 11 +; SIMD128-VM-SLOW-NEXT: i32.const $push86=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push58=, $pop57, $pop86 +; SIMD128-VM-SLOW-NEXT: i32.shl $push60=, $pop59, $pop58 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push61=, $pop56, 11, $pop60 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push64=, $0, 12 +; 
SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push62=, $1, 12 +; SIMD128-VM-SLOW-NEXT: i32.const $push85=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push63=, $pop62, $pop85 +; SIMD128-VM-SLOW-NEXT: i32.shl $push65=, $pop64, $pop63 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push66=, $pop61, 12, $pop65 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push69=, $0, 13 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push67=, $1, 13 +; SIMD128-VM-SLOW-NEXT: i32.const $push84=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push68=, $pop67, $pop84 +; SIMD128-VM-SLOW-NEXT: i32.shl $push70=, $pop69, $pop68 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push71=, $pop66, 13, $pop70 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push74=, $0, 14 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push72=, $1, 14 +; SIMD128-VM-SLOW-NEXT: i32.const $push83=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push73=, $pop72, $pop83 +; SIMD128-VM-SLOW-NEXT: i32.shl $push75=, $pop74, $pop73 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push76=, $pop71, 14, $pop75 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push79=, $0, 15 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push77=, $1, 15 +; SIMD128-VM-SLOW-NEXT: i32.const $push82=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push78=, $pop77, $pop82 +; SIMD128-VM-SLOW-NEXT: i32.shl $push80=, $pop79, $pop78 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push81=, $pop76, 15, $pop80 +; SIMD128-VM-SLOW-NEXT: return $pop81 +; +; SIMD128-VM-FAST-LABEL: shl_vec_v16i8: +; SIMD128-VM-FAST: .functype shl_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push1=, 7 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push2=, $pop1 +; SIMD128-VM-FAST-NEXT: v128.and $push99=, $1, $pop2 +; SIMD128-VM-FAST-NEXT: local.tee $push98=, $1=, $pop99 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push7=, $pop98, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push97=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push8=, $pop7, $pop97 +; SIMD128-VM-FAST-NEXT: 
i32.shl $push10=, $pop9, $pop8 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push11=, $pop10 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push5=, $0, 1 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push3=, $1, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push96=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push4=, $pop3, $pop96 +; SIMD128-VM-FAST-NEXT: i32.shl $push6=, $pop5, $pop4 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push12=, $pop11, 1, $pop6 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push15=, $0, 2 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push13=, $1, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push95=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push14=, $pop13, $pop95 +; SIMD128-VM-FAST-NEXT: i32.shl $push16=, $pop15, $pop14 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push17=, $pop12, 2, $pop16 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push20=, $0, 3 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push18=, $1, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push94=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push19=, $pop18, $pop94 +; SIMD128-VM-FAST-NEXT: i32.shl $push21=, $pop20, $pop19 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push22=, $pop17, 3, $pop21 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push25=, $0, 4 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push23=, $1, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push93=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push24=, $pop23, $pop93 +; SIMD128-VM-FAST-NEXT: i32.shl $push26=, $pop25, $pop24 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push27=, $pop22, 4, $pop26 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push30=, $0, 5 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push28=, $1, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push92=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push29=, $pop28, $pop92 +; SIMD128-VM-FAST-NEXT: i32.shl $push31=, $pop30, $pop29 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push32=, $pop27, 5, $pop31 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push35=, $0, 6 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push33=, $1, 6 +; 
SIMD128-VM-FAST-NEXT: i32.const $push91=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push34=, $pop33, $pop91 +; SIMD128-VM-FAST-NEXT: i32.shl $push36=, $pop35, $pop34 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push37=, $pop32, 6, $pop36 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push40=, $0, 7 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push38=, $1, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push90=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push39=, $pop38, $pop90 +; SIMD128-VM-FAST-NEXT: i32.shl $push41=, $pop40, $pop39 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push42=, $pop37, 7, $pop41 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push45=, $0, 8 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push43=, $1, 8 +; SIMD128-VM-FAST-NEXT: i32.const $push89=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push44=, $pop43, $pop89 +; SIMD128-VM-FAST-NEXT: i32.shl $push46=, $pop45, $pop44 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push47=, $pop42, 8, $pop46 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push50=, $0, 9 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push48=, $1, 9 +; SIMD128-VM-FAST-NEXT: i32.const $push88=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push49=, $pop48, $pop88 +; SIMD128-VM-FAST-NEXT: i32.shl $push51=, $pop50, $pop49 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push52=, $pop47, 9, $pop51 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push55=, $0, 10 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push53=, $1, 10 +; SIMD128-VM-FAST-NEXT: i32.const $push87=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push54=, $pop53, $pop87 +; SIMD128-VM-FAST-NEXT: i32.shl $push56=, $pop55, $pop54 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push57=, $pop52, 10, $pop56 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push60=, $0, 11 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push58=, $1, 11 +; SIMD128-VM-FAST-NEXT: i32.const $push86=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push59=, $pop58, $pop86 +; SIMD128-VM-FAST-NEXT: i32.shl $push61=, $pop60, $pop59 +; SIMD128-VM-FAST-NEXT: 
i8x16.replace_lane $push62=, $pop57, 11, $pop61 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push65=, $0, 12 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push63=, $1, 12 +; SIMD128-VM-FAST-NEXT: i32.const $push85=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push64=, $pop63, $pop85 +; SIMD128-VM-FAST-NEXT: i32.shl $push66=, $pop65, $pop64 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push67=, $pop62, 12, $pop66 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push70=, $0, 13 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push68=, $1, 13 +; SIMD128-VM-FAST-NEXT: i32.const $push84=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push69=, $pop68, $pop84 +; SIMD128-VM-FAST-NEXT: i32.shl $push71=, $pop70, $pop69 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push72=, $pop67, 13, $pop71 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push75=, $0, 14 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push73=, $1, 14 +; SIMD128-VM-FAST-NEXT: i32.const $push83=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push74=, $pop73, $pop83 +; SIMD128-VM-FAST-NEXT: i32.shl $push76=, $pop75, $pop74 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push77=, $pop72, 14, $pop76 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push80=, $0, 15 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push78=, $1, 15 +; SIMD128-VM-FAST-NEXT: i32.const $push82=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push79=, $pop78, $pop82 +; SIMD128-VM-FAST-NEXT: i32.shl $push81=, $pop80, $pop79 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push0=, $pop77, 15, $pop81 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shl_vec_v16i8: +; NO-SIMD128-SLOW: .functype shl_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push3=, $32, $pop2 +; 
NO-SIMD128-SLOW-NEXT: i32.shl $push4=, $16, $pop3 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop1), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push69=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push7=, $31, $pop69 +; NO-SIMD128-SLOW-NEXT: i32.shl $push8=, $15, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop6), $pop8 +; NO-SIMD128-SLOW-NEXT: i32.const $push9=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push68=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push11=, $30, $pop68 +; NO-SIMD128-SLOW-NEXT: i32.shl $push12=, $14, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop10), $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push13=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-SLOW-NEXT: i32.const $push67=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push15=, $29, $pop67 +; NO-SIMD128-SLOW-NEXT: i32.shl $push16=, $13, $pop15 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop14), $pop16 +; NO-SIMD128-SLOW-NEXT: i32.const $push17=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-SLOW-NEXT: i32.const $push66=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push19=, $28, $pop66 +; NO-SIMD128-SLOW-NEXT: i32.shl $push20=, $12, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop18), $pop20 +; NO-SIMD128-SLOW-NEXT: i32.const $push21=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-SLOW-NEXT: i32.const $push65=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push23=, $27, $pop65 +; NO-SIMD128-SLOW-NEXT: i32.shl $push24=, $11, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop22), $pop24 +; NO-SIMD128-SLOW-NEXT: i32.const $push25=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-SLOW-NEXT: i32.const $push64=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push27=, $26, $pop64 +; NO-SIMD128-SLOW-NEXT: i32.shl $push28=, $10, $pop27 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop26), $pop28 +; 
NO-SIMD128-SLOW-NEXT: i32.const $push63=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push29=, $25, $pop63 +; NO-SIMD128-SLOW-NEXT: i32.shl $push30=, $9, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop30 +; NO-SIMD128-SLOW-NEXT: i32.const $push31=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-SLOW-NEXT: i32.const $push62=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push33=, $24, $pop62 +; NO-SIMD128-SLOW-NEXT: i32.shl $push34=, $8, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop32), $pop34 +; NO-SIMD128-SLOW-NEXT: i32.const $push35=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-SLOW-NEXT: i32.const $push61=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push37=, $23, $pop61 +; NO-SIMD128-SLOW-NEXT: i32.shl $push38=, $7, $pop37 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop36), $pop38 +; NO-SIMD128-SLOW-NEXT: i32.const $push39=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-SLOW-NEXT: i32.const $push60=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push41=, $22, $pop60 +; NO-SIMD128-SLOW-NEXT: i32.shl $push42=, $6, $pop41 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop40), $pop42 +; NO-SIMD128-SLOW-NEXT: i32.const $push59=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push43=, $21, $pop59 +; NO-SIMD128-SLOW-NEXT: i32.shl $push44=, $5, $pop43 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop44 +; NO-SIMD128-SLOW-NEXT: i32.const $push45=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push46=, $0, $pop45 +; NO-SIMD128-SLOW-NEXT: i32.const $push58=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push47=, $20, $pop58 +; NO-SIMD128-SLOW-NEXT: i32.shl $push48=, $4, $pop47 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop46), $pop48 +; NO-SIMD128-SLOW-NEXT: i32.const $push57=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push49=, $19, $pop57 +; NO-SIMD128-SLOW-NEXT: i32.shl $push50=, $3, $pop49 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop50 +; NO-SIMD128-SLOW-NEXT: i32.const $push56=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push51=, $18, $pop56 +; 
NO-SIMD128-SLOW-NEXT: i32.shl $push52=, $2, $pop51 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop52 +; NO-SIMD128-SLOW-NEXT: i32.const $push55=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push53=, $17, $pop55 +; NO-SIMD128-SLOW-NEXT: i32.shl $push54=, $1, $pop53 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop54 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_vec_v16i8: +; NO-SIMD128-FAST: .functype shl_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $18, $pop69 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $19, $pop68 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $20, $pop67 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $21, $pop66 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $5, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $22, $pop65 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15 +; 
NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop64 +; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $24, $pop63 +; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop62 +; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $9, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $26, $pop61 +; NO-SIMD128-FAST-NEXT: i32.shl $push30=, $10, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $27, $pop60 +; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $11, $pop33 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $28, $pop59 +; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $12, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 +; 
NO-SIMD128-FAST-NEXT: i32.and $push41=, $29, $pop58 +; NO-SIMD128-FAST-NEXT: i32.shl $push42=, $13, $pop41 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $30, $pop57 +; NO-SIMD128-FAST-NEXT: i32.shl $push46=, $14, $pop45 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $31, $pop56 +; NO-SIMD128-FAST-NEXT: i32.shl $push50=, $15, $pop49 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $32, $pop55 +; NO-SIMD128-FAST-NEXT: i32.shl $push54=, $16, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: return %a = shl <16 x i8> %v, %x ret <16 x i8> %a } -; CHECK-LABEL: shr_s_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shr_s_v16i8 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) { +; SIMD128-UNIMPL-LABEL: shr_s_v16i8: +; SIMD128-UNIMPL: .functype shr_s_v16i8 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i8x16.shr_s $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shr_s_v16i8: +; SIMD128-VM-SLOW: .functype shr_s_v16i8 (v128, i32) -> (v128) +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push2=, $1 +; SIMD128-VM-SLOW-NEXT: i32.const $push0=, 7 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push1=, $pop0 +; SIMD128-VM-SLOW-NEXT: v128.and 
$push100=, $pop2, $pop1 +; SIMD128-VM-SLOW-NEXT: local.tee $push99=, $2=, $pop100 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push7=, $pop99, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push98=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push8=, $pop7, $pop98 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push10=, $pop9, $pop8 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push11=, $pop10 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push5=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push3=, $2, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push97=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push4=, $pop3, $pop97 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push6=, $pop5, $pop4 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push12=, $pop11, 1, $pop6 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push15=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push13=, $2, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push96=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push14=, $pop13, $pop96 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push16=, $pop15, $pop14 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push17=, $pop12, 2, $pop16 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push20=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push18=, $2, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push95=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push19=, $pop18, $pop95 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push22=, $pop17, 3, $pop21 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push25=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push23=, $2, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push94=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push24=, $pop23, $pop94 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push26=, $pop25, $pop24 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push27=, $pop22, 4, $pop26 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push30=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push28=, $2, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push93=, 7 +; SIMD128-VM-SLOW-NEXT: 
i32.and $push29=, $pop28, $pop93 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push31=, $pop30, $pop29 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push32=, $pop27, 5, $pop31 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push35=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push33=, $2, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push92=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push34=, $pop33, $pop92 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push36=, $pop35, $pop34 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push37=, $pop32, 6, $pop36 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push40=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push38=, $2, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push91=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push39=, $pop38, $pop91 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push41=, $pop40, $pop39 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push42=, $pop37, 7, $pop41 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push45=, $0, 8 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push43=, $2, 8 +; SIMD128-VM-SLOW-NEXT: i32.const $push90=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push44=, $pop43, $pop90 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push46=, $pop45, $pop44 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push47=, $pop42, 8, $pop46 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push50=, $0, 9 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push48=, $2, 9 +; SIMD128-VM-SLOW-NEXT: i32.const $push89=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push49=, $pop48, $pop89 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push51=, $pop50, $pop49 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push52=, $pop47, 9, $pop51 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push55=, $0, 10 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push53=, $2, 10 +; SIMD128-VM-SLOW-NEXT: i32.const $push88=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push54=, $pop53, $pop88 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push56=, $pop55, $pop54 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push57=, $pop52, 10, $pop56 +; SIMD128-VM-SLOW-NEXT: 
i8x16.extract_lane_s $push60=, $0, 11 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push58=, $2, 11 +; SIMD128-VM-SLOW-NEXT: i32.const $push87=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push59=, $pop58, $pop87 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push61=, $pop60, $pop59 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push62=, $pop57, 11, $pop61 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push65=, $0, 12 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push63=, $2, 12 +; SIMD128-VM-SLOW-NEXT: i32.const $push86=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push64=, $pop63, $pop86 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push66=, $pop65, $pop64 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push67=, $pop62, 12, $pop66 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push70=, $0, 13 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push68=, $2, 13 +; SIMD128-VM-SLOW-NEXT: i32.const $push85=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push69=, $pop68, $pop85 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push71=, $pop70, $pop69 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push72=, $pop67, 13, $pop71 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push75=, $0, 14 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push73=, $2, 14 +; SIMD128-VM-SLOW-NEXT: i32.const $push84=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push74=, $pop73, $pop84 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push76=, $pop75, $pop74 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push77=, $pop72, 14, $pop76 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push80=, $0, 15 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push78=, $2, 15 +; SIMD128-VM-SLOW-NEXT: i32.const $push83=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push79=, $pop78, $pop83 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push81=, $pop80, $pop79 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push82=, $pop77, 15, $pop81 +; SIMD128-VM-SLOW-NEXT: return $pop82 +; +; SIMD128-VM-FAST-LABEL: shr_s_v16i8: +; SIMD128-VM-FAST: .functype shr_s_v16i8 (v128, i32) -> (v128) +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push10=, $0, 
0 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push1=, $1 +; SIMD128-VM-FAST-NEXT: i32.const $push2=, 7 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push3=, $pop2 +; SIMD128-VM-FAST-NEXT: v128.and $push100=, $pop1, $pop3 +; SIMD128-VM-FAST-NEXT: local.tee $push99=, $2=, $pop100 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push8=, $pop99, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push98=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push9=, $pop8, $pop98 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push11=, $pop10, $pop9 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push12=, $pop11 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push6=, $0, 1 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push4=, $2, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push97=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push5=, $pop4, $pop97 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push7=, $pop6, $pop5 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push13=, $pop12, 1, $pop7 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push16=, $0, 2 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push14=, $2, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push96=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push15=, $pop14, $pop96 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push18=, $pop13, 2, $pop17 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push21=, $0, 3 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push19=, $2, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push95=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push20=, $pop19, $pop95 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push23=, $pop18, 3, $pop22 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push26=, $0, 4 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push24=, $2, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push94=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push25=, $pop24, $pop94 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push28=, $pop23, 4, $pop27 +; 
SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push31=, $0, 5 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push29=, $2, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push93=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push30=, $pop29, $pop93 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push33=, $pop28, 5, $pop32 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push36=, $0, 6 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push34=, $2, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push92=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push35=, $pop34, $pop92 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push37=, $pop36, $pop35 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push38=, $pop33, 6, $pop37 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push41=, $0, 7 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push39=, $2, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push91=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push40=, $pop39, $pop91 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push42=, $pop41, $pop40 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push43=, $pop38, 7, $pop42 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push46=, $0, 8 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push44=, $2, 8 +; SIMD128-VM-FAST-NEXT: i32.const $push90=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push45=, $pop44, $pop90 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push47=, $pop46, $pop45 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push48=, $pop43, 8, $pop47 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push51=, $0, 9 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push49=, $2, 9 +; SIMD128-VM-FAST-NEXT: i32.const $push89=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push50=, $pop49, $pop89 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push52=, $pop51, $pop50 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push53=, $pop48, 9, $pop52 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push56=, $0, 10 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push54=, $2, 10 +; SIMD128-VM-FAST-NEXT: i32.const $push88=, 7 +; SIMD128-VM-FAST-NEXT: 
i32.and $push55=, $pop54, $pop88 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push57=, $pop56, $pop55 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push58=, $pop53, 10, $pop57 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push61=, $0, 11 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push59=, $2, 11 +; SIMD128-VM-FAST-NEXT: i32.const $push87=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push60=, $pop59, $pop87 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push62=, $pop61, $pop60 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push63=, $pop58, 11, $pop62 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push66=, $0, 12 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push64=, $2, 12 +; SIMD128-VM-FAST-NEXT: i32.const $push86=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push65=, $pop64, $pop86 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push67=, $pop66, $pop65 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push68=, $pop63, 12, $pop67 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push71=, $0, 13 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push69=, $2, 13 +; SIMD128-VM-FAST-NEXT: i32.const $push85=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push70=, $pop69, $pop85 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push72=, $pop71, $pop70 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push73=, $pop68, 13, $pop72 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push76=, $0, 14 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push74=, $2, 14 +; SIMD128-VM-FAST-NEXT: i32.const $push84=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push75=, $pop74, $pop84 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push77=, $pop76, $pop75 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push78=, $pop73, 14, $pop77 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push81=, $0, 15 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push79=, $2, 15 +; SIMD128-VM-FAST-NEXT: i32.const $push83=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push80=, $pop79, $pop83 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push82=, $pop81, $pop80 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push0=, $pop78, 15, $pop82 +; 
SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_s_v16i8: +; NO-SIMD128-SLOW: .functype shr_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push2=, $16, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.const $push104=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push3=, $pop2, $pop104 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push103=, $17, $pop0 +; NO-SIMD128-SLOW-NEXT: local.tee $push102=, $16=, $pop103 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push4=, $pop3, $pop102 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop6), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push10=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push101=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push7=, $15, $pop101 +; NO-SIMD128-SLOW-NEXT: i32.const $push100=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push8=, $pop7, $pop100 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push9=, $pop8, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push15=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push99=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push12=, $14, $pop99 +; NO-SIMD128-SLOW-NEXT: i32.const $push98=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push13=, $pop12, $pop98 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push14=, $pop13, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop16), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push20=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-SLOW-NEXT: i32.const $push97=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push17=, $13, $pop97 +; NO-SIMD128-SLOW-NEXT: i32.const $push96=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push18=, $pop17, $pop96 +; NO-SIMD128-SLOW-NEXT: i32.shr_s 
$push19=, $pop18, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop21), $pop19 +; NO-SIMD128-SLOW-NEXT: i32.const $push25=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push26=, $0, $pop25 +; NO-SIMD128-SLOW-NEXT: i32.const $push95=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push22=, $12, $pop95 +; NO-SIMD128-SLOW-NEXT: i32.const $push94=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push23=, $pop22, $pop94 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push24=, $pop23, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop26), $pop24 +; NO-SIMD128-SLOW-NEXT: i32.const $push30=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-SLOW-NEXT: i32.const $push93=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push27=, $11, $pop93 +; NO-SIMD128-SLOW-NEXT: i32.const $push92=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push28=, $pop27, $pop92 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push29=, $pop28, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop31), $pop29 +; NO-SIMD128-SLOW-NEXT: i32.const $push35=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-SLOW-NEXT: i32.const $push91=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push32=, $10, $pop91 +; NO-SIMD128-SLOW-NEXT: i32.const $push90=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push33=, $pop32, $pop90 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push34=, $pop33, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop36), $pop34 +; NO-SIMD128-SLOW-NEXT: i32.const $push89=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push37=, $9, $pop89 +; NO-SIMD128-SLOW-NEXT: i32.const $push88=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push38=, $pop37, $pop88 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push39=, $pop38, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop39 +; NO-SIMD128-SLOW-NEXT: i32.const $push43=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push44=, $0, $pop43 +; NO-SIMD128-SLOW-NEXT: i32.const $push87=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push40=, $8, $pop87 +; NO-SIMD128-SLOW-NEXT: i32.const $push86=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push41=, $pop40, $pop86 +; NO-SIMD128-SLOW-NEXT: 
i32.shr_s $push42=, $pop41, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop44), $pop42 +; NO-SIMD128-SLOW-NEXT: i32.const $push48=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push49=, $0, $pop48 +; NO-SIMD128-SLOW-NEXT: i32.const $push85=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push45=, $7, $pop85 +; NO-SIMD128-SLOW-NEXT: i32.const $push84=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push46=, $pop45, $pop84 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push47=, $pop46, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop49), $pop47 +; NO-SIMD128-SLOW-NEXT: i32.const $push53=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push54=, $0, $pop53 +; NO-SIMD128-SLOW-NEXT: i32.const $push83=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push50=, $6, $pop83 +; NO-SIMD128-SLOW-NEXT: i32.const $push82=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push51=, $pop50, $pop82 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push52=, $pop51, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-SLOW-NEXT: i32.const $push81=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push55=, $5, $pop81 +; NO-SIMD128-SLOW-NEXT: i32.const $push80=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push56=, $pop55, $pop80 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push57=, $pop56, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop57 +; NO-SIMD128-SLOW-NEXT: i32.const $push61=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push62=, $0, $pop61 +; NO-SIMD128-SLOW-NEXT: i32.const $push79=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push58=, $4, $pop79 +; NO-SIMD128-SLOW-NEXT: i32.const $push78=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push59=, $pop58, $pop78 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push60=, $pop59, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop62), $pop60 +; NO-SIMD128-SLOW-NEXT: i32.const $push77=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push63=, $3, $pop77 +; NO-SIMD128-SLOW-NEXT: i32.const $push76=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push64=, $pop63, $pop76 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push65=, $pop64, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop65 +; 
NO-SIMD128-SLOW-NEXT: i32.const $push75=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push66=, $2, $pop75 +; NO-SIMD128-SLOW-NEXT: i32.const $push74=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push67=, $pop66, $pop74 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push68=, $pop67, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop68 +; NO-SIMD128-SLOW-NEXT: i32.const $push73=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push69=, $1, $pop73 +; NO-SIMD128-SLOW-NEXT: i32.const $push72=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push70=, $pop69, $pop72 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push71=, $pop70, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop71 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_v16i8: +; NO-SIMD128-FAST: .functype shr_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push1=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push104=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop104 +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push103=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push102=, $17=, $pop103 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $pop102 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $2, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop100 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push7=, $pop6, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $3, $pop99 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop98 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 3 +; 
NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $4, $pop97 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop96 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push13=, $pop12, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $5, $pop95 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop94 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push19=, $6, $pop93 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $pop92 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $pop20, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop23), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $7, $pop91 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push25=, $pop24, $pop90 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop25, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push29=, $8, $pop89 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $pop88 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push31=, $pop30, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 
24 +; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $9, $pop87 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop34, $pop86 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push36=, $pop35, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push41=, $0, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push37=, $10, $pop85 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop37, $pop84 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push39=, $pop38, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop41), $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push42=, $11, $pop83 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push43=, $pop42, $pop82 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push44=, $pop43, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push51=, $0, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push47=, $12, $pop81 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push48=, $pop47, $pop80 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push49=, $pop48, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop51), $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push52=, $13, $pop79 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push53=, $pop52, $pop78 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push54=, $pop53, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 +; NO-SIMD128-FAST-NEXT: 
i32.const $push60=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push57=, $14, $pop77 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push58=, $pop57, $pop76 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push59=, $pop58, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop59 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push66=, $0, $pop65 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push62=, $15, $pop75 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push63=, $pop62, $pop74 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push64=, $pop63, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop66), $pop64 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push71=, $0, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push67=, $16, $pop73 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push68=, $pop67, $pop72 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push69=, $pop68, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop71), $pop69 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, <16 x i32> %a } -; CHECK-LABEL: shr_s_vec_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shr_s_vec_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}} -; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}} -; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}} -; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], 
$pop[[L1]]{{$}} -; SIMD128-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 14 lanes -; SIMD128: i8x16.extract_lane_s $push[[L0:[0-9]+]]=, $0, 15{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $1, 15{{$}} -; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop{{[0-9]+}}, 15, $pop[[L2]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shr_s_vec_v16i8: +; SIMD128-UNIMPL-SLOW: .functype shr_s_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push6=, $0, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: v128.and $push67=, $1, $pop1 +; SIMD128-UNIMPL-SLOW-NEXT: local.tee $push66=, $1=, $pop67 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push5=, $pop66, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push7=, $pop6, $pop5 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.splat $push8=, $pop7 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push3=, $0, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push2=, $1, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push4=, $pop3, $pop2 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push9=, $pop8, 1, $pop4 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push11=, $0, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push10=, $1, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push12=, $pop11, $pop10 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push13=, $pop9, 2, $pop12 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push15=, $0, 3 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push14=, $1, 3 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push16=, $pop15, $pop14 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push17=, $pop13, 3, $pop16 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push19=, $0, 4 +; 
SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push18=, $1, 4 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push20=, $pop19, $pop18 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push21=, $pop17, 4, $pop20 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push23=, $0, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push22=, $1, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push25=, $pop21, 5, $pop24 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push27=, $0, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push26=, $1, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push28=, $pop27, $pop26 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push29=, $pop25, 6, $pop28 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push31=, $0, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push30=, $1, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push32=, $pop31, $pop30 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push33=, $pop29, 7, $pop32 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push35=, $0, 8 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push34=, $1, 8 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push36=, $pop35, $pop34 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push37=, $pop33, 8, $pop36 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push39=, $0, 9 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push38=, $1, 9 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push40=, $pop39, $pop38 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push41=, $pop37, 9, $pop40 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push43=, $0, 10 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push42=, $1, 10 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push44=, $pop43, $pop42 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push45=, $pop41, 10, $pop44 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push47=, $0, 11 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push46=, $1, 11 +; 
SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push48=, $pop47, $pop46 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push49=, $pop45, 11, $pop48 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push51=, $0, 12 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push50=, $1, 12 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push52=, $pop51, $pop50 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push53=, $pop49, 12, $pop52 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push55=, $0, 13 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push54=, $1, 13 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push56=, $pop55, $pop54 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push57=, $pop53, 13, $pop56 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push59=, $0, 14 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push58=, $1, 14 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push60=, $pop59, $pop58 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push61=, $pop57, 14, $pop60 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_s $push63=, $0, 15 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push62=, $1, 15 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push64=, $pop63, $pop62 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push65=, $pop61, 15, $pop64 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop65 +; +; SIMD128-UNIMPL-FAST-LABEL: shr_s_vec_v16i8: +; SIMD128-UNIMPL-FAST: .functype shr_s_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push7=, $0, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 7 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.splat $push2=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: v128.and $push67=, $1, $pop2 +; SIMD128-UNIMPL-FAST-NEXT: local.tee $push66=, $1=, $pop67 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push6=, $pop66, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push8=, $pop7, $pop6 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.splat $push9=, $pop8 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push4=, $0, 1 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u 
$push3=, $1, 1 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push5=, $pop4, $pop3 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push10=, $pop9, 1, $pop5 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push12=, $0, 2 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push11=, $1, 2 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push13=, $pop12, $pop11 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push14=, $pop10, 2, $pop13 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push16=, $0, 3 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push15=, $1, 3 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push18=, $pop14, 3, $pop17 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push20=, $0, 4 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push19=, $1, 4 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push22=, $pop18, 4, $pop21 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push24=, $0, 5 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push23=, $1, 5 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push25=, $pop24, $pop23 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push26=, $pop22, 5, $pop25 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push28=, $0, 6 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push27=, $1, 6 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push29=, $pop28, $pop27 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push30=, $pop26, 6, $pop29 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push32=, $0, 7 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push31=, $1, 7 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push33=, $pop32, $pop31 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push34=, $pop30, 7, $pop33 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push36=, $0, 8 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push35=, $1, 8 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push37=, $pop36, $pop35 +; 
SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push38=, $pop34, 8, $pop37 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push40=, $0, 9 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push39=, $1, 9 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push41=, $pop40, $pop39 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push42=, $pop38, 9, $pop41 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push44=, $0, 10 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push43=, $1, 10 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push45=, $pop44, $pop43 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push46=, $pop42, 10, $pop45 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push48=, $0, 11 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push47=, $1, 11 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push49=, $pop48, $pop47 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push50=, $pop46, 11, $pop49 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push52=, $0, 12 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push51=, $1, 12 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push53=, $pop52, $pop51 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push54=, $pop50, 12, $pop53 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push56=, $0, 13 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push55=, $1, 13 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push57=, $pop56, $pop55 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push58=, $pop54, 13, $pop57 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push60=, $0, 14 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push59=, $1, 14 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push61=, $pop60, $pop59 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push62=, $pop58, 14, $pop61 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_s $push64=, $0, 15 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push63=, $1, 15 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push65=, $pop64, $pop63 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push0=, $pop62, 15, $pop65 +; 
SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shr_s_vec_v16i8: +; SIMD128-VM-SLOW: .functype shr_s_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push8=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push0=, 7 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push1=, $pop0 +; SIMD128-VM-SLOW-NEXT: v128.and $push99=, $1, $pop1 +; SIMD128-VM-SLOW-NEXT: local.tee $push98=, $1=, $pop99 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push6=, $pop98, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push97=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push7=, $pop6, $pop97 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push9=, $pop8, $pop7 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push10=, $pop9 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push4=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push2=, $1, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push96=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push3=, $pop2, $pop96 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push5=, $pop4, $pop3 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push11=, $pop10, 1, $pop5 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push14=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push12=, $1, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push95=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push13=, $pop12, $pop95 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push16=, $pop11, 2, $pop15 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push19=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push17=, $1, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push94=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push18=, $pop17, $pop94 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push20=, $pop19, $pop18 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push21=, $pop16, 3, $pop20 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push24=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push22=, $1, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push93=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push23=, $pop22, 
$pop93 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push25=, $pop24, $pop23 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push26=, $pop21, 4, $pop25 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push29=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push27=, $1, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push92=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push28=, $pop27, $pop92 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push30=, $pop29, $pop28 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push31=, $pop26, 5, $pop30 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push34=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push32=, $1, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push91=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push33=, $pop32, $pop91 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push35=, $pop34, $pop33 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push36=, $pop31, 6, $pop35 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push39=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push37=, $1, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push90=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push38=, $pop37, $pop90 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push40=, $pop39, $pop38 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push41=, $pop36, 7, $pop40 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push44=, $0, 8 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push42=, $1, 8 +; SIMD128-VM-SLOW-NEXT: i32.const $push89=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push43=, $pop42, $pop89 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push45=, $pop44, $pop43 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push46=, $pop41, 8, $pop45 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push49=, $0, 9 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push47=, $1, 9 +; SIMD128-VM-SLOW-NEXT: i32.const $push88=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push48=, $pop47, $pop88 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push50=, $pop49, $pop48 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push51=, $pop46, 9, $pop50 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push54=, 
$0, 10 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push52=, $1, 10 +; SIMD128-VM-SLOW-NEXT: i32.const $push87=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push53=, $pop52, $pop87 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push55=, $pop54, $pop53 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push56=, $pop51, 10, $pop55 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push59=, $0, 11 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push57=, $1, 11 +; SIMD128-VM-SLOW-NEXT: i32.const $push86=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push58=, $pop57, $pop86 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push60=, $pop59, $pop58 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push61=, $pop56, 11, $pop60 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push64=, $0, 12 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push62=, $1, 12 +; SIMD128-VM-SLOW-NEXT: i32.const $push85=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push63=, $pop62, $pop85 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push65=, $pop64, $pop63 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push66=, $pop61, 12, $pop65 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push69=, $0, 13 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push67=, $1, 13 +; SIMD128-VM-SLOW-NEXT: i32.const $push84=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push68=, $pop67, $pop84 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push70=, $pop69, $pop68 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push71=, $pop66, 13, $pop70 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push74=, $0, 14 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push72=, $1, 14 +; SIMD128-VM-SLOW-NEXT: i32.const $push83=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push73=, $pop72, $pop83 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push75=, $pop74, $pop73 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push76=, $pop71, 14, $pop75 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push79=, $0, 15 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push77=, $1, 15 +; SIMD128-VM-SLOW-NEXT: i32.const $push82=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push78=, $pop77, $pop82 +; 
SIMD128-VM-SLOW-NEXT: i32.shr_s $push80=, $pop79, $pop78 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push81=, $pop76, 15, $pop80 +; SIMD128-VM-SLOW-NEXT: return $pop81 +; +; SIMD128-VM-FAST-LABEL: shr_s_vec_v16i8: +; SIMD128-VM-FAST: .functype shr_s_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push1=, 7 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push2=, $pop1 +; SIMD128-VM-FAST-NEXT: v128.and $push99=, $1, $pop2 +; SIMD128-VM-FAST-NEXT: local.tee $push98=, $1=, $pop99 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push7=, $pop98, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push97=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push8=, $pop7, $pop97 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push10=, $pop9, $pop8 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push11=, $pop10 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push5=, $0, 1 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push3=, $1, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push96=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push4=, $pop3, $pop96 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push12=, $pop11, 1, $pop6 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push15=, $0, 2 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push13=, $1, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push95=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push14=, $pop13, $pop95 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push16=, $pop15, $pop14 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push17=, $pop12, 2, $pop16 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push20=, $0, 3 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push18=, $1, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push94=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push19=, $pop18, $pop94 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push22=, $pop17, 3, $pop21 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push25=, $0, 4 +; SIMD128-VM-FAST-NEXT: 
i8x16.extract_lane_s $push23=, $1, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push93=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push24=, $pop23, $pop93 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push26=, $pop25, $pop24 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push27=, $pop22, 4, $pop26 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push30=, $0, 5 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push28=, $1, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push92=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push29=, $pop28, $pop92 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push31=, $pop30, $pop29 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push32=, $pop27, 5, $pop31 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push35=, $0, 6 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push33=, $1, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push91=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push34=, $pop33, $pop91 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push36=, $pop35, $pop34 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push37=, $pop32, 6, $pop36 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push40=, $0, 7 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push38=, $1, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push90=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push39=, $pop38, $pop90 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push41=, $pop40, $pop39 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push42=, $pop37, 7, $pop41 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push45=, $0, 8 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push43=, $1, 8 +; SIMD128-VM-FAST-NEXT: i32.const $push89=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push44=, $pop43, $pop89 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push46=, $pop45, $pop44 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push47=, $pop42, 8, $pop46 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push50=, $0, 9 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push48=, $1, 9 +; SIMD128-VM-FAST-NEXT: i32.const $push88=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push49=, $pop48, $pop88 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push51=, $pop50, 
$pop49 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push52=, $pop47, 9, $pop51 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push55=, $0, 10 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push53=, $1, 10 +; SIMD128-VM-FAST-NEXT: i32.const $push87=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push54=, $pop53, $pop87 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push56=, $pop55, $pop54 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push57=, $pop52, 10, $pop56 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push60=, $0, 11 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push58=, $1, 11 +; SIMD128-VM-FAST-NEXT: i32.const $push86=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push59=, $pop58, $pop86 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push61=, $pop60, $pop59 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push62=, $pop57, 11, $pop61 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push65=, $0, 12 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push63=, $1, 12 +; SIMD128-VM-FAST-NEXT: i32.const $push85=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push64=, $pop63, $pop85 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push66=, $pop65, $pop64 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push67=, $pop62, 12, $pop66 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push70=, $0, 13 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push68=, $1, 13 +; SIMD128-VM-FAST-NEXT: i32.const $push84=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push69=, $pop68, $pop84 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push71=, $pop70, $pop69 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push72=, $pop67, 13, $pop71 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push75=, $0, 14 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push73=, $1, 14 +; SIMD128-VM-FAST-NEXT: i32.const $push83=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push74=, $pop73, $pop83 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push76=, $pop75, $pop74 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push77=, $pop72, 14, $pop76 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push80=, $0, 15 +; SIMD128-VM-FAST-NEXT: 
i8x16.extract_lane_s $push78=, $1, 15 +; SIMD128-VM-FAST-NEXT: i32.const $push82=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push79=, $pop78, $pop82 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push81=, $pop80, $pop79 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push0=, $pop77, 15, $pop81 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_s_vec_v16i8: +; NO-SIMD128-SLOW: .functype shr_s_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push6=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push3=, $16, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push133=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push4=, $pop3, $pop133 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push1=, $32, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push5=, $pop4, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop7), $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push12=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push132=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push9=, $15, $pop132 +; NO-SIMD128-SLOW-NEXT: i32.const $push131=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push10=, $pop9, $pop131 +; NO-SIMD128-SLOW-NEXT: i32.const $push130=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push8=, $31, $pop130 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push11=, $pop10, $pop8 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop13), $pop11 +; NO-SIMD128-SLOW-NEXT: i32.const $push18=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-SLOW-NEXT: i32.const $push129=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push15=, $14, $pop129 +; NO-SIMD128-SLOW-NEXT: i32.const $push128=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push16=, $pop15, $pop128 +; NO-SIMD128-SLOW-NEXT: i32.const $push127=, 255 +; 
NO-SIMD128-SLOW-NEXT: i32.and $push14=, $30, $pop127 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push17=, $pop16, $pop14 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop19), $pop17 +; NO-SIMD128-SLOW-NEXT: i32.const $push24=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-SLOW-NEXT: i32.const $push126=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push21=, $13, $pop126 +; NO-SIMD128-SLOW-NEXT: i32.const $push125=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push22=, $pop21, $pop125 +; NO-SIMD128-SLOW-NEXT: i32.const $push124=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push20=, $29, $pop124 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push23=, $pop22, $pop20 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-SLOW-NEXT: i32.const $push30=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-SLOW-NEXT: i32.const $push123=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push27=, $12, $pop123 +; NO-SIMD128-SLOW-NEXT: i32.const $push122=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push28=, $pop27, $pop122 +; NO-SIMD128-SLOW-NEXT: i32.const $push121=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push26=, $28, $pop121 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push29=, $pop28, $pop26 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop31), $pop29 +; NO-SIMD128-SLOW-NEXT: i32.const $push36=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-SLOW-NEXT: i32.const $push120=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push33=, $11, $pop120 +; NO-SIMD128-SLOW-NEXT: i32.const $push119=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push34=, $pop33, $pop119 +; NO-SIMD128-SLOW-NEXT: i32.const $push118=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push32=, $27, $pop118 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push35=, $pop34, $pop32 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-SLOW-NEXT: i32.const $push42=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push43=, $0, $pop42 +; NO-SIMD128-SLOW-NEXT: i32.const $push117=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push39=, $10, $pop117 +; 
NO-SIMD128-SLOW-NEXT: i32.const $push116=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push40=, $pop39, $pop116 +; NO-SIMD128-SLOW-NEXT: i32.const $push115=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push38=, $26, $pop115 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push41=, $pop40, $pop38 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop43), $pop41 +; NO-SIMD128-SLOW-NEXT: i32.const $push114=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push45=, $9, $pop114 +; NO-SIMD128-SLOW-NEXT: i32.const $push113=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push46=, $pop45, $pop113 +; NO-SIMD128-SLOW-NEXT: i32.const $push112=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push44=, $25, $pop112 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push47=, $pop46, $pop44 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop47 +; NO-SIMD128-SLOW-NEXT: i32.const $push52=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push53=, $0, $pop52 +; NO-SIMD128-SLOW-NEXT: i32.const $push111=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push49=, $8, $pop111 +; NO-SIMD128-SLOW-NEXT: i32.const $push110=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push50=, $pop49, $pop110 +; NO-SIMD128-SLOW-NEXT: i32.const $push109=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push48=, $24, $pop109 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push51=, $pop50, $pop48 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop53), $pop51 +; NO-SIMD128-SLOW-NEXT: i32.const $push58=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push59=, $0, $pop58 +; NO-SIMD128-SLOW-NEXT: i32.const $push108=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push55=, $7, $pop108 +; NO-SIMD128-SLOW-NEXT: i32.const $push107=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push56=, $pop55, $pop107 +; NO-SIMD128-SLOW-NEXT: i32.const $push106=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push54=, $23, $pop106 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push57=, $pop56, $pop54 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop59), $pop57 +; NO-SIMD128-SLOW-NEXT: i32.const $push64=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-SLOW-NEXT: i32.const $push105=, 24 +; 
NO-SIMD128-SLOW-NEXT: i32.shl $push61=, $6, $pop105 +; NO-SIMD128-SLOW-NEXT: i32.const $push104=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push62=, $pop61, $pop104 +; NO-SIMD128-SLOW-NEXT: i32.const $push103=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push60=, $22, $pop103 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push63=, $pop62, $pop60 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop65), $pop63 +; NO-SIMD128-SLOW-NEXT: i32.const $push102=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push67=, $5, $pop102 +; NO-SIMD128-SLOW-NEXT: i32.const $push101=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push68=, $pop67, $pop101 +; NO-SIMD128-SLOW-NEXT: i32.const $push100=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push66=, $21, $pop100 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push69=, $pop68, $pop66 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop69 +; NO-SIMD128-SLOW-NEXT: i32.const $push74=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push75=, $0, $pop74 +; NO-SIMD128-SLOW-NEXT: i32.const $push99=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push71=, $4, $pop99 +; NO-SIMD128-SLOW-NEXT: i32.const $push98=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push72=, $pop71, $pop98 +; NO-SIMD128-SLOW-NEXT: i32.const $push97=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push70=, $20, $pop97 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push73=, $pop72, $pop70 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop75), $pop73 +; NO-SIMD128-SLOW-NEXT: i32.const $push96=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push77=, $3, $pop96 +; NO-SIMD128-SLOW-NEXT: i32.const $push95=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push78=, $pop77, $pop95 +; NO-SIMD128-SLOW-NEXT: i32.const $push94=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push76=, $19, $pop94 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push79=, $pop78, $pop76 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop79 +; NO-SIMD128-SLOW-NEXT: i32.const $push93=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push81=, $2, $pop93 +; NO-SIMD128-SLOW-NEXT: i32.const $push92=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push82=, $pop81, $pop92 +; NO-SIMD128-SLOW-NEXT: 
i32.const $push91=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push80=, $18, $pop91 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push83=, $pop82, $pop80 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop83 +; NO-SIMD128-SLOW-NEXT: i32.const $push90=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shl $push85=, $1, $pop90 +; NO-SIMD128-SLOW-NEXT: i32.const $push89=, 24 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push86=, $pop85, $pop89 +; NO-SIMD128-SLOW-NEXT: i32.const $push88=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push84=, $17, $pop88 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push87=, $pop86, $pop84 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop87 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_vec_v16i8: +; NO-SIMD128-FAST: .functype shr_s_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push2=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $1, $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push133=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $pop133 +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $pop4, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push132=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $2, $pop132 +; NO-SIMD128-FAST-NEXT: i32.const $push131=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push8=, $pop7, $pop131 +; NO-SIMD128-FAST-NEXT: i32.const $push130=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $18, $pop130 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push129=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $3, $pop129 +; NO-SIMD128-FAST-NEXT: i32.const $push128=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop128 +; NO-SIMD128-FAST-NEXT: i32.const 
$push127=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $19, $pop127 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push13=, $pop12, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push126=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $4, $pop126 +; NO-SIMD128-FAST-NEXT: i32.const $push125=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $pop125 +; NO-SIMD128-FAST-NEXT: i32.const $push124=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $20, $pop124 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop14 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push123=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push21=, $5, $pop123 +; NO-SIMD128-FAST-NEXT: i32.const $push122=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop122 +; NO-SIMD128-FAST-NEXT: i32.const $push121=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $21, $pop121 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push23=, $pop22, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push120=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push25=, $6, $pop120 +; NO-SIMD128-FAST-NEXT: i32.const $push119=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop25, $pop119 +; NO-SIMD128-FAST-NEXT: i32.const $push118=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $22, $pop118 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop29), $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push117=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push31=, $7, $pop117 +; NO-SIMD128-FAST-NEXT: i32.const $push116=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop116 +; 
NO-SIMD128-FAST-NEXT: i32.const $push115=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $23, $pop115 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push33=, $pop32, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push41=, $0, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push114=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push37=, $8, $pop114 +; NO-SIMD128-FAST-NEXT: i32.const $push113=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop37, $pop113 +; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $24, $pop112 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push39=, $pop38, $pop36 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop41), $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push111=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push43=, $9, $pop111 +; NO-SIMD128-FAST-NEXT: i32.const $push110=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push44=, $pop43, $pop110 +; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $25, $pop109 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push45=, $pop44, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push51=, $0, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push108=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push47=, $10, $pop108 +; NO-SIMD128-FAST-NEXT: i32.const $push107=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push48=, $pop47, $pop107 +; NO-SIMD128-FAST-NEXT: i32.const $push106=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $26, $pop106 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push49=, $pop48, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop51), $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push57=, $0, $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push105=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push53=, $11, $pop105 +; NO-SIMD128-FAST-NEXT: i32.const $push104=, 24 +; NO-SIMD128-FAST-NEXT: 
i32.shr_s $push54=, $pop53, $pop104 +; NO-SIMD128-FAST-NEXT: i32.const $push103=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push52=, $27, $pop103 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push55=, $pop54, $pop52 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop57), $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push63=, $0, $pop62 +; NO-SIMD128-FAST-NEXT: i32.const $push102=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push59=, $12, $pop102 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push60=, $pop59, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $28, $pop100 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push61=, $pop60, $pop58 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop63), $pop61 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push69=, $0, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push65=, $13, $pop99 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push66=, $pop65, $pop98 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push64=, $29, $pop97 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push67=, $pop66, $pop64 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop69), $pop67 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push75=, $0, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push71=, $14, $pop96 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push72=, $pop71, $pop95 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push70=, $30, $pop94 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push73=, $pop72, $pop70 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop75), $pop73 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push81=, $0, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const 
$push93=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push77=, $15, $pop93 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push78=, $pop77, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push76=, $31, $pop91 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push79=, $pop78, $pop76 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop81), $pop79 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push87=, $0, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 24 +; NO-SIMD128-FAST-NEXT: i32.shl $push83=, $16, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 24 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push84=, $pop83, $pop89 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push82=, $32, $pop88 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push85=, $pop84, $pop82 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop87), $pop85 +; NO-SIMD128-FAST-NEXT: return %a = ashr <16 x i8> %v, %x ret <16 x i8> %a } -; CHECK-LABEL: shr_u_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shr_u_v16i8 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i8x16.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) { +; SIMD128-UNIMPL-LABEL: shr_u_v16i8: +; SIMD128-UNIMPL: .functype shr_u_v16i8 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i8x16.shr_u $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shr_u_v16i8: +; SIMD128-VM-SLOW: .functype shr_u_v16i8 (v128, i32) -> (v128) +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push1=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push10=, $pop9, $pop1 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push5=, $1 +; SIMD128-VM-SLOW-NEXT: i32.const $push3=, 7 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push4=, $pop3 +; SIMD128-VM-SLOW-NEXT: v128.and $push132=, $pop5, $pop4 +; SIMD128-VM-SLOW-NEXT: local.tee $push131=, $2=, 
$pop132 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push11=, $pop131, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push130=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push12=, $pop11, $pop130 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push13=, $pop10, $pop12 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push14=, $pop13 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push0=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push129=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push2=, $pop0, $pop129 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push6=, $2, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push128=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push7=, $pop6, $pop128 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push8=, $pop2, $pop7 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push15=, $pop14, 1, $pop8 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push16=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push127=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push17=, $pop16, $pop127 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push18=, $2, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push126=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push19=, $pop18, $pop126 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push20=, $pop17, $pop19 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push21=, $pop15, 2, $pop20 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push22=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push125=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push23=, $pop22, $pop125 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push24=, $2, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push124=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push25=, $pop24, $pop124 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push26=, $pop23, $pop25 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push27=, $pop21, 3, $pop26 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push28=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push123=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push29=, $pop28, $pop123 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push30=, $2, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push122=, 7 +; 
SIMD128-VM-SLOW-NEXT: i32.and $push31=, $pop30, $pop122 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push32=, $pop29, $pop31 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push33=, $pop27, 4, $pop32 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push34=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push121=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push35=, $pop34, $pop121 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push36=, $2, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push120=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push37=, $pop36, $pop120 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push38=, $pop35, $pop37 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push39=, $pop33, 5, $pop38 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push40=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push119=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push41=, $pop40, $pop119 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push42=, $2, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push118=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push43=, $pop42, $pop118 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push44=, $pop41, $pop43 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push45=, $pop39, 6, $pop44 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push46=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push117=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push47=, $pop46, $pop117 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push48=, $2, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push116=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push49=, $pop48, $pop116 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push50=, $pop47, $pop49 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push51=, $pop45, 7, $pop50 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push52=, $0, 8 +; SIMD128-VM-SLOW-NEXT: i32.const $push115=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push53=, $pop52, $pop115 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push54=, $2, 8 +; SIMD128-VM-SLOW-NEXT: i32.const $push114=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push55=, $pop54, $pop114 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push56=, $pop53, 
$pop55 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push57=, $pop51, 8, $pop56 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push58=, $0, 9 +; SIMD128-VM-SLOW-NEXT: i32.const $push113=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push59=, $pop58, $pop113 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push60=, $2, 9 +; SIMD128-VM-SLOW-NEXT: i32.const $push112=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push61=, $pop60, $pop112 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push62=, $pop59, $pop61 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push63=, $pop57, 9, $pop62 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push64=, $0, 10 +; SIMD128-VM-SLOW-NEXT: i32.const $push111=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push65=, $pop64, $pop111 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push66=, $2, 10 +; SIMD128-VM-SLOW-NEXT: i32.const $push110=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push67=, $pop66, $pop110 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push68=, $pop65, $pop67 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push69=, $pop63, 10, $pop68 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push70=, $0, 11 +; SIMD128-VM-SLOW-NEXT: i32.const $push109=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push71=, $pop70, $pop109 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push72=, $2, 11 +; SIMD128-VM-SLOW-NEXT: i32.const $push108=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push73=, $pop72, $pop108 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push74=, $pop71, $pop73 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push75=, $pop69, 11, $pop74 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push76=, $0, 12 +; SIMD128-VM-SLOW-NEXT: i32.const $push107=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push77=, $pop76, $pop107 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push78=, $2, 12 +; SIMD128-VM-SLOW-NEXT: i32.const $push106=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push79=, $pop78, $pop106 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push80=, $pop77, $pop79 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push81=, $pop75, 12, $pop80 +; 
SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push82=, $0, 13 +; SIMD128-VM-SLOW-NEXT: i32.const $push105=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push83=, $pop82, $pop105 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push84=, $2, 13 +; SIMD128-VM-SLOW-NEXT: i32.const $push104=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push85=, $pop84, $pop104 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push86=, $pop83, $pop85 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push87=, $pop81, 13, $pop86 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push88=, $0, 14 +; SIMD128-VM-SLOW-NEXT: i32.const $push103=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push89=, $pop88, $pop103 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push90=, $2, 14 +; SIMD128-VM-SLOW-NEXT: i32.const $push102=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push91=, $pop90, $pop102 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push92=, $pop89, $pop91 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push93=, $pop87, 14, $pop92 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push94=, $0, 15 +; SIMD128-VM-SLOW-NEXT: i32.const $push101=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push95=, $pop94, $pop101 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push96=, $2, 15 +; SIMD128-VM-SLOW-NEXT: i32.const $push100=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push97=, $pop96, $pop100 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push98=, $pop95, $pop97 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push99=, $pop93, 15, $pop98 +; SIMD128-VM-SLOW-NEXT: return $pop99 +; +; SIMD128-VM-FAST-LABEL: shr_u_v16i8: +; SIMD128-VM-FAST: .functype shr_u_v16i8 (v128, i32) -> (v128) +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push10=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push3=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push11=, $pop10, $pop3 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push1=, $1 +; SIMD128-VM-FAST-NEXT: i32.const $push5=, 7 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push6=, $pop5 +; SIMD128-VM-FAST-NEXT: v128.and $push132=, $pop1, $pop6 +; SIMD128-VM-FAST-NEXT: local.tee $push131=, $2=, $pop132 
+; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push12=, $pop131, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push130=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push13=, $pop12, $pop130 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push14=, $pop11, $pop13 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push15=, $pop14 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push2=, $0, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push129=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push4=, $pop2, $pop129 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push7=, $2, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push128=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push8=, $pop7, $pop128 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push9=, $pop4, $pop8 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push16=, $pop15, 1, $pop9 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push17=, $0, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push127=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push18=, $pop17, $pop127 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push19=, $2, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push126=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push20=, $pop19, $pop126 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push21=, $pop18, $pop20 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push22=, $pop16, 2, $pop21 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push23=, $0, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push125=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push24=, $pop23, $pop125 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push25=, $2, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push124=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push26=, $pop25, $pop124 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push27=, $pop24, $pop26 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push28=, $pop22, 3, $pop27 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push29=, $0, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push123=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push30=, $pop29, $pop123 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push31=, $2, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push122=, 7 +; SIMD128-VM-FAST-NEXT: 
i32.and $push32=, $pop31, $pop122 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push33=, $pop30, $pop32 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push34=, $pop28, 4, $pop33 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push35=, $0, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push121=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push36=, $pop35, $pop121 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push37=, $2, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push120=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push38=, $pop37, $pop120 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push39=, $pop36, $pop38 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push40=, $pop34, 5, $pop39 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push41=, $0, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push119=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push42=, $pop41, $pop119 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push43=, $2, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push118=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push44=, $pop43, $pop118 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push45=, $pop42, $pop44 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push46=, $pop40, 6, $pop45 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push47=, $0, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push117=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push48=, $pop47, $pop117 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push49=, $2, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push116=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push50=, $pop49, $pop116 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push51=, $pop48, $pop50 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push52=, $pop46, 7, $pop51 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push53=, $0, 8 +; SIMD128-VM-FAST-NEXT: i32.const $push115=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push54=, $pop53, $pop115 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push55=, $2, 8 +; SIMD128-VM-FAST-NEXT: i32.const $push114=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push56=, $pop55, $pop114 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push57=, $pop54, $pop56 +; 
SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push58=, $pop52, 8, $pop57 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push59=, $0, 9 +; SIMD128-VM-FAST-NEXT: i32.const $push113=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push60=, $pop59, $pop113 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push61=, $2, 9 +; SIMD128-VM-FAST-NEXT: i32.const $push112=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push62=, $pop61, $pop112 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push63=, $pop60, $pop62 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push64=, $pop58, 9, $pop63 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push65=, $0, 10 +; SIMD128-VM-FAST-NEXT: i32.const $push111=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push66=, $pop65, $pop111 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push67=, $2, 10 +; SIMD128-VM-FAST-NEXT: i32.const $push110=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push68=, $pop67, $pop110 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push69=, $pop66, $pop68 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push70=, $pop64, 10, $pop69 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push71=, $0, 11 +; SIMD128-VM-FAST-NEXT: i32.const $push109=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push72=, $pop71, $pop109 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push73=, $2, 11 +; SIMD128-VM-FAST-NEXT: i32.const $push108=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push74=, $pop73, $pop108 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push75=, $pop72, $pop74 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push76=, $pop70, 11, $pop75 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push77=, $0, 12 +; SIMD128-VM-FAST-NEXT: i32.const $push107=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push78=, $pop77, $pop107 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push79=, $2, 12 +; SIMD128-VM-FAST-NEXT: i32.const $push106=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push80=, $pop79, $pop106 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push81=, $pop78, $pop80 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push82=, $pop76, 12, $pop81 +; SIMD128-VM-FAST-NEXT: 
i8x16.extract_lane_s $push83=, $0, 13 +; SIMD128-VM-FAST-NEXT: i32.const $push105=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push84=, $pop83, $pop105 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push85=, $2, 13 +; SIMD128-VM-FAST-NEXT: i32.const $push104=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push86=, $pop85, $pop104 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push87=, $pop84, $pop86 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push88=, $pop82, 13, $pop87 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push89=, $0, 14 +; SIMD128-VM-FAST-NEXT: i32.const $push103=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push90=, $pop89, $pop103 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push91=, $2, 14 +; SIMD128-VM-FAST-NEXT: i32.const $push102=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push92=, $pop91, $pop102 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push93=, $pop90, $pop92 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push94=, $pop88, 14, $pop93 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push95=, $0, 15 +; SIMD128-VM-FAST-NEXT: i32.const $push101=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push96=, $pop95, $pop101 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push97=, $2, 15 +; SIMD128-VM-FAST-NEXT: i32.const $push100=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push98=, $pop97, $pop100 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push99=, $pop96, $pop98 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push0=, $pop94, 15, $pop99 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_u_v16i8: +; NO-SIMD128-SLOW: .functype shr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push3=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push1=, $16, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push72=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push71=, $17, $pop72 +; NO-SIMD128-SLOW-NEXT: local.tee $push70=, $16=, $pop71 +; 
NO-SIMD128-SLOW-NEXT: i32.shr_u $push2=, $pop1, $pop70 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop4), $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.const $push69=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push5=, $15, $pop69 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push6=, $pop5, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.const $push68=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push9=, $14, $pop68 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push10=, $pop9, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push15=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push67=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push13=, $13, $pop67 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push14=, $pop13, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop16), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.const $push66=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push17=, $12, $pop66 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push18=, $pop17, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.const $push65=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push21=, $11, $pop65 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push22=, $pop21, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push27=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-SLOW-NEXT: i32.const $push64=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push25=, $10, $pop64 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push26=, $pop25, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop28), $pop26 
+; NO-SIMD128-SLOW-NEXT: i32.const $push63=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push29=, $9, $pop63 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push30=, $pop29, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop30 +; NO-SIMD128-SLOW-NEXT: i32.const $push33=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.const $push62=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push31=, $8, $pop62 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push32=, $pop31, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.const $push37=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push38=, $0, $pop37 +; NO-SIMD128-SLOW-NEXT: i32.const $push61=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push35=, $7, $pop61 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push36=, $pop35, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.const $push41=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-SLOW-NEXT: i32.const $push60=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push39=, $6, $pop60 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push40=, $pop39, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-SLOW-NEXT: i32.const $push59=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push43=, $5, $pop59 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push44=, $pop43, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop44 +; NO-SIMD128-SLOW-NEXT: i32.const $push47=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push48=, $0, $pop47 +; NO-SIMD128-SLOW-NEXT: i32.const $push58=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push45=, $4, $pop58 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push46=, $pop45, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop48), $pop46 +; NO-SIMD128-SLOW-NEXT: i32.const $push57=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push49=, $3, $pop57 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push50=, $pop49, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop50 +; NO-SIMD128-SLOW-NEXT: i32.const $push56=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push51=, $2, $pop56 +; 
NO-SIMD128-SLOW-NEXT: i32.shr_u $push52=, $pop51, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop52 +; NO-SIMD128-SLOW-NEXT: i32.const $push55=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push53=, $1, $pop55 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push54=, $pop53, $16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop54 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_v16i8: +; NO-SIMD128-FAST: .functype shr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push71=, $17, $pop72 +; NO-SIMD128-FAST-NEXT: local.tee $push70=, $17=, $pop71 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop70 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop69 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop68 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop67 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop66 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; 
NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop65 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop16), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop64 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop63 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $9, $pop62 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push26=, $pop25, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $10, $pop61 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push28=, $pop27, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $11, $pop60 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push32=, $pop31, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push38=, $0, $pop37 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $12, $pop59 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 12 
+; NO-SIMD128-FAST-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push39=, $13, $pop58 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push40=, $pop39, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $14, $pop57 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push44=, $pop43, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $15, $pop56 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push54=, $0, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $16, $pop55 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push52=, $pop51, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, <16 x i32> %a } -; CHECK-LABEL: shr_u_vec_v16i8: -; NO-SIMD128-NOT: i8x16 -; SIMD128-NEXT: .functype shr_u_vec_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 7{{$}} -; SIMD128-NEXT: i8x16.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}} -; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}} -; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}} -; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: 
i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 14 lanes -; SIMD128: i8x16.extract_lane_u $push[[L4:[0-9]+]]=, $0, 15{{$}} -; SIMD128-NEXT: i8x16.extract_lane_u $push[[L5:[0-9]+]]=, $1, 15{{$}} -; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 15, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shr_u_vec_v16i8: +; SIMD128-UNIMPL-SLOW: .functype shr_u_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push6=, $0, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: v128.and $push67=, $1, $pop1 +; SIMD128-UNIMPL-SLOW-NEXT: local.tee $push66=, $1=, $pop67 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push5=, $pop66, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push7=, $pop6, $pop5 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.splat $push8=, $pop7 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push3=, $0, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push2=, $1, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push4=, $pop3, $pop2 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push9=, $pop8, 1, $pop4 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push11=, $0, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push10=, $1, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push13=, $pop9, 2, $pop12 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push15=, $0, 3 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push14=, $1, 3 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push16=, $pop15, $pop14 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push17=, $pop13, 3, $pop16 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push19=, $0, 4 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u 
$push18=, $1, 4 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push20=, $pop19, $pop18 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push21=, $pop17, 4, $pop20 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push23=, $0, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push22=, $1, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push25=, $pop21, 5, $pop24 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push27=, $0, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push26=, $1, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push28=, $pop27, $pop26 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push29=, $pop25, 6, $pop28 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push31=, $0, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push30=, $1, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push32=, $pop31, $pop30 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push33=, $pop29, 7, $pop32 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push35=, $0, 8 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push34=, $1, 8 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push36=, $pop35, $pop34 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push37=, $pop33, 8, $pop36 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push39=, $0, 9 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push38=, $1, 9 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push40=, $pop39, $pop38 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push41=, $pop37, 9, $pop40 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push43=, $0, 10 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push42=, $1, 10 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push44=, $pop43, $pop42 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push45=, $pop41, 10, $pop44 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push47=, $0, 11 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push46=, $1, 11 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push48=, $pop47, $pop46 +; 
SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push49=, $pop45, 11, $pop48 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push51=, $0, 12 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push50=, $1, 12 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push52=, $pop51, $pop50 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push53=, $pop49, 12, $pop52 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push55=, $0, 13 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push54=, $1, 13 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push56=, $pop55, $pop54 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push57=, $pop53, 13, $pop56 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push59=, $0, 14 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push58=, $1, 14 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push60=, $pop59, $pop58 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push61=, $pop57, 14, $pop60 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push63=, $0, 15 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.extract_lane_u $push62=, $1, 15 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push64=, $pop63, $pop62 +; SIMD128-UNIMPL-SLOW-NEXT: i8x16.replace_lane $push65=, $pop61, 15, $pop64 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop65 +; +; SIMD128-UNIMPL-FAST-LABEL: shr_u_vec_v16i8: +; SIMD128-UNIMPL-FAST: .functype shr_u_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push7=, $0, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 7 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.splat $push2=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: v128.and $push67=, $1, $pop2 +; SIMD128-UNIMPL-FAST-NEXT: local.tee $push66=, $1=, $pop67 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push6=, $pop66, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push8=, $pop7, $pop6 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.splat $push9=, $pop8 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push4=, $0, 1 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push3=, $1, 1 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push5=, 
$pop4, $pop3 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push10=, $pop9, 1, $pop5 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push12=, $0, 2 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push11=, $1, 2 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop11 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push14=, $pop10, 2, $pop13 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push16=, $0, 3 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push15=, $1, 3 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push18=, $pop14, 3, $pop17 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push20=, $0, 4 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push19=, $1, 4 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push22=, $pop18, 4, $pop21 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push24=, $0, 5 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push23=, $1, 5 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push26=, $pop22, 5, $pop25 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push28=, $0, 6 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push27=, $1, 6 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop27 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push30=, $pop26, 6, $pop29 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push32=, $0, 7 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push31=, $1, 7 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop31 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push34=, $pop30, 7, $pop33 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push36=, $0, 8 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push35=, $1, 8 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push37=, $pop36, $pop35 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push38=, $pop34, 8, $pop37 +; 
SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push40=, $0, 9 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push39=, $1, 9 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop39 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push42=, $pop38, 9, $pop41 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push44=, $0, 10 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push43=, $1, 10 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop43 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push46=, $pop42, 10, $pop45 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push48=, $0, 11 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push47=, $1, 11 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push49=, $pop48, $pop47 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push50=, $pop46, 11, $pop49 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push52=, $0, 12 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push51=, $1, 12 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop51 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push54=, $pop50, 12, $pop53 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push56=, $0, 13 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push55=, $1, 13 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop55 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push58=, $pop54, 13, $pop57 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push60=, $0, 14 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push59=, $1, 14 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push61=, $pop60, $pop59 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push62=, $pop58, 14, $pop61 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push64=, $0, 15 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.extract_lane_u $push63=, $1, 15 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push65=, $pop64, $pop63 +; SIMD128-UNIMPL-FAST-NEXT: i8x16.replace_lane $push0=, $pop62, 15, $pop65 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: 
shr_u_vec_v16i8: +; SIMD128-VM-SLOW: .functype shr_u_vec_v16i8 (v128, v128) -> (v128) +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push8=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push1=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push9=, $pop8, $pop1 +; SIMD128-VM-SLOW-NEXT: i32.const $push3=, 7 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push4=, $pop3 +; SIMD128-VM-SLOW-NEXT: v128.and $push131=, $1, $pop4 +; SIMD128-VM-SLOW-NEXT: local.tee $push130=, $1=, $pop131 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push10=, $pop130, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push129=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push11=, $pop10, $pop129 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push12=, $pop9, $pop11 +; SIMD128-VM-SLOW-NEXT: i8x16.splat $push13=, $pop12 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push0=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push128=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push2=, $pop0, $pop128 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push5=, $1, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push127=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push6=, $pop5, $pop127 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push7=, $pop2, $pop6 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push14=, $pop13, 1, $pop7 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push15=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push126=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push16=, $pop15, $pop126 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push17=, $1, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push125=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push18=, $pop17, $pop125 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push19=, $pop16, $pop18 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push20=, $pop14, 2, $pop19 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push21=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push124=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push22=, $pop21, $pop124 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push23=, $1, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push123=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and 
$push24=, $pop23, $pop123 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push25=, $pop22, $pop24 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push26=, $pop20, 3, $pop25 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push27=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push122=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push28=, $pop27, $pop122 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push29=, $1, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push121=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push30=, $pop29, $pop121 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push31=, $pop28, $pop30 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push32=, $pop26, 4, $pop31 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push33=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push120=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push34=, $pop33, $pop120 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push35=, $1, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push119=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push36=, $pop35, $pop119 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push37=, $pop34, $pop36 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push38=, $pop32, 5, $pop37 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push39=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push118=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push40=, $pop39, $pop118 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push41=, $1, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push117=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push42=, $pop41, $pop117 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push43=, $pop40, $pop42 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push44=, $pop38, 6, $pop43 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push45=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push116=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push46=, $pop45, $pop116 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push47=, $1, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push115=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push48=, $pop47, $pop115 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push49=, $pop46, $pop48 +; SIMD128-VM-SLOW-NEXT: 
i8x16.replace_lane $push50=, $pop44, 7, $pop49 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push51=, $0, 8 +; SIMD128-VM-SLOW-NEXT: i32.const $push114=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push52=, $pop51, $pop114 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push53=, $1, 8 +; SIMD128-VM-SLOW-NEXT: i32.const $push113=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push54=, $pop53, $pop113 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push55=, $pop52, $pop54 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push56=, $pop50, 8, $pop55 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push57=, $0, 9 +; SIMD128-VM-SLOW-NEXT: i32.const $push112=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push58=, $pop57, $pop112 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push59=, $1, 9 +; SIMD128-VM-SLOW-NEXT: i32.const $push111=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push60=, $pop59, $pop111 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push61=, $pop58, $pop60 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push62=, $pop56, 9, $pop61 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push63=, $0, 10 +; SIMD128-VM-SLOW-NEXT: i32.const $push110=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push64=, $pop63, $pop110 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push65=, $1, 10 +; SIMD128-VM-SLOW-NEXT: i32.const $push109=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push66=, $pop65, $pop109 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push67=, $pop64, $pop66 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push68=, $pop62, 10, $pop67 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push69=, $0, 11 +; SIMD128-VM-SLOW-NEXT: i32.const $push108=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push70=, $pop69, $pop108 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push71=, $1, 11 +; SIMD128-VM-SLOW-NEXT: i32.const $push107=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push72=, $pop71, $pop107 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push73=, $pop70, $pop72 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push74=, $pop68, 11, $pop73 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push75=, 
$0, 12 +; SIMD128-VM-SLOW-NEXT: i32.const $push106=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push76=, $pop75, $pop106 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push77=, $1, 12 +; SIMD128-VM-SLOW-NEXT: i32.const $push105=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push78=, $pop77, $pop105 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push79=, $pop76, $pop78 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push80=, $pop74, 12, $pop79 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push81=, $0, 13 +; SIMD128-VM-SLOW-NEXT: i32.const $push104=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push82=, $pop81, $pop104 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push83=, $1, 13 +; SIMD128-VM-SLOW-NEXT: i32.const $push103=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push84=, $pop83, $pop103 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push85=, $pop82, $pop84 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push86=, $pop80, 13, $pop85 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push87=, $0, 14 +; SIMD128-VM-SLOW-NEXT: i32.const $push102=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push88=, $pop87, $pop102 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push89=, $1, 14 +; SIMD128-VM-SLOW-NEXT: i32.const $push101=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push90=, $pop89, $pop101 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push91=, $pop88, $pop90 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push92=, $pop86, 14, $pop91 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push93=, $0, 15 +; SIMD128-VM-SLOW-NEXT: i32.const $push100=, 255 +; SIMD128-VM-SLOW-NEXT: i32.and $push94=, $pop93, $pop100 +; SIMD128-VM-SLOW-NEXT: i8x16.extract_lane_s $push95=, $1, 15 +; SIMD128-VM-SLOW-NEXT: i32.const $push99=, 7 +; SIMD128-VM-SLOW-NEXT: i32.and $push96=, $pop95, $pop99 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push97=, $pop94, $pop96 +; SIMD128-VM-SLOW-NEXT: i8x16.replace_lane $push98=, $pop92, 15, $pop97 +; SIMD128-VM-SLOW-NEXT: return $pop98 +; +; SIMD128-VM-FAST-LABEL: shr_u_vec_v16i8: +; SIMD128-VM-FAST: .functype shr_u_vec_v16i8 (v128, v128) -> (v128) +; 
SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push2=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push10=, $pop9, $pop2 +; SIMD128-VM-FAST-NEXT: i32.const $push4=, 7 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push5=, $pop4 +; SIMD128-VM-FAST-NEXT: v128.and $push131=, $1, $pop5 +; SIMD128-VM-FAST-NEXT: local.tee $push130=, $1=, $pop131 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push11=, $pop130, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push129=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push12=, $pop11, $pop129 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push13=, $pop10, $pop12 +; SIMD128-VM-FAST-NEXT: i8x16.splat $push14=, $pop13 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push1=, $0, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push128=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push3=, $pop1, $pop128 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push6=, $1, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push127=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push7=, $pop6, $pop127 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push8=, $pop3, $pop7 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push15=, $pop14, 1, $pop8 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push16=, $0, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push126=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push17=, $pop16, $pop126 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push18=, $1, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push125=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push19=, $pop18, $pop125 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push20=, $pop17, $pop19 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push21=, $pop15, 2, $pop20 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push22=, $0, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push124=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push23=, $pop22, $pop124 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push24=, $1, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push123=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push25=, $pop24, $pop123 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push26=, $pop23, $pop25 +; 
SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push27=, $pop21, 3, $pop26 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push28=, $0, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push122=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push29=, $pop28, $pop122 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push30=, $1, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push121=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push31=, $pop30, $pop121 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push32=, $pop29, $pop31 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push33=, $pop27, 4, $pop32 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push34=, $0, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push120=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push35=, $pop34, $pop120 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push36=, $1, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push119=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push37=, $pop36, $pop119 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push38=, $pop35, $pop37 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push39=, $pop33, 5, $pop38 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push40=, $0, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push118=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push41=, $pop40, $pop118 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push42=, $1, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push117=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push43=, $pop42, $pop117 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push44=, $pop41, $pop43 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push45=, $pop39, 6, $pop44 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push46=, $0, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push116=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push47=, $pop46, $pop116 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push48=, $1, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push115=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push49=, $pop48, $pop115 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push50=, $pop47, $pop49 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push51=, $pop45, 7, $pop50 +; SIMD128-VM-FAST-NEXT: 
i8x16.extract_lane_s $push52=, $0, 8 +; SIMD128-VM-FAST-NEXT: i32.const $push114=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push53=, $pop52, $pop114 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push54=, $1, 8 +; SIMD128-VM-FAST-NEXT: i32.const $push113=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push55=, $pop54, $pop113 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push56=, $pop53, $pop55 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push57=, $pop51, 8, $pop56 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push58=, $0, 9 +; SIMD128-VM-FAST-NEXT: i32.const $push112=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push59=, $pop58, $pop112 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push60=, $1, 9 +; SIMD128-VM-FAST-NEXT: i32.const $push111=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push61=, $pop60, $pop111 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push62=, $pop59, $pop61 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push63=, $pop57, 9, $pop62 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push64=, $0, 10 +; SIMD128-VM-FAST-NEXT: i32.const $push110=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push65=, $pop64, $pop110 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push66=, $1, 10 +; SIMD128-VM-FAST-NEXT: i32.const $push109=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push67=, $pop66, $pop109 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push68=, $pop65, $pop67 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push69=, $pop63, 10, $pop68 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push70=, $0, 11 +; SIMD128-VM-FAST-NEXT: i32.const $push108=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push71=, $pop70, $pop108 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push72=, $1, 11 +; SIMD128-VM-FAST-NEXT: i32.const $push107=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push73=, $pop72, $pop107 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push74=, $pop71, $pop73 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push75=, $pop69, 11, $pop74 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push76=, $0, 12 +; SIMD128-VM-FAST-NEXT: i32.const $push106=, 255 +; 
SIMD128-VM-FAST-NEXT: i32.and $push77=, $pop76, $pop106 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push78=, $1, 12 +; SIMD128-VM-FAST-NEXT: i32.const $push105=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push79=, $pop78, $pop105 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push80=, $pop77, $pop79 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push81=, $pop75, 12, $pop80 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push82=, $0, 13 +; SIMD128-VM-FAST-NEXT: i32.const $push104=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push83=, $pop82, $pop104 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push84=, $1, 13 +; SIMD128-VM-FAST-NEXT: i32.const $push103=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push85=, $pop84, $pop103 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push86=, $pop83, $pop85 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push87=, $pop81, 13, $pop86 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push88=, $0, 14 +; SIMD128-VM-FAST-NEXT: i32.const $push102=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push89=, $pop88, $pop102 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push90=, $1, 14 +; SIMD128-VM-FAST-NEXT: i32.const $push101=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push91=, $pop90, $pop101 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push92=, $pop89, $pop91 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push93=, $pop87, 14, $pop92 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push94=, $0, 15 +; SIMD128-VM-FAST-NEXT: i32.const $push100=, 255 +; SIMD128-VM-FAST-NEXT: i32.and $push95=, $pop94, $pop100 +; SIMD128-VM-FAST-NEXT: i8x16.extract_lane_s $push96=, $1, 15 +; SIMD128-VM-FAST-NEXT: i32.const $push99=, 7 +; SIMD128-VM-FAST-NEXT: i32.and $push97=, $pop96, $pop99 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push98=, $pop95, $pop97 +; SIMD128-VM-FAST-NEXT: i8x16.replace_lane $push0=, $pop93, 15, $pop98 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_u_vec_v16i8: +; NO-SIMD128-SLOW: .functype shr_u_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push2=, $16, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push101=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push1=, $32, $pop101 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push9=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push100=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push7=, $15, $pop100 +; NO-SIMD128-SLOW-NEXT: i32.const $push99=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push6=, $31, $pop99 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push8=, $pop7, $pop6 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop10), $pop8 +; NO-SIMD128-SLOW-NEXT: i32.const $push14=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push98=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push12=, $14, $pop98 +; NO-SIMD128-SLOW-NEXT: i32.const $push97=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push11=, $30, $pop97 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push13=, $pop12, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop15), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.const $push96=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push17=, $13, $pop96 +; NO-SIMD128-SLOW-NEXT: i32.const $push95=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push16=, $29, $pop95 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-SLOW-NEXT: i32.const $push24=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-SLOW-NEXT: i32.const $push94=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push22=, $12, $pop94 +; NO-SIMD128-SLOW-NEXT: 
i32.const $push93=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push21=, $28, $pop93 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push23=, $pop22, $pop21 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-SLOW-NEXT: i32.const $push29=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.const $push92=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push27=, $11, $pop92 +; NO-SIMD128-SLOW-NEXT: i32.const $push91=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push26=, $27, $pop91 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push28=, $pop27, $pop26 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-SLOW-NEXT: i32.const $push34=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-SLOW-NEXT: i32.const $push90=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push32=, $10, $pop90 +; NO-SIMD128-SLOW-NEXT: i32.const $push89=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push31=, $26, $pop89 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push33=, $pop32, $pop31 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-SLOW-NEXT: i32.const $push88=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push37=, $9, $pop88 +; NO-SIMD128-SLOW-NEXT: i32.const $push87=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push36=, $25, $pop87 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push38=, $pop37, $pop36 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop38 +; NO-SIMD128-SLOW-NEXT: i32.const $push42=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push43=, $0, $pop42 +; NO-SIMD128-SLOW-NEXT: i32.const $push86=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push40=, $8, $pop86 +; NO-SIMD128-SLOW-NEXT: i32.const $push85=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push39=, $24, $pop85 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push41=, $pop40, $pop39 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop43), $pop41 +; NO-SIMD128-SLOW-NEXT: i32.const $push47=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push48=, $0, $pop47 +; NO-SIMD128-SLOW-NEXT: i32.const $push84=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push45=, $7, $pop84 +; 
NO-SIMD128-SLOW-NEXT: i32.const $push83=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push44=, $23, $pop83 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push46=, $pop45, $pop44 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop48), $pop46 +; NO-SIMD128-SLOW-NEXT: i32.const $push52=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push53=, $0, $pop52 +; NO-SIMD128-SLOW-NEXT: i32.const $push82=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push50=, $6, $pop82 +; NO-SIMD128-SLOW-NEXT: i32.const $push81=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push49=, $22, $pop81 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push51=, $pop50, $pop49 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop53), $pop51 +; NO-SIMD128-SLOW-NEXT: i32.const $push80=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push55=, $5, $pop80 +; NO-SIMD128-SLOW-NEXT: i32.const $push79=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push54=, $21, $pop79 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push56=, $pop55, $pop54 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop56 +; NO-SIMD128-SLOW-NEXT: i32.const $push60=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push61=, $0, $pop60 +; NO-SIMD128-SLOW-NEXT: i32.const $push78=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push58=, $4, $pop78 +; NO-SIMD128-SLOW-NEXT: i32.const $push77=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push57=, $20, $pop77 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push59=, $pop58, $pop57 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop61), $pop59 +; NO-SIMD128-SLOW-NEXT: i32.const $push76=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push63=, $3, $pop76 +; NO-SIMD128-SLOW-NEXT: i32.const $push75=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push62=, $19, $pop75 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push64=, $pop63, $pop62 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop64 +; NO-SIMD128-SLOW-NEXT: i32.const $push74=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push66=, $2, $pop74 +; NO-SIMD128-SLOW-NEXT: i32.const $push73=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push65=, $18, $pop73 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push67=, $pop66, $pop65 +; NO-SIMD128-SLOW-NEXT: i32.store8 
1($0), $pop67 +; NO-SIMD128-SLOW-NEXT: i32.const $push72=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push69=, $1, $pop72 +; NO-SIMD128-SLOW-NEXT: i32.const $push71=, 255 +; NO-SIMD128-SLOW-NEXT: i32.and $push68=, $17, $pop71 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push70=, $pop69, $pop68 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop70 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_vec_v16i8: +; NO-SIMD128-FAST: .functype shr_u_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop101 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop100 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop99 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop98 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop97 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop96 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop95 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop12 +; 
NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop94 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop93 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $22, $pop91 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $23, $pop89 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $24, $pop87 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop85 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop33 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 
+; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $10, $pop84 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $26, $pop83 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push38=, $pop37, $pop36 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push45=, $0, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop82 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop81 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push43=, $pop42, $pop41 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop45), $pop43 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $12, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $28, $pop79 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push52=, $13, $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $29, $pop77 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $14, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push56=, $30, $pop75 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push58=, $pop57, $pop56 +; NO-SIMD128-FAST-NEXT: i32.store8 
0($pop60), $pop58 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push62=, $15, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push61=, $31, $pop73 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop61 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop63 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push70=, $0, $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push67=, $16, $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push66=, $32, $pop71 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push68=, $pop67, $pop66 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-FAST-NEXT: return %a = lshr <16 x i8> %v, %x ret <16 x i8> %a } -; CHECK-LABEL: and_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype and_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: and_v16i8: +; SIMD128: .functype and_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: v128.and $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: and_v16i8: +; NO-SIMD128-SLOW: .functype and_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.and $push0=, $16, $32 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.and $push3=, $15, $31 +; NO-SIMD128-SLOW-NEXT: 
i32.store8 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.and $push6=, $14, $30 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push10=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-SLOW-NEXT: i32.and $push9=, $13, $29 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push13=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-SLOW-NEXT: i32.and $push12=, $12, $28 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push16=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-SLOW-NEXT: i32.and $push15=, $11, $27 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.and $push18=, $10, $26 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-SLOW-NEXT: i32.and $push21=, $9, $25 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop21 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.and $push22=, $8, $24 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push26=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-SLOW-NEXT: i32.and $push25=, $7, $23 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop27), $pop25 +; NO-SIMD128-SLOW-NEXT: i32.const $push29=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.and $push28=, $6, $22 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-SLOW-NEXT: i32.and $push31=, $5, $21 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop31 +; NO-SIMD128-SLOW-NEXT: i32.const $push33=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push34=, $0, $pop33 +; 
NO-SIMD128-SLOW-NEXT: i32.and $push32=, $4, $20 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.and $push35=, $3, $19 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop35 +; NO-SIMD128-SLOW-NEXT: i32.and $push36=, $2, $18 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.and $push37=, $1, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop37 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: and_v16i8: +; NO-SIMD128-FAST: .functype and_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = and <16 x i8> %x, %y ret <16 x i8> %a } -; CHECK-LABEL: or_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype or_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: or_v16i8: +; SIMD128: .functype or_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: v128.or $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: or_v16i8: +; 
NO-SIMD128-SLOW: .functype or_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.or $push0=, $16, $32 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.or $push3=, $15, $31 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.or $push6=, $14, $30 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push10=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-SLOW-NEXT: i32.or $push9=, $13, $29 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push13=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-SLOW-NEXT: i32.or $push12=, $12, $28 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push16=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-SLOW-NEXT: i32.or $push15=, $11, $27 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop17), $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.or $push18=, $10, $26 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-SLOW-NEXT: i32.or $push21=, $9, $25 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop21 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.or $push22=, $8, $24 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push26=, 6 +; 
NO-SIMD128-SLOW-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-SLOW-NEXT: i32.or $push25=, $7, $23 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop27), $pop25 +; NO-SIMD128-SLOW-NEXT: i32.const $push29=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.or $push28=, $6, $22 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-SLOW-NEXT: i32.or $push31=, $5, $21 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop31 +; NO-SIMD128-SLOW-NEXT: i32.const $push33=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.or $push32=, $4, $20 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.or $push35=, $3, $19 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop35 +; NO-SIMD128-SLOW-NEXT: i32.or $push36=, $2, $18 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.or $push37=, $1, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop37 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: or_v16i8: +; NO-SIMD128-FAST: .functype or_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.or $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.or $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $22 +; 
NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.or $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.or $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.or $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.or $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.or $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.or $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.or $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.or $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), 
$pop37 +; NO-SIMD128-FAST-NEXT: return %a = or <16 x i8> %x, %y ret <16 x i8> %a } -; CHECK-LABEL: xor_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype xor_v16i8 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) { +; SIMD128-LABEL: xor_v16i8: +; SIMD128: .functype xor_v16i8 (v128, v128) -> (v128) +; SIMD128-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: xor_v16i8: +; NO-SIMD128-SLOW: .functype xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push0=, $16, $32 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.xor $push3=, $15, $31 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.xor $push6=, $14, $30 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push10=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-SLOW-NEXT: i32.xor $push9=, $13, $29 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop11), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push13=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-SLOW-NEXT: i32.xor $push12=, $12, $28 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop14), $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push16=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push17=, $0, $pop16 +; NO-SIMD128-SLOW-NEXT: i32.xor $push15=, $11, $27 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop17), $pop15 +; 
NO-SIMD128-SLOW-NEXT: i32.const $push19=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.xor $push18=, $10, $26 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop20), $pop18 +; NO-SIMD128-SLOW-NEXT: i32.xor $push21=, $9, $25 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop21 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.xor $push22=, $8, $24 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push26=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-SLOW-NEXT: i32.xor $push25=, $7, $23 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop27), $pop25 +; NO-SIMD128-SLOW-NEXT: i32.const $push29=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.xor $push28=, $6, $22 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-SLOW-NEXT: i32.xor $push31=, $5, $21 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop31 +; NO-SIMD128-SLOW-NEXT: i32.const $push33=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.xor $push32=, $4, $20 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.xor $push35=, $3, $19 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop35 +; NO-SIMD128-SLOW-NEXT: i32.xor $push36=, $2, $18 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.xor $push37=, $1, $17 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop37 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: xor_v16i8: +; NO-SIMD128-FAST: .functype xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $2, $18 +; NO-SIMD128-FAST-NEXT: 
i32.store8 1($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 +; NO-SIMD128-FAST-NEXT: 
i32.const $push29=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: return %a = xor <16 x i8> %x, %y ret <16 x i8> %a } -; CHECK-LABEL: not_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype not_v16i8 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <16 x i8> @not_v16i8(<16 x i8> %x) { +; SIMD128-LABEL: not_v16i8: +; SIMD128: .functype not_v16i8 (v128) -> (v128) +; SIMD128-NEXT: v128.not $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: not_v16i8: +; NO-SIMD128-SLOW: .functype not_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push1=, $16, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push53=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push4=, $15, $pop53 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop6), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push8=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-SLOW-NEXT: i32.const $push52=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push7=, $14, $pop52 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop9), $pop7 
+; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.const $push51=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push10=, $13, $pop51 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push14=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push50=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push13=, $12, $pop50 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop15), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.const $push17=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-SLOW-NEXT: i32.const $push49=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push16=, $11, $pop49 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-SLOW-NEXT: i32.const $push20=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push21=, $0, $pop20 +; NO-SIMD128-SLOW-NEXT: i32.const $push48=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push19=, $10, $pop48 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop21), $pop19 +; NO-SIMD128-SLOW-NEXT: i32.const $push47=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push22=, $9, $pop47 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push24=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-SLOW-NEXT: i32.const $push46=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push23=, $8, $pop46 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop25), $pop23 +; NO-SIMD128-SLOW-NEXT: i32.const $push27=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-SLOW-NEXT: i32.const $push45=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push26=, $7, $pop45 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-SLOW-NEXT: i32.const $push30=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-SLOW-NEXT: i32.const $push44=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push29=, $6, $pop44 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop31), $pop29 +; NO-SIMD128-SLOW-NEXT: i32.const 
$push43=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push32=, $5, $pop43 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.const $push34=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-SLOW-NEXT: i32.const $push42=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push33=, $4, $pop42 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop35), $pop33 +; NO-SIMD128-SLOW-NEXT: i32.const $push41=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push36=, $3, $pop41 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.const $push40=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push37=, $2, $pop40 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop37 +; NO-SIMD128-SLOW-NEXT: i32.const $push39=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push38=, $1, $pop39 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop38 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: not_v16i8: +; NO-SIMD128-FAST: .functype not_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop50 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; 
NO-SIMD128-FAST-NEXT: i32.const $push49=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop49 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop48 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $9, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $10, $pop45 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $11, $pop44 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $12, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $13, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 
-1 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $14, $pop41 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $15, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $16, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: return %a = xor <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ret <16 x i8> %a } -; CHECK-LABEL: bitselect_v16i8: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_v16i8 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.or -; SIMD128-FAST-NEXT: return define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { +; SIMD128-SLOW-LABEL: bitselect_v16i8: +; SIMD128-SLOW: .functype bitselect_v16i8 (v128, v128, v128) -> (v128) +; SIMD128-SLOW-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-SLOW-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_v16i8: +; SIMD128-FAST: .functype bitselect_v16i8 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: v128.and $push0=, $0, $1 +; SIMD128-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-FAST-NEXT: v128.and $push3=, $pop2, $2 +; SIMD128-FAST-NEXT: v128.or $push1=, $pop0, $pop3 +; SIMD128-FAST-NEXT: return $pop1 +; +; NO-SIMD128-SLOW-LABEL: bitselect_v16i8: +; NO-SIMD128-SLOW: .functype bitselect_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 15 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.and $push0=, $16, $32 +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push2=, $16, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.and $push3=, $pop2, $48 +; NO-SIMD128-SLOW-NEXT: i32.or $push4=, $pop0, $pop3 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop6), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.and $push7=, $15, $31 +; NO-SIMD128-SLOW-NEXT: i32.const $push101=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push8=, $15, $pop101 +; NO-SIMD128-SLOW-NEXT: i32.and $push9=, $pop8, $47 +; NO-SIMD128-SLOW-NEXT: i32.or $push10=, $pop7, $pop9 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push17=, 13 +; NO-SIMD128-SLOW-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-SLOW-NEXT: i32.and $push13=, $14, $30 +; NO-SIMD128-SLOW-NEXT: i32.const $push100=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push14=, $14, $pop100 +; NO-SIMD128-SLOW-NEXT: i32.and $push15=, $pop14, $46 +; NO-SIMD128-SLOW-NEXT: i32.or $push16=, $pop13, $pop15 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop18), $pop16 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.and $push19=, $13, $29 +; NO-SIMD128-SLOW-NEXT: i32.const $push99=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push20=, $13, $pop99 +; NO-SIMD128-SLOW-NEXT: i32.and $push21=, $pop20, $45 +; NO-SIMD128-SLOW-NEXT: i32.or $push22=, $pop19, $pop21 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop24), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push29=, 11 +; NO-SIMD128-SLOW-NEXT: i32.add $push30=, $0, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.and $push25=, $12, $28 +; NO-SIMD128-SLOW-NEXT: i32.const $push98=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push26=, $12, $pop98 
+; NO-SIMD128-SLOW-NEXT: i32.and $push27=, $pop26, $44 +; NO-SIMD128-SLOW-NEXT: i32.or $push28=, $pop25, $pop27 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop30), $pop28 +; NO-SIMD128-SLOW-NEXT: i32.const $push35=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push36=, $0, $pop35 +; NO-SIMD128-SLOW-NEXT: i32.and $push31=, $11, $27 +; NO-SIMD128-SLOW-NEXT: i32.const $push97=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push32=, $11, $pop97 +; NO-SIMD128-SLOW-NEXT: i32.and $push33=, $pop32, $43 +; NO-SIMD128-SLOW-NEXT: i32.or $push34=, $pop31, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop36), $pop34 +; NO-SIMD128-SLOW-NEXT: i32.const $push41=, 9 +; NO-SIMD128-SLOW-NEXT: i32.add $push42=, $0, $pop41 +; NO-SIMD128-SLOW-NEXT: i32.and $push37=, $10, $26 +; NO-SIMD128-SLOW-NEXT: i32.const $push96=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push38=, $10, $pop96 +; NO-SIMD128-SLOW-NEXT: i32.and $push39=, $pop38, $42 +; NO-SIMD128-SLOW-NEXT: i32.or $push40=, $pop37, $pop39 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop42), $pop40 +; NO-SIMD128-SLOW-NEXT: i32.and $push43=, $9, $25 +; NO-SIMD128-SLOW-NEXT: i32.const $push95=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push44=, $9, $pop95 +; NO-SIMD128-SLOW-NEXT: i32.and $push45=, $pop44, $41 +; NO-SIMD128-SLOW-NEXT: i32.or $push46=, $pop43, $pop45 +; NO-SIMD128-SLOW-NEXT: i32.store8 8($0), $pop46 +; NO-SIMD128-SLOW-NEXT: i32.const $push51=, 7 +; NO-SIMD128-SLOW-NEXT: i32.add $push52=, $0, $pop51 +; NO-SIMD128-SLOW-NEXT: i32.and $push47=, $8, $24 +; NO-SIMD128-SLOW-NEXT: i32.const $push94=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push48=, $8, $pop94 +; NO-SIMD128-SLOW-NEXT: i32.and $push49=, $pop48, $40 +; NO-SIMD128-SLOW-NEXT: i32.or $push50=, $pop47, $pop49 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop52), $pop50 +; NO-SIMD128-SLOW-NEXT: i32.const $push57=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push58=, $0, $pop57 +; NO-SIMD128-SLOW-NEXT: i32.and $push53=, $7, $23 +; NO-SIMD128-SLOW-NEXT: i32.const $push93=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push54=, $7, $pop93 
+; NO-SIMD128-SLOW-NEXT: i32.and $push55=, $pop54, $39 +; NO-SIMD128-SLOW-NEXT: i32.or $push56=, $pop53, $pop55 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop58), $pop56 +; NO-SIMD128-SLOW-NEXT: i32.const $push63=, 5 +; NO-SIMD128-SLOW-NEXT: i32.add $push64=, $0, $pop63 +; NO-SIMD128-SLOW-NEXT: i32.and $push59=, $6, $22 +; NO-SIMD128-SLOW-NEXT: i32.const $push92=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push60=, $6, $pop92 +; NO-SIMD128-SLOW-NEXT: i32.and $push61=, $pop60, $38 +; NO-SIMD128-SLOW-NEXT: i32.or $push62=, $pop59, $pop61 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop64), $pop62 +; NO-SIMD128-SLOW-NEXT: i32.and $push65=, $5, $21 +; NO-SIMD128-SLOW-NEXT: i32.const $push91=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push66=, $5, $pop91 +; NO-SIMD128-SLOW-NEXT: i32.and $push67=, $pop66, $37 +; NO-SIMD128-SLOW-NEXT: i32.or $push68=, $pop65, $pop67 +; NO-SIMD128-SLOW-NEXT: i32.store8 4($0), $pop68 +; NO-SIMD128-SLOW-NEXT: i32.const $push73=, 3 +; NO-SIMD128-SLOW-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-SLOW-NEXT: i32.and $push69=, $4, $20 +; NO-SIMD128-SLOW-NEXT: i32.const $push90=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push70=, $4, $pop90 +; NO-SIMD128-SLOW-NEXT: i32.and $push71=, $pop70, $36 +; NO-SIMD128-SLOW-NEXT: i32.or $push72=, $pop69, $pop71 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-SLOW-NEXT: i32.and $push75=, $3, $19 +; NO-SIMD128-SLOW-NEXT: i32.const $push89=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push76=, $3, $pop89 +; NO-SIMD128-SLOW-NEXT: i32.and $push77=, $pop76, $35 +; NO-SIMD128-SLOW-NEXT: i32.or $push78=, $pop75, $pop77 +; NO-SIMD128-SLOW-NEXT: i32.store8 2($0), $pop78 +; NO-SIMD128-SLOW-NEXT: i32.and $push79=, $2, $18 +; NO-SIMD128-SLOW-NEXT: i32.const $push88=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push80=, $2, $pop88 +; NO-SIMD128-SLOW-NEXT: i32.and $push81=, $pop80, $34 +; NO-SIMD128-SLOW-NEXT: i32.or $push82=, $pop79, $pop81 +; NO-SIMD128-SLOW-NEXT: i32.store8 1($0), $pop82 +; NO-SIMD128-SLOW-NEXT: i32.and $push83=, $1, 
$17 +; NO-SIMD128-SLOW-NEXT: i32.const $push87=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push84=, $1, $pop87 +; NO-SIMD128-SLOW-NEXT: i32.and $push85=, $pop84, $33 +; NO-SIMD128-SLOW-NEXT: i32.or $push86=, $pop83, $pop85 +; NO-SIMD128-SLOW-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_v16i8: +; NO-SIMD128-FAST: .functype bitselect_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $17 +; NO-SIMD128-FAST-NEXT: i32.const $push1=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $pop2, $33 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $18 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop101 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $34 +; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $19 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop100 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $35 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.const $push99=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop99 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $36 +; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 +; 
NO-SIMD128-FAST-NEXT: i32.and $push19=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop98 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $37 +; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop97 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $38 +; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push96=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop96 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $39 +; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop95 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $40 +; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $9, $pop94 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $41 +; NO-SIMD128-FAST-NEXT: i32.or $push44=, $pop41, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 +; 
NO-SIMD128-FAST-NEXT: i32.and $push45=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $10, $pop93 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $42 +; NO-SIMD128-FAST-NEXT: i32.or $push48=, $pop45, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $11, $pop92 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $43 +; NO-SIMD128-FAST-NEXT: i32.or $push54=, $pop51, $pop53 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 +; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $12, $pop91 +; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $44 +; NO-SIMD128-FAST-NEXT: i32.or $push60=, $pop57, $pop59 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $13, $pop90 +; NO-SIMD128-FAST-NEXT: i32.and $push65=, $pop64, $45 +; NO-SIMD128-FAST-NEXT: i32.or $push66=, $pop63, $pop65 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 +; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-FAST-NEXT: i32.and $push69=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push70=, $14, $pop89 +; NO-SIMD128-FAST-NEXT: i32.and $push71=, $pop70, $46 +; NO-SIMD128-FAST-NEXT: i32.or $push72=, $pop69, $pop71 +; NO-SIMD128-FAST-NEXT: i32.store8 
0($pop74), $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 +; NO-SIMD128-FAST-NEXT: i32.and $push75=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push76=, $15, $pop88 +; NO-SIMD128-FAST-NEXT: i32.and $push77=, $pop76, $47 +; NO-SIMD128-FAST-NEXT: i32.or $push78=, $pop75, $pop77 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 +; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 +; NO-SIMD128-FAST-NEXT: i32.and $push81=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push82=, $16, $pop87 +; NO-SIMD128-FAST-NEXT: i32.and $push83=, $pop82, $48 +; NO-SIMD128-FAST-NEXT: i32.or $push84=, $pop81, $pop83 +; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <16 x i8> %c, %v1 %inv_mask = xor <16 x i8> %c, (v128){{$}} -; SIMD128-NEXT: i16x8.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} + define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: add_v8i16: +; SIMD128: .functype add_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: i16x8.add $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: add_v8i16: +; NO-SIMD128-SLOW: .functype add_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.add $push0=, $8, $16 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $7, $15 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.add 
$push6=, $6, $14 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.add $push9=, $5, $13 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.add $push10=, $4, $12 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.add $push13=, $3, $11 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.add $push14=, $2, $10 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.add $push15=, $1, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop15 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: add_v8i16: +; NO-SIMD128-FAST: .functype add_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.add $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.add $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 
+; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = add <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: sub_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype sub_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: sub_v8i16: +; SIMD128: .functype sub_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: i16x8.sub $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: sub_v8i16: +; NO-SIMD128-SLOW: .functype sub_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.sub $push0=, $8, $16 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.sub $push3=, $7, $15 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.sub $push6=, $6, $14 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.sub $push9=, $5, $13 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.sub $push10=, $4, $12 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.sub $push13=, $3, $11 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.sub $push14=, $2, $10 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.sub $push15=, $1, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), 
$pop15 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sub_v8i16: +; NO-SIMD128-FAST: .functype sub_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.sub $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: mul_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype mul_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.mul $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: mul_v8i16: +; SIMD128: .functype mul_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: i16x8.mul $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: mul_v8i16: +; NO-SIMD128-SLOW: 
.functype mul_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.mul $push0=, $8, $16 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.mul $push3=, $7, $15 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.mul $push6=, $6, $14 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.mul $push9=, $5, $13 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.mul $push10=, $4, $12 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.mul $push13=, $3, $11 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.mul $push14=, $2, $10 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.mul $push15=, $1, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop15 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: mul_v8i16: +; NO-SIMD128-FAST: .functype mul_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.mul $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.mul $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $12 +; 
NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = mul <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: neg_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype neg_v8i16 (v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @neg_v8i16(<8 x i16> %x) { +; SIMD128-LABEL: neg_v8i16: +; SIMD128: .functype neg_v8i16 (v128) -> (v128) +; SIMD128-NEXT: i16x8.neg $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: neg_v8i16: +; NO-SIMD128-SLOW: .functype neg_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push1=, $pop0, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push4=, $pop23, $7 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push8=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-SLOW-NEXT: 
i32.const $push22=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push7=, $pop22, $6 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop9), $pop7 +; NO-SIMD128-SLOW-NEXT: i32.const $push21=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push10=, $pop21, $5 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push12=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push20=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push11=, $pop20, $4 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop13), $pop11 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push14=, $pop19, $3 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push18=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push15=, $pop18, $2 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push17=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push16=, $pop17, $1 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop16 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: neg_v8i16: +; NO-SIMD128-FAST: .functype neg_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop23, $2 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop22, $3 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop21, $4 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop20, $5 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), 
$pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop19, $6 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop18, $7 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop17, $8 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> , %x ret <8 x i16> %a } -; CHECK-LABEL: shl_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shl_v8i16 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) { +; SIMD128-UNIMPL-LABEL: shl_v8i16: +; SIMD128-UNIMPL: .functype shl_v8i16 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i16x8.shl $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shl_v8i16: +; SIMD128-VM-SLOW: .functype shl_v8i16 (v128, i32) -> (v128) +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push2=, $1 +; SIMD128-VM-SLOW-NEXT: i32.const $push0=, 15 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push1=, $pop0 +; SIMD128-VM-SLOW-NEXT: v128.and $push52=, $pop2, $pop1 +; SIMD128-VM-SLOW-NEXT: local.tee $push51=, $2=, $pop52 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push7=, $pop51, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push50=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push8=, $pop7, $pop50 +; SIMD128-VM-SLOW-NEXT: i32.shl $push10=, $pop9, $pop8 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push11=, 
$pop10 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push5=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push3=, $2, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push49=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push4=, $pop3, $pop49 +; SIMD128-VM-SLOW-NEXT: i32.shl $push6=, $pop5, $pop4 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push12=, $pop11, 1, $pop6 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push15=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push13=, $2, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push48=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push14=, $pop13, $pop48 +; SIMD128-VM-SLOW-NEXT: i32.shl $push16=, $pop15, $pop14 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push17=, $pop12, 2, $pop16 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push20=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push18=, $2, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push47=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push19=, $pop18, $pop47 +; SIMD128-VM-SLOW-NEXT: i32.shl $push21=, $pop20, $pop19 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push22=, $pop17, 3, $pop21 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push25=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push23=, $2, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push46=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push24=, $pop23, $pop46 +; SIMD128-VM-SLOW-NEXT: i32.shl $push26=, $pop25, $pop24 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push27=, $pop22, 4, $pop26 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push30=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push28=, $2, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push45=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push29=, $pop28, $pop45 +; SIMD128-VM-SLOW-NEXT: i32.shl $push31=, $pop30, $pop29 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push32=, $pop27, 5, $pop31 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push35=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push33=, $2, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push44=, 15 +; SIMD128-VM-SLOW-NEXT: 
i32.and $push34=, $pop33, $pop44 +; SIMD128-VM-SLOW-NEXT: i32.shl $push36=, $pop35, $pop34 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push37=, $pop32, 6, $pop36 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push40=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push38=, $2, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push43=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push39=, $pop38, $pop43 +; SIMD128-VM-SLOW-NEXT: i32.shl $push41=, $pop40, $pop39 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push42=, $pop37, 7, $pop41 +; SIMD128-VM-SLOW-NEXT: return $pop42 +; +; SIMD128-VM-FAST-LABEL: shl_v8i16: +; SIMD128-VM-FAST: .functype shl_v8i16 (v128, i32) -> (v128) +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push10=, $0, 0 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push1=, $1 +; SIMD128-VM-FAST-NEXT: i32.const $push2=, 15 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push3=, $pop2 +; SIMD128-VM-FAST-NEXT: v128.and $push52=, $pop1, $pop3 +; SIMD128-VM-FAST-NEXT: local.tee $push51=, $2=, $pop52 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push8=, $pop51, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push50=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push9=, $pop8, $pop50 +; SIMD128-VM-FAST-NEXT: i32.shl $push11=, $pop10, $pop9 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push12=, $pop11 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push6=, $0, 1 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push4=, $2, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push49=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push5=, $pop4, $pop49 +; SIMD128-VM-FAST-NEXT: i32.shl $push7=, $pop6, $pop5 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push13=, $pop12, 1, $pop7 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push16=, $0, 2 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push14=, $2, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push48=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push15=, $pop14, $pop48 +; SIMD128-VM-FAST-NEXT: i32.shl $push17=, $pop16, $pop15 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push18=, $pop13, 2, $pop17 +; 
SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push21=, $0, 3 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push19=, $2, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push47=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push20=, $pop19, $pop47 +; SIMD128-VM-FAST-NEXT: i32.shl $push22=, $pop21, $pop20 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push23=, $pop18, 3, $pop22 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push26=, $0, 4 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push24=, $2, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push46=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push25=, $pop24, $pop46 +; SIMD128-VM-FAST-NEXT: i32.shl $push27=, $pop26, $pop25 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push28=, $pop23, 4, $pop27 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push31=, $0, 5 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push29=, $2, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push45=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push30=, $pop29, $pop45 +; SIMD128-VM-FAST-NEXT: i32.shl $push32=, $pop31, $pop30 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push33=, $pop28, 5, $pop32 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push36=, $0, 6 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push34=, $2, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push44=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push35=, $pop34, $pop44 +; SIMD128-VM-FAST-NEXT: i32.shl $push37=, $pop36, $pop35 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push38=, $pop33, 6, $pop37 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push41=, $0, 7 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push39=, $2, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push43=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push40=, $pop39, $pop43 +; SIMD128-VM-FAST-NEXT: i32.shl $push42=, $pop41, $pop40 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push0=, $pop38, 7, $pop42 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shl_v8i16: +; NO-SIMD128-SLOW: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: 
i32.const $push0=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push18=, $9, $pop2 +; NO-SIMD128-SLOW-NEXT: local.tee $push17=, $9=, $pop18 +; NO-SIMD128-SLOW-NEXT: i32.shl $push3=, $8, $pop17 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop1), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.shl $push6=, $7, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop5), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.shl $push9=, $6, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.shl $push10=, $5, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.shl $push13=, $4, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop12), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.shl $push14=, $3, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.shl $push15=, $2, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop15 +; NO-SIMD128-SLOW-NEXT: i32.shl $push16=, $1, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop16 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_v8i16: +; NO-SIMD128-FAST: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push17=, $9=, $pop18 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 +; 
NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, <8 x i32> @@ -300,46 +5279,696 @@ ret <8 x i16> %a } -; CHECK-LABEL: shl_const_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shl_const_v8i16 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5 -; SIMD128-NEXT: i16x8.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shl_const_v8i16(<8 x i16> %v) { +; SIMD128-UNIMPL-SLOW-LABEL: shl_const_v8i16: +; SIMD128-UNIMPL-SLOW: .functype shl_const_v8i16 (v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.shl $push1=, $0, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop1 +; +; SIMD128-UNIMPL-FAST-LABEL: shl_const_v8i16: +; SIMD128-UNIMPL-FAST: .functype shl_const_v8i16 (v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.shl $push0=, $0, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: 
shl_const_v8i16: +; SIMD128-VM-SLOW: .functype shl_const_v8i16 (v128) -> (v128) +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push3=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push1=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push4=, $pop3, $pop1 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push5=, $pop4 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push0=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push31=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push2=, $pop0, $pop31 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push6=, $pop5, 1, $pop2 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push7=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push30=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push8=, $pop7, $pop30 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push9=, $pop6, 2, $pop8 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push10=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push29=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push11=, $pop10, $pop29 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push12=, $pop9, 3, $pop11 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push13=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push28=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push14=, $pop13, $pop28 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push15=, $pop12, 4, $pop14 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push16=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push27=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push17=, $pop16, $pop27 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push18=, $pop15, 5, $pop17 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push19=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push26=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push20=, $pop19, $pop26 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push21=, $pop18, 6, $pop20 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push22=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push25=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push23=, $pop22, $pop25 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push24=, $pop21, 7, $pop23 +; SIMD128-VM-SLOW-NEXT: return $pop24 +; +; 
SIMD128-VM-FAST-LABEL: shl_const_v8i16: +; SIMD128-VM-FAST: .functype shl_const_v8i16 (v128) -> (v128) +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push4=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push2=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push5=, $pop4, $pop2 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push6=, $pop5 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push1=, $0, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push31=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push3=, $pop1, $pop31 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push7=, $pop6, 1, $pop3 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push8=, $0, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push30=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push9=, $pop8, $pop30 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push10=, $pop7, 2, $pop9 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push11=, $0, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push29=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push12=, $pop11, $pop29 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push13=, $pop10, 3, $pop12 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push14=, $0, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push28=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push15=, $pop14, $pop28 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push16=, $pop13, 4, $pop15 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push17=, $0, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push27=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push18=, $pop17, $pop27 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push19=, $pop16, 5, $pop18 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push20=, $0, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push26=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push21=, $pop20, $pop26 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push22=, $pop19, 6, $pop21 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push23=, $0, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push25=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push24=, $pop23, $pop25 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push0=, $pop22, 7, $pop24 +; 
SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shl_const_v8i16: +; NO-SIMD128-SLOW: .functype shl_const_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push1=, $8, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push4=, $7, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push8=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-SLOW-NEXT: i32.const $push22=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push7=, $6, $pop22 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop9), $pop7 +; NO-SIMD128-SLOW-NEXT: i32.const $push21=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push10=, $5, $pop21 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push12=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push20=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push11=, $4, $pop20 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop13), $pop11 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push14=, $3, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push18=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push15=, $2, $pop18 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push17=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push16=, $1, $pop17 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop16 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_const_v8i16: +; NO-SIMD128-FAST: .functype shl_const_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: 
i32.const $push0=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: return %a = shl <8 x i16> %v, ret <8 x i16> %a } -; CHECK-LABEL: shl_vec_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shl_vec_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}} -; 
SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}} -; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}} -; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 6 lanes -; SIMD128: i16x8.extract_lane_s $push[[L4:[0-9]+]]=, $0, 7{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}} -; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shl_vec_v8i16: +; SIMD128-UNIMPL-SLOW: .functype shl_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push6=, $0, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 15 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: v128.and $push35=, $1, $pop1 +; SIMD128-UNIMPL-SLOW-NEXT: local.tee $push34=, $1=, $pop35 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push5=, $pop34, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push7=, $pop6, $pop5 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.splat $push8=, $pop7 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push3=, $0, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push2=, $1, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push4=, $pop3, $pop2 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push9=, $pop8, 1, $pop4 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push11=, $0, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push10=, $1, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push12=, $pop11, $pop10 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push13=, $pop9, 2, $pop12 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push15=, $0, 3 +; SIMD128-UNIMPL-SLOW-NEXT: 
i16x8.extract_lane_u $push14=, $1, 3 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push16=, $pop15, $pop14 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push17=, $pop13, 3, $pop16 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push19=, $0, 4 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push18=, $1, 4 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push20=, $pop19, $pop18 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push21=, $pop17, 4, $pop20 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push23=, $0, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push22=, $1, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push24=, $pop23, $pop22 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push25=, $pop21, 5, $pop24 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push27=, $0, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push26=, $1, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push28=, $pop27, $pop26 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push29=, $pop25, 6, $pop28 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push31=, $0, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push30=, $1, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shl $push32=, $pop31, $pop30 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push33=, $pop29, 7, $pop32 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop33 +; +; SIMD128-UNIMPL-FAST-LABEL: shl_vec_v8i16: +; SIMD128-UNIMPL-FAST: .functype shl_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push7=, $0, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 15 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.splat $push2=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: v128.and $push35=, $1, $pop2 +; SIMD128-UNIMPL-FAST-NEXT: local.tee $push34=, $1=, $pop35 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push6=, $pop34, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push8=, $pop7, $pop6 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.splat $push9=, $pop8 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push4=, $0, 1 +; SIMD128-UNIMPL-FAST-NEXT: 
i16x8.extract_lane_u $push3=, $1, 1 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push5=, $pop4, $pop3 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push10=, $pop9, 1, $pop5 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push12=, $0, 2 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push11=, $1, 2 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push13=, $pop12, $pop11 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push14=, $pop10, 2, $pop13 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push16=, $0, 3 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push15=, $1, 3 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push17=, $pop16, $pop15 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push18=, $pop14, 3, $pop17 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push20=, $0, 4 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push19=, $1, 4 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push21=, $pop20, $pop19 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push22=, $pop18, 4, $pop21 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push24=, $0, 5 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push23=, $1, 5 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push25=, $pop24, $pop23 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push26=, $pop22, 5, $pop25 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push28=, $0, 6 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push27=, $1, 6 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push29=, $pop28, $pop27 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push30=, $pop26, 6, $pop29 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push32=, $0, 7 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push31=, $1, 7 +; SIMD128-UNIMPL-FAST-NEXT: i32.shl $push33=, $pop32, $pop31 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push0=, $pop30, 7, $pop33 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shl_vec_v8i16: +; SIMD128-VM-SLOW: .functype shl_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push8=, $0, 
0 +; SIMD128-VM-SLOW-NEXT: i32.const $push0=, 15 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push1=, $pop0 +; SIMD128-VM-SLOW-NEXT: v128.and $push51=, $1, $pop1 +; SIMD128-VM-SLOW-NEXT: local.tee $push50=, $1=, $pop51 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push6=, $pop50, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push49=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push7=, $pop6, $pop49 +; SIMD128-VM-SLOW-NEXT: i32.shl $push9=, $pop8, $pop7 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push10=, $pop9 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push4=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push2=, $1, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push48=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push3=, $pop2, $pop48 +; SIMD128-VM-SLOW-NEXT: i32.shl $push5=, $pop4, $pop3 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push11=, $pop10, 1, $pop5 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push14=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push12=, $1, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push47=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push13=, $pop12, $pop47 +; SIMD128-VM-SLOW-NEXT: i32.shl $push15=, $pop14, $pop13 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push16=, $pop11, 2, $pop15 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push19=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push17=, $1, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push46=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push18=, $pop17, $pop46 +; SIMD128-VM-SLOW-NEXT: i32.shl $push20=, $pop19, $pop18 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push21=, $pop16, 3, $pop20 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push24=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push22=, $1, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push45=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push23=, $pop22, $pop45 +; SIMD128-VM-SLOW-NEXT: i32.shl $push25=, $pop24, $pop23 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push26=, $pop21, 4, $pop25 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push29=, $0, 5 +; 
SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push27=, $1, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push44=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push28=, $pop27, $pop44 +; SIMD128-VM-SLOW-NEXT: i32.shl $push30=, $pop29, $pop28 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push31=, $pop26, 5, $pop30 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push34=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push32=, $1, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push43=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push33=, $pop32, $pop43 +; SIMD128-VM-SLOW-NEXT: i32.shl $push35=, $pop34, $pop33 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push36=, $pop31, 6, $pop35 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push39=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push37=, $1, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push42=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push38=, $pop37, $pop42 +; SIMD128-VM-SLOW-NEXT: i32.shl $push40=, $pop39, $pop38 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push41=, $pop36, 7, $pop40 +; SIMD128-VM-SLOW-NEXT: return $pop41 +; +; SIMD128-VM-FAST-LABEL: shl_vec_v8i16: +; SIMD128-VM-FAST: .functype shl_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push1=, 15 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push2=, $pop1 +; SIMD128-VM-FAST-NEXT: v128.and $push51=, $1, $pop2 +; SIMD128-VM-FAST-NEXT: local.tee $push50=, $1=, $pop51 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push7=, $pop50, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push49=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push8=, $pop7, $pop49 +; SIMD128-VM-FAST-NEXT: i32.shl $push10=, $pop9, $pop8 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push11=, $pop10 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push5=, $0, 1 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push3=, $1, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push48=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push4=, $pop3, $pop48 +; SIMD128-VM-FAST-NEXT: i32.shl $push6=, $pop5, $pop4 +; 
SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push12=, $pop11, 1, $pop6 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push15=, $0, 2 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push13=, $1, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push47=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push14=, $pop13, $pop47 +; SIMD128-VM-FAST-NEXT: i32.shl $push16=, $pop15, $pop14 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push17=, $pop12, 2, $pop16 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push20=, $0, 3 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push18=, $1, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push46=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push19=, $pop18, $pop46 +; SIMD128-VM-FAST-NEXT: i32.shl $push21=, $pop20, $pop19 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push22=, $pop17, 3, $pop21 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push25=, $0, 4 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push23=, $1, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push45=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push24=, $pop23, $pop45 +; SIMD128-VM-FAST-NEXT: i32.shl $push26=, $pop25, $pop24 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push27=, $pop22, 4, $pop26 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push30=, $0, 5 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push28=, $1, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push44=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push29=, $pop28, $pop44 +; SIMD128-VM-FAST-NEXT: i32.shl $push31=, $pop30, $pop29 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push32=, $pop27, 5, $pop31 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push35=, $0, 6 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push33=, $1, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push43=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push34=, $pop33, $pop43 +; SIMD128-VM-FAST-NEXT: i32.shl $push36=, $pop35, $pop34 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push37=, $pop32, 6, $pop36 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push40=, $0, 7 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push38=, $1, 7 +; 
SIMD128-VM-FAST-NEXT: i32.const $push42=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push39=, $pop38, $pop42 +; SIMD128-VM-FAST-NEXT: i32.shl $push41=, $pop40, $pop39 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push0=, $pop37, 7, $pop41 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shl_vec_v8i16: +; NO-SIMD128-SLOW: .functype shl_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push3=, $16, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.shl $push4=, $8, $pop3 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop1), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push7=, $15, $pop31 +; NO-SIMD128-SLOW-NEXT: i32.shl $push8=, $7, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop6), $pop8 +; NO-SIMD128-SLOW-NEXT: i32.const $push9=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push11=, $14, $pop30 +; NO-SIMD128-SLOW-NEXT: i32.shl $push12=, $6, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop10), $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push13=, $13, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.shl $push14=, $5, $pop13 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push15=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push17=, $12, $pop28 +; NO-SIMD128-SLOW-NEXT: i32.shl $push18=, $4, $pop17 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop16), $pop18 +; NO-SIMD128-SLOW-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and 
$push19=, $11, $pop27 +; NO-SIMD128-SLOW-NEXT: i32.shl $push20=, $3, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop20 +; NO-SIMD128-SLOW-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push21=, $10, $pop26 +; NO-SIMD128-SLOW-NEXT: i32.shl $push22=, $2, $pop21 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push23=, $9, $pop25 +; NO-SIMD128-SLOW-NEXT: i32.shl $push24=, $1, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop24 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_vec_v8i16: +; NO-SIMD128-FAST: .functype shl_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $10, $pop31 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $11, $pop30 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $12, $pop29 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $13, $pop28 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $5, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, 
$pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $14, $pop27 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop26 +; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $16, $pop25 +; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: return %a = shl <8 x i16> %v, %x ret <8 x i16> %a } -; CHECK-LABEL: shr_s_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shr_s_v8i16 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) { +; SIMD128-UNIMPL-LABEL: shr_s_v8i16: +; SIMD128-UNIMPL: .functype shr_s_v8i16 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i16x8.shr_s $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shr_s_v8i16: +; SIMD128-VM-SLOW: .functype shr_s_v8i16 (v128, i32) -> (v128) +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push2=, $1 +; SIMD128-VM-SLOW-NEXT: i32.const $push0=, 15 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push1=, $pop0 +; SIMD128-VM-SLOW-NEXT: v128.and $push52=, $pop2, $pop1 +; SIMD128-VM-SLOW-NEXT: local.tee $push51=, $2=, $pop52 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push7=, $pop51, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push50=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push8=, $pop7, $pop50 +; 
SIMD128-VM-SLOW-NEXT: i32.shr_s $push10=, $pop9, $pop8 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push11=, $pop10 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push5=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push3=, $2, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push49=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push4=, $pop3, $pop49 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push6=, $pop5, $pop4 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push12=, $pop11, 1, $pop6 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push15=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push13=, $2, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push48=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push14=, $pop13, $pop48 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push16=, $pop15, $pop14 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push17=, $pop12, 2, $pop16 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push20=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push18=, $2, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push47=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push19=, $pop18, $pop47 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push22=, $pop17, 3, $pop21 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push25=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push23=, $2, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push46=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push24=, $pop23, $pop46 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push26=, $pop25, $pop24 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push27=, $pop22, 4, $pop26 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push30=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push28=, $2, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push45=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push29=, $pop28, $pop45 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push31=, $pop30, $pop29 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push32=, $pop27, 5, $pop31 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push35=, $0, 6 +; SIMD128-VM-SLOW-NEXT: 
i16x8.extract_lane_s $push33=, $2, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push44=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push34=, $pop33, $pop44 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push36=, $pop35, $pop34 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push37=, $pop32, 6, $pop36 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push40=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push38=, $2, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push43=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push39=, $pop38, $pop43 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push41=, $pop40, $pop39 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push42=, $pop37, 7, $pop41 +; SIMD128-VM-SLOW-NEXT: return $pop42 +; +; SIMD128-VM-FAST-LABEL: shr_s_v8i16: +; SIMD128-VM-FAST: .functype shr_s_v8i16 (v128, i32) -> (v128) +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push10=, $0, 0 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push1=, $1 +; SIMD128-VM-FAST-NEXT: i32.const $push2=, 15 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push3=, $pop2 +; SIMD128-VM-FAST-NEXT: v128.and $push52=, $pop1, $pop3 +; SIMD128-VM-FAST-NEXT: local.tee $push51=, $2=, $pop52 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push8=, $pop51, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push50=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push9=, $pop8, $pop50 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push11=, $pop10, $pop9 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push12=, $pop11 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push6=, $0, 1 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push4=, $2, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push49=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push5=, $pop4, $pop49 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push7=, $pop6, $pop5 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push13=, $pop12, 1, $pop7 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push16=, $0, 2 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push14=, $2, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push48=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push15=, $pop14, $pop48 +; SIMD128-VM-FAST-NEXT: 
i32.shr_s $push17=, $pop16, $pop15 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push18=, $pop13, 2, $pop17 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push21=, $0, 3 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push19=, $2, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push47=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push20=, $pop19, $pop47 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push23=, $pop18, 3, $pop22 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push26=, $0, 4 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push24=, $2, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push46=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push25=, $pop24, $pop46 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push28=, $pop23, 4, $pop27 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push31=, $0, 5 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push29=, $2, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push45=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push30=, $pop29, $pop45 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push33=, $pop28, 5, $pop32 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push36=, $0, 6 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push34=, $2, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push44=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push35=, $pop34, $pop44 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push37=, $pop36, $pop35 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push38=, $pop33, 6, $pop37 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push41=, $0, 7 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push39=, $2, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push43=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push40=, $pop39, $pop43 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push42=, $pop41, $pop40 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push0=, $pop38, 7, $pop42 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_s_v8i16: +; 
NO-SIMD128-SLOW: .functype shr_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push2=, $8, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.const $push50=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push3=, $pop2, $pop50 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push49=, $9, $pop0 +; NO-SIMD128-SLOW-NEXT: local.tee $push48=, $8=, $pop49 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push4=, $pop3, $pop48 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push10=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push47=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push7=, $7, $pop47 +; NO-SIMD128-SLOW-NEXT: i32.const $push46=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push8=, $pop7, $pop46 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push9=, $pop8, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop11), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push15=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push45=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push12=, $6, $pop45 +; NO-SIMD128-SLOW-NEXT: i32.const $push44=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push13=, $pop12, $pop44 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push14=, $pop13, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push43=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push17=, $5, $pop43 +; NO-SIMD128-SLOW-NEXT: i32.const $push42=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push18=, $pop17, $pop42 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push19=, $pop18, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop19 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.const $push41=, 16 +; 
NO-SIMD128-SLOW-NEXT: i32.shl $push20=, $4, $pop41 +; NO-SIMD128-SLOW-NEXT: i32.const $push40=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push21=, $pop20, $pop40 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push22=, $pop21, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push39=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push25=, $3, $pop39 +; NO-SIMD128-SLOW-NEXT: i32.const $push38=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push26=, $pop25, $pop38 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push27=, $pop26, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop27 +; NO-SIMD128-SLOW-NEXT: i32.const $push37=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push28=, $2, $pop37 +; NO-SIMD128-SLOW-NEXT: i32.const $push36=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push29=, $pop28, $pop36 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push30=, $pop29, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop30 +; NO-SIMD128-SLOW-NEXT: i32.const $push35=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push31=, $1, $pop35 +; NO-SIMD128-SLOW-NEXT: i32.const $push34=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push32=, $pop31, $pop34 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push33=, $pop32, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop33 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_v8i16: +; NO-SIMD128-FAST: .functype shr_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push1=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push48=, $9=, $pop49 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $pop48 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $2, $pop47 +; NO-SIMD128-FAST-NEXT: 
i32.const $push46=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop46 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push7=, $pop6, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $3, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop44 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $4, $pop43 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop42 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push13=, $pop12, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $5, $pop41 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop40 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push19=, $6, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $pop38 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $pop20, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $7, $pop37 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push25=, $pop24, $pop36 +; NO-SIMD128-FAST-NEXT: 
i32.shr_s $push26=, $pop25, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push29=, $8, $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $pop34 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push31=, $pop30, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop33), $pop31 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, <8 x i32> @@ -347,34 +5976,599 @@ ret <8 x i16> %a } -; CHECK-LABEL: shr_s_vec_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shr_s_vec_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}} -; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}} -; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}} -; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 6 lanes -; SIMD128: i16x8.extract_lane_s $push[[L0:[0-9]+]]=, $0, 7{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $1, 7{{$}} -; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop{{[0-9]+}}, 7, $pop[[L2]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shr_s_vec_v8i16: +; SIMD128-UNIMPL-SLOW: .functype shr_s_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push6=, $0, 0 +; 
SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 15 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: v128.and $push35=, $1, $pop1 +; SIMD128-UNIMPL-SLOW-NEXT: local.tee $push34=, $1=, $pop35 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push5=, $pop34, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push7=, $pop6, $pop5 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.splat $push8=, $pop7 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push3=, $0, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push2=, $1, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push4=, $pop3, $pop2 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push9=, $pop8, 1, $pop4 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push11=, $0, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push10=, $1, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push12=, $pop11, $pop10 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push13=, $pop9, 2, $pop12 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push15=, $0, 3 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push14=, $1, 3 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push16=, $pop15, $pop14 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push17=, $pop13, 3, $pop16 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push19=, $0, 4 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push18=, $1, 4 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push20=, $pop19, $pop18 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push21=, $pop17, 4, $pop20 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push23=, $0, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push22=, $1, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push25=, $pop21, 5, $pop24 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push27=, $0, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push26=, $1, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push28=, $pop27, $pop26 +; SIMD128-UNIMPL-SLOW-NEXT: 
i16x8.replace_lane $push29=, $pop25, 6, $pop28 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_s $push31=, $0, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push30=, $1, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_s $push32=, $pop31, $pop30 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push33=, $pop29, 7, $pop32 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop33 +; +; SIMD128-UNIMPL-FAST-LABEL: shr_s_vec_v8i16: +; SIMD128-UNIMPL-FAST: .functype shr_s_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push7=, $0, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 15 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.splat $push2=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: v128.and $push35=, $1, $pop2 +; SIMD128-UNIMPL-FAST-NEXT: local.tee $push34=, $1=, $pop35 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push6=, $pop34, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push8=, $pop7, $pop6 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.splat $push9=, $pop8 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push4=, $0, 1 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push3=, $1, 1 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push5=, $pop4, $pop3 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push10=, $pop9, 1, $pop5 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push12=, $0, 2 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push11=, $1, 2 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push13=, $pop12, $pop11 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push14=, $pop10, 2, $pop13 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push16=, $0, 3 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push15=, $1, 3 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push18=, $pop14, 3, $pop17 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push20=, $0, 4 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push19=, $1, 4 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; SIMD128-UNIMPL-FAST-NEXT: 
i16x8.replace_lane $push22=, $pop18, 4, $pop21 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push24=, $0, 5 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push23=, $1, 5 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push25=, $pop24, $pop23 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push26=, $pop22, 5, $pop25 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push28=, $0, 6 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push27=, $1, 6 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push29=, $pop28, $pop27 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push30=, $pop26, 6, $pop29 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_s $push32=, $0, 7 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push31=, $1, 7 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_s $push33=, $pop32, $pop31 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push0=, $pop30, 7, $pop33 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shr_s_vec_v8i16: +; SIMD128-VM-SLOW: .functype shr_s_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push8=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push0=, 15 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push1=, $pop0 +; SIMD128-VM-SLOW-NEXT: v128.and $push51=, $1, $pop1 +; SIMD128-VM-SLOW-NEXT: local.tee $push50=, $1=, $pop51 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push6=, $pop50, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push49=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push7=, $pop6, $pop49 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push9=, $pop8, $pop7 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push10=, $pop9 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push4=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push2=, $1, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push48=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push3=, $pop2, $pop48 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push5=, $pop4, $pop3 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push11=, $pop10, 1, $pop5 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push14=, $0, 2 +; 
SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push12=, $1, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push47=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push13=, $pop12, $pop47 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push16=, $pop11, 2, $pop15 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push19=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push17=, $1, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push46=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push18=, $pop17, $pop46 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push20=, $pop19, $pop18 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push21=, $pop16, 3, $pop20 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push24=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push22=, $1, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push45=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push23=, $pop22, $pop45 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push25=, $pop24, $pop23 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push26=, $pop21, 4, $pop25 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push29=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push27=, $1, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push44=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push28=, $pop27, $pop44 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push30=, $pop29, $pop28 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push31=, $pop26, 5, $pop30 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push34=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push32=, $1, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push43=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push33=, $pop32, $pop43 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push35=, $pop34, $pop33 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push36=, $pop31, 6, $pop35 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push39=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push37=, $1, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push42=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push38=, $pop37, $pop42 +; SIMD128-VM-SLOW-NEXT: 
i32.shr_s $push40=, $pop39, $pop38 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push41=, $pop36, 7, $pop40 +; SIMD128-VM-SLOW-NEXT: return $pop41 +; +; SIMD128-VM-FAST-LABEL: shr_s_vec_v8i16: +; SIMD128-VM-FAST: .functype shr_s_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push1=, 15 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push2=, $pop1 +; SIMD128-VM-FAST-NEXT: v128.and $push51=, $1, $pop2 +; SIMD128-VM-FAST-NEXT: local.tee $push50=, $1=, $pop51 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push7=, $pop50, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push49=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push8=, $pop7, $pop49 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push10=, $pop9, $pop8 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push11=, $pop10 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push5=, $0, 1 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push3=, $1, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push48=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push4=, $pop3, $pop48 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push12=, $pop11, 1, $pop6 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push15=, $0, 2 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push13=, $1, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push47=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push14=, $pop13, $pop47 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push16=, $pop15, $pop14 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push17=, $pop12, 2, $pop16 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push20=, $0, 3 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push18=, $1, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push46=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push19=, $pop18, $pop46 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push22=, $pop17, 3, $pop21 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push25=, $0, 4 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s 
$push23=, $1, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push45=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push24=, $pop23, $pop45 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push26=, $pop25, $pop24 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push27=, $pop22, 4, $pop26 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push30=, $0, 5 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push28=, $1, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push44=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push29=, $pop28, $pop44 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push31=, $pop30, $pop29 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push32=, $pop27, 5, $pop31 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push35=, $0, 6 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push33=, $1, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push43=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push34=, $pop33, $pop43 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push36=, $pop35, $pop34 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push37=, $pop32, 6, $pop36 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push40=, $0, 7 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push38=, $1, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push42=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push39=, $pop38, $pop42 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push41=, $pop40, $pop39 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push0=, $pop37, 7, $pop41 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_s_vec_v8i16: +; NO-SIMD128-SLOW: .functype shr_s_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push6=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push7=, $0, $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push3=, $8, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push63=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push4=, $pop3, $pop63 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push1=, $16, $pop0 +; NO-SIMD128-SLOW-NEXT: 
i32.shr_s $push5=, $pop4, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop7), $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push12=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push62=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push9=, $7, $pop62 +; NO-SIMD128-SLOW-NEXT: i32.const $push61=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push10=, $pop9, $pop61 +; NO-SIMD128-SLOW-NEXT: i32.const $push60=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push8=, $15, $pop60 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push11=, $pop10, $pop8 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop13), $pop11 +; NO-SIMD128-SLOW-NEXT: i32.const $push18=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-SLOW-NEXT: i32.const $push59=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push15=, $6, $pop59 +; NO-SIMD128-SLOW-NEXT: i32.const $push58=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push16=, $pop15, $pop58 +; NO-SIMD128-SLOW-NEXT: i32.const $push57=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push14=, $14, $pop57 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push17=, $pop16, $pop14 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop19), $pop17 +; NO-SIMD128-SLOW-NEXT: i32.const $push56=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push21=, $5, $pop56 +; NO-SIMD128-SLOW-NEXT: i32.const $push55=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push22=, $pop21, $pop55 +; NO-SIMD128-SLOW-NEXT: i32.const $push54=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push20=, $13, $pop54 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push23=, $pop22, $pop20 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop23 +; NO-SIMD128-SLOW-NEXT: i32.const $push28=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-SLOW-NEXT: i32.const $push53=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push25=, $4, $pop53 +; NO-SIMD128-SLOW-NEXT: i32.const $push52=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push26=, $pop25, $pop52 +; NO-SIMD128-SLOW-NEXT: i32.const $push51=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push24=, $12, $pop51 +; 
NO-SIMD128-SLOW-NEXT: i32.shr_s $push27=, $pop26, $pop24 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop29), $pop27 +; NO-SIMD128-SLOW-NEXT: i32.const $push50=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push31=, $3, $pop50 +; NO-SIMD128-SLOW-NEXT: i32.const $push49=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push32=, $pop31, $pop49 +; NO-SIMD128-SLOW-NEXT: i32.const $push48=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push30=, $11, $pop48 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push33=, $pop32, $pop30 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop33 +; NO-SIMD128-SLOW-NEXT: i32.const $push47=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push35=, $2, $pop47 +; NO-SIMD128-SLOW-NEXT: i32.const $push46=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push36=, $pop35, $pop46 +; NO-SIMD128-SLOW-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push34=, $10, $pop45 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push37=, $pop36, $pop34 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop37 +; NO-SIMD128-SLOW-NEXT: i32.const $push44=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shl $push39=, $1, $pop44 +; NO-SIMD128-SLOW-NEXT: i32.const $push43=, 16 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push40=, $pop39, $pop43 +; NO-SIMD128-SLOW-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push38=, $9, $pop42 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push41=, $pop40, $pop38 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop41 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_vec_v8i16: +; NO-SIMD128-FAST: .functype shr_s_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push2=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $1, $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $pop63 +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $pop4, $pop1 +; NO-SIMD128-FAST-NEXT: 
i32.store16 0($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $2, $pop62 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push8=, $pop7, $pop61 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $10, $pop60 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop6 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $3, $pop59 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop58 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $11, $pop57 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push13=, $pop12, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $4, $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $12, $pop54 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop19), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push21=, $5, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $13, $pop51 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push23=, $pop22, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 16 +; 
NO-SIMD128-FAST-NEXT: i32.shl $push25=, $6, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop25, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $14, $pop48 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop29), $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push35=, $0, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push31=, $7, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $15, $pop45 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push33=, $pop32, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop35), $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push41=, $0, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 16 +; NO-SIMD128-FAST-NEXT: i32.shl $push37=, $8, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop37, $pop43 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $16, $pop42 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push39=, $pop38, $pop36 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop41), $pop39 +; NO-SIMD128-FAST-NEXT: return %a = ashr <8 x i16> %v, %x ret <8 x i16> %a } -; CHECK-LABEL: shr_u_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shr_u_v8i16 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i16x8.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) { +; SIMD128-UNIMPL-LABEL: shr_u_v8i16: +; SIMD128-UNIMPL: .functype shr_u_v8i16 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i16x8.shr_u $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; 
SIMD128-VM-SLOW-LABEL: shr_u_v8i16: +; SIMD128-VM-SLOW: .functype shr_u_v8i16 (v128, i32) -> (v128) +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push1=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push10=, $pop9, $pop1 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push5=, $1 +; SIMD128-VM-SLOW-NEXT: i32.const $push3=, 15 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push4=, $pop3 +; SIMD128-VM-SLOW-NEXT: v128.and $push68=, $pop5, $pop4 +; SIMD128-VM-SLOW-NEXT: local.tee $push67=, $2=, $pop68 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push11=, $pop67, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push66=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push12=, $pop11, $pop66 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push13=, $pop10, $pop12 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push14=, $pop13 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push0=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push65=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push2=, $pop0, $pop65 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push6=, $2, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push64=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push7=, $pop6, $pop64 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push8=, $pop2, $pop7 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push15=, $pop14, 1, $pop8 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push16=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push63=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push17=, $pop16, $pop63 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push18=, $2, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push62=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push19=, $pop18, $pop62 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push20=, $pop17, $pop19 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push21=, $pop15, 2, $pop20 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push22=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push61=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push23=, $pop22, $pop61 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push24=, $2, 3 +; 
SIMD128-VM-SLOW-NEXT: i32.const $push60=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push25=, $pop24, $pop60 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push26=, $pop23, $pop25 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push27=, $pop21, 3, $pop26 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push28=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push59=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push29=, $pop28, $pop59 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push30=, $2, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push58=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push31=, $pop30, $pop58 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push32=, $pop29, $pop31 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push33=, $pop27, 4, $pop32 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push34=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push57=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push35=, $pop34, $pop57 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push36=, $2, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push56=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push37=, $pop36, $pop56 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push38=, $pop35, $pop37 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push39=, $pop33, 5, $pop38 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push40=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push55=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push41=, $pop40, $pop55 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push42=, $2, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push54=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push43=, $pop42, $pop54 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push44=, $pop41, $pop43 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push45=, $pop39, 6, $pop44 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push46=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push53=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push47=, $pop46, $pop53 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push48=, $2, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push52=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push49=, $pop48, $pop52 +; 
SIMD128-VM-SLOW-NEXT: i32.shr_u $push50=, $pop47, $pop49 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push51=, $pop45, 7, $pop50 +; SIMD128-VM-SLOW-NEXT: return $pop51 +; +; SIMD128-VM-FAST-LABEL: shr_u_v8i16: +; SIMD128-VM-FAST: .functype shr_u_v8i16 (v128, i32) -> (v128) +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push10=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push3=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push11=, $pop10, $pop3 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push1=, $1 +; SIMD128-VM-FAST-NEXT: i32.const $push5=, 15 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push6=, $pop5 +; SIMD128-VM-FAST-NEXT: v128.and $push68=, $pop1, $pop6 +; SIMD128-VM-FAST-NEXT: local.tee $push67=, $2=, $pop68 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push12=, $pop67, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push66=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push13=, $pop12, $pop66 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push14=, $pop11, $pop13 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push15=, $pop14 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push2=, $0, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push65=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push4=, $pop2, $pop65 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push7=, $2, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push64=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push8=, $pop7, $pop64 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push9=, $pop4, $pop8 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push16=, $pop15, 1, $pop9 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push17=, $0, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push63=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push18=, $pop17, $pop63 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push19=, $2, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push62=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push20=, $pop19, $pop62 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push21=, $pop18, $pop20 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push22=, $pop16, 2, $pop21 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push23=, $0, 3 +; 
SIMD128-VM-FAST-NEXT: i32.const $push61=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push24=, $pop23, $pop61 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push25=, $2, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push60=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push26=, $pop25, $pop60 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push27=, $pop24, $pop26 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push28=, $pop22, 3, $pop27 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push29=, $0, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push59=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push30=, $pop29, $pop59 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push31=, $2, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push58=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push32=, $pop31, $pop58 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push33=, $pop30, $pop32 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push34=, $pop28, 4, $pop33 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push35=, $0, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push57=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push36=, $pop35, $pop57 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push37=, $2, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push56=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push38=, $pop37, $pop56 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push39=, $pop36, $pop38 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push40=, $pop34, 5, $pop39 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push41=, $0, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push55=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push42=, $pop41, $pop55 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push43=, $2, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push54=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push44=, $pop43, $pop54 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push45=, $pop42, $pop44 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push46=, $pop40, 6, $pop45 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push47=, $0, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push53=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push48=, $pop47, $pop53 +; 
SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push49=, $2, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push52=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push50=, $pop49, $pop52 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push51=, $pop48, $pop50 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push0=, $pop46, 7, $pop51 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_u_v8i16: +; NO-SIMD128-SLOW: .functype shr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push3=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push1=, $8, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push33=, $9, $pop34 +; NO-SIMD128-SLOW-NEXT: local.tee $push32=, $8=, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push2=, $pop1, $pop32 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop4), $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push5=, $7, $pop31 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push6=, $pop5, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push9=, $6, $pop30 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push10=, $pop9, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push13=, $5, $pop29 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push14=, $pop13, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push17=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-SLOW-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push15=, $4, $pop28 +; 
NO-SIMD128-SLOW-NEXT: i32.shr_u $push16=, $pop15, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop18), $pop16 +; NO-SIMD128-SLOW-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push19=, $3, $pop27 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push20=, $pop19, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop20 +; NO-SIMD128-SLOW-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push21=, $2, $pop26 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push22=, $pop21, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push23=, $1, $pop25 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push24=, $pop23, $8 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop24 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_v8i16: +; NO-SIMD128-FAST: .functype shr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $9, $pop34 +; NO-SIMD128-FAST-NEXT: local.tee $push32=, $9=, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop32 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop31 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop30 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop29 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 
0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop28 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop27 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop26 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop20), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop25 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, <8 x i32> @@ -382,80 +6576,764 @@ ret <8 x i16> %a } -; CHECK-LABEL: shr_u_vec_v8i16: -; NO-SIMD128-NOT: i16x8 -; SIMD128-NEXT: .functype shr_u_vec_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32.const $push[[M0:[0-9]+]]=, 15{{$}} -; SIMD128-NEXT: i16x8.splat $push[[M1:[0-9]+]]=, $pop[[M0]]{{$}} -; SIMD128-NEXT: v128.and $push[[M2:[0-9]+]]=, $1, $pop[[M1]]{{$}} -; SIMD128-NEXT: local.tee $push[[M:[0-9]+]]=, $1=, $pop[[M2]]{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L1:[0-9]+]]=, $pop[[M]], 0{{$}} -; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: 
i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 6 lanes -; SIMD128: i16x8.extract_lane_u $push[[L4:[0-9]+]]=, $0, 7{{$}} -; SIMD128-NEXT: i16x8.extract_lane_u $push[[L5:[0-9]+]]=, $1, 7{{$}} -; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 7, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shr_u_vec_v8i16: +; SIMD128-UNIMPL-SLOW: .functype shr_u_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push6=, $0, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 15 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: v128.and $push35=, $1, $pop1 +; SIMD128-UNIMPL-SLOW-NEXT: local.tee $push34=, $1=, $pop35 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push5=, $pop34, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push7=, $pop6, $pop5 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.splat $push8=, $pop7 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push3=, $0, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push2=, $1, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push4=, $pop3, $pop2 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push9=, $pop8, 1, $pop4 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push11=, $0, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push10=, $1, 2 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push13=, $pop9, 2, $pop12 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push15=, $0, 3 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push14=, $1, 3 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push16=, $pop15, $pop14 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push17=, $pop13, 3, $pop16 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push19=, $0, 4 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u 
$push18=, $1, 4 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push20=, $pop19, $pop18 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push21=, $pop17, 4, $pop20 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push23=, $0, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push22=, $1, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push25=, $pop21, 5, $pop24 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push27=, $0, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push26=, $1, 6 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push28=, $pop27, $pop26 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push29=, $pop25, 6, $pop28 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push31=, $0, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.extract_lane_u $push30=, $1, 7 +; SIMD128-UNIMPL-SLOW-NEXT: i32.shr_u $push32=, $pop31, $pop30 +; SIMD128-UNIMPL-SLOW-NEXT: i16x8.replace_lane $push33=, $pop29, 7, $pop32 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop33 +; +; SIMD128-UNIMPL-FAST-LABEL: shr_u_vec_v8i16: +; SIMD128-UNIMPL-FAST: .functype shr_u_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push7=, $0, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 15 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.splat $push2=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: v128.and $push35=, $1, $pop2 +; SIMD128-UNIMPL-FAST-NEXT: local.tee $push34=, $1=, $pop35 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push6=, $pop34, 0 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push8=, $pop7, $pop6 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.splat $push9=, $pop8 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push4=, $0, 1 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push3=, $1, 1 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop3 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push10=, $pop9, 1, $pop5 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push12=, $0, 2 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u 
$push11=, $1, 2 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop11 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push14=, $pop10, 2, $pop13 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push16=, $0, 3 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push15=, $1, 3 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push18=, $pop14, 3, $pop17 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push20=, $0, 4 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push19=, $1, 4 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push22=, $pop18, 4, $pop21 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push24=, $0, 5 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push23=, $1, 5 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push26=, $pop22, 5, $pop25 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push28=, $0, 6 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push27=, $1, 6 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop27 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push30=, $pop26, 6, $pop29 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push32=, $0, 7 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.extract_lane_u $push31=, $1, 7 +; SIMD128-UNIMPL-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop31 +; SIMD128-UNIMPL-FAST-NEXT: i16x8.replace_lane $push0=, $pop30, 7, $pop33 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shr_u_vec_v8i16: +; SIMD128-VM-SLOW: .functype shr_u_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push8=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push1=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push9=, $pop8, $pop1 +; SIMD128-VM-SLOW-NEXT: i32.const $push3=, 15 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push4=, $pop3 +; SIMD128-VM-SLOW-NEXT: v128.and $push67=, $1, $pop4 +; 
SIMD128-VM-SLOW-NEXT: local.tee $push66=, $1=, $pop67 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push10=, $pop66, 0 +; SIMD128-VM-SLOW-NEXT: i32.const $push65=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push11=, $pop10, $pop65 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push12=, $pop9, $pop11 +; SIMD128-VM-SLOW-NEXT: i16x8.splat $push13=, $pop12 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push0=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push64=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push2=, $pop0, $pop64 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push5=, $1, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push63=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push6=, $pop5, $pop63 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push7=, $pop2, $pop6 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push14=, $pop13, 1, $pop7 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push15=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push62=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push16=, $pop15, $pop62 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push17=, $1, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push61=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push18=, $pop17, $pop61 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push19=, $pop16, $pop18 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push20=, $pop14, 2, $pop19 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push21=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push60=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push22=, $pop21, $pop60 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push23=, $1, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push59=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push24=, $pop23, $pop59 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push25=, $pop22, $pop24 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push26=, $pop20, 3, $pop25 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push27=, $0, 4 +; SIMD128-VM-SLOW-NEXT: i32.const $push58=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push28=, $pop27, $pop58 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push29=, $1, 4 +; SIMD128-VM-SLOW-NEXT: 
i32.const $push57=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push30=, $pop29, $pop57 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push31=, $pop28, $pop30 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push32=, $pop26, 4, $pop31 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push33=, $0, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push56=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push34=, $pop33, $pop56 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push35=, $1, 5 +; SIMD128-VM-SLOW-NEXT: i32.const $push55=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push36=, $pop35, $pop55 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push37=, $pop34, $pop36 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push38=, $pop32, 5, $pop37 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push39=, $0, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push54=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push40=, $pop39, $pop54 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push41=, $1, 6 +; SIMD128-VM-SLOW-NEXT: i32.const $push53=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push42=, $pop41, $pop53 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push43=, $pop40, $pop42 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push44=, $pop38, 6, $pop43 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push45=, $0, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push52=, 65535 +; SIMD128-VM-SLOW-NEXT: i32.and $push46=, $pop45, $pop52 +; SIMD128-VM-SLOW-NEXT: i16x8.extract_lane_s $push47=, $1, 7 +; SIMD128-VM-SLOW-NEXT: i32.const $push51=, 15 +; SIMD128-VM-SLOW-NEXT: i32.and $push48=, $pop47, $pop51 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push49=, $pop46, $pop48 +; SIMD128-VM-SLOW-NEXT: i16x8.replace_lane $push50=, $pop44, 7, $pop49 +; SIMD128-VM-SLOW-NEXT: return $pop50 +; +; SIMD128-VM-FAST-LABEL: shr_u_vec_v8i16: +; SIMD128-VM-FAST: .functype shr_u_vec_v8i16 (v128, v128) -> (v128) +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push9=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push2=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push10=, $pop9, $pop2 +; SIMD128-VM-FAST-NEXT: i32.const $push4=, 15 +; 
SIMD128-VM-FAST-NEXT: i16x8.splat $push5=, $pop4 +; SIMD128-VM-FAST-NEXT: v128.and $push67=, $1, $pop5 +; SIMD128-VM-FAST-NEXT: local.tee $push66=, $1=, $pop67 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push11=, $pop66, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push65=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push12=, $pop11, $pop65 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push13=, $pop10, $pop12 +; SIMD128-VM-FAST-NEXT: i16x8.splat $push14=, $pop13 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push1=, $0, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push64=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push3=, $pop1, $pop64 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push6=, $1, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push63=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push7=, $pop6, $pop63 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push8=, $pop3, $pop7 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push15=, $pop14, 1, $pop8 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push16=, $0, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push62=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push17=, $pop16, $pop62 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push18=, $1, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push61=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push19=, $pop18, $pop61 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push20=, $pop17, $pop19 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push21=, $pop15, 2, $pop20 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push22=, $0, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push60=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push23=, $pop22, $pop60 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push24=, $1, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push59=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push25=, $pop24, $pop59 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push26=, $pop23, $pop25 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push27=, $pop21, 3, $pop26 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push28=, $0, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push58=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and 
$push29=, $pop28, $pop58 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push30=, $1, 4 +; SIMD128-VM-FAST-NEXT: i32.const $push57=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push31=, $pop30, $pop57 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push32=, $pop29, $pop31 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push33=, $pop27, 4, $pop32 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push34=, $0, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push56=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push35=, $pop34, $pop56 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push36=, $1, 5 +; SIMD128-VM-FAST-NEXT: i32.const $push55=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push37=, $pop36, $pop55 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push38=, $pop35, $pop37 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push39=, $pop33, 5, $pop38 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push40=, $0, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push54=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push41=, $pop40, $pop54 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push42=, $1, 6 +; SIMD128-VM-FAST-NEXT: i32.const $push53=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push43=, $pop42, $pop53 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push44=, $pop41, $pop43 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push45=, $pop39, 6, $pop44 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push46=, $0, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push52=, 65535 +; SIMD128-VM-FAST-NEXT: i32.and $push47=, $pop46, $pop52 +; SIMD128-VM-FAST-NEXT: i16x8.extract_lane_s $push48=, $1, 7 +; SIMD128-VM-FAST-NEXT: i32.const $push51=, 15 +; SIMD128-VM-FAST-NEXT: i32.and $push49=, $pop48, $pop51 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push50=, $pop47, $pop49 +; SIMD128-VM-FAST-NEXT: i16x8.replace_lane $push0=, $pop45, 7, $pop50 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_u_vec_v8i16: +; NO-SIMD128-SLOW: .functype shr_u_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const 
$push4=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push2=, $8, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push1=, $16, $pop47 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push9=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push7=, $7, $pop46 +; NO-SIMD128-SLOW-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push6=, $15, $pop45 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push8=, $pop7, $pop6 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop10), $pop8 +; NO-SIMD128-SLOW-NEXT: i32.const $push14=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push15=, $0, $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push12=, $6, $pop44 +; NO-SIMD128-SLOW-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push11=, $14, $pop43 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push13=, $pop12, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push17=, $5, $pop42 +; NO-SIMD128-SLOW-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push16=, $13, $pop41 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop18 +; NO-SIMD128-SLOW-NEXT: i32.const $push22=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push23=, $0, $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push20=, $4, $pop40 +; NO-SIMD128-SLOW-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push19=, $12, $pop39 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop23), $pop21 +; 
NO-SIMD128-SLOW-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push25=, $3, $pop38 +; NO-SIMD128-SLOW-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push24=, $11, $pop37 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push26=, $pop25, $pop24 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop26 +; NO-SIMD128-SLOW-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push28=, $2, $pop36 +; NO-SIMD128-SLOW-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push27=, $10, $pop35 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push29=, $pop28, $pop27 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop29 +; NO-SIMD128-SLOW-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push31=, $1, $pop34 +; NO-SIMD128-SLOW-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-SLOW-NEXT: i32.and $push30=, $9, $pop33 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push32=, $pop31, $pop30 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop32 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_vec_v8i16: +; NO-SIMD128-FAST: .functype shr_u_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop45 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and 
$push7=, $11, $pop43 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop41 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $13, $pop39 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $14, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $15, $pop35 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 +; 
NO-SIMD128-FAST-NEXT: i32.and $push28=, $16, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: return %a = lshr <8 x i16> %v, %x ret <8 x i16> %a } -; CHECK-LABEL: and_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype and_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: and_v8i16: +; SIMD128: .functype and_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: v128.and $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: and_v8i16: +; NO-SIMD128-SLOW: .functype and_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.and $push0=, $8, $16 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.and $push3=, $7, $15 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.and $push6=, $6, $14 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.and $push9=, $5, $13 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.and $push10=, $4, $12 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.and $push13=, $3, $11 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.and $push14=, $2, $10 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.and 
$push15=, $1, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop15 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: and_v8i16: +; NO-SIMD128-FAST: .functype and_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = and <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: or_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype or_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: or_v8i16: +; SIMD128: .functype or_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: v128.or $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; 
NO-SIMD128-SLOW-LABEL: or_v8i16: +; NO-SIMD128-SLOW: .functype or_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.or $push0=, $8, $16 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.or $push3=, $7, $15 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.or $push6=, $6, $14 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.or $push9=, $5, $13 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.or $push10=, $4, $12 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.or $push13=, $3, $11 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.or $push14=, $2, $10 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.or $push15=, $1, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop15 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: or_v8i16: +; NO-SIMD128-FAST: .functype or_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.or $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.or $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: 
i32.or $push5=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = or <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: xor_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype xor_v8i16 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) { +; SIMD128-LABEL: xor_v8i16: +; SIMD128: .functype xor_v8i16 (v128, v128) -> (v128) +; SIMD128-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: xor_v8i16: +; NO-SIMD128-SLOW: .functype xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push0=, $8, $16 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.const $push4=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-SLOW-NEXT: i32.xor $push3=, $7, $15 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop5), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-SLOW-NEXT: 
i32.xor $push6=, $6, $14 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop8), $pop6 +; NO-SIMD128-SLOW-NEXT: i32.xor $push9=, $5, $13 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop9 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.xor $push10=, $4, $12 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.xor $push13=, $3, $11 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-SLOW-NEXT: i32.xor $push14=, $2, $10 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.xor $push15=, $1, $9 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop15 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: xor_v8i16: +; NO-SIMD128-FAST: .functype xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $1, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $2, $10 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, 
$pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: return %a = xor <8 x i16> %x, %y ret <8 x i16> %a } -; CHECK-LABEL: not_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype not_v8i16 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <8 x i16> @not_v8i16(<8 x i16> %x) { +; SIMD128-LABEL: not_v8i16: +; SIMD128: .functype not_v8i16 (v128) -> (v128) +; SIMD128-NEXT: v128.not $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: not_v8i16: +; NO-SIMD128-SLOW: .functype not_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push1=, $8, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push23=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push4=, $7, $pop23 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push8=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-SLOW-NEXT: i32.const $push22=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push7=, $6, $pop22 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop9), $pop7 +; NO-SIMD128-SLOW-NEXT: i32.const $push21=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push10=, $5, $pop21 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push12=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push13=, $0, $pop12 +; NO-SIMD128-SLOW-NEXT: i32.const $push20=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push11=, $4, $pop20 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop13), $pop11 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push14=, $3, $pop19 +; 
NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push18=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push15=, $2, $pop18 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop15 +; NO-SIMD128-SLOW-NEXT: i32.const $push17=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push16=, $1, $pop17 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop16 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: not_v8i16: +; NO-SIMD128-FAST: .functype not_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 +; 
NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: return %a = xor <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ret <8 x i16> %a } -; CHECK-LABEL: bitselect_v8i16: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_v8i16 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.or -; SIMD128-FAST-NEXT: return define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { +; SIMD128-SLOW-LABEL: bitselect_v8i16: +; SIMD128-SLOW: .functype bitselect_v8i16 (v128, v128, v128) -> (v128) +; SIMD128-SLOW-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-SLOW-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_v8i16: +; SIMD128-FAST: .functype bitselect_v8i16 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: v128.and $push0=, $1, $0 +; SIMD128-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-FAST-NEXT: v128.and $push3=, $2, $pop2 +; SIMD128-FAST-NEXT: v128.or $push1=, $pop0, $pop3 +; SIMD128-FAST-NEXT: return $pop1 +; +; NO-SIMD128-SLOW-LABEL: bitselect_v8i16: +; NO-SIMD128-SLOW: .functype bitselect_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 14 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.and $push0=, $16, $8 +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push2=, $8, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.and $push3=, $24, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.or $push4=, $pop0, $pop3 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop6), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push11=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push12=, $0, $pop11 +; 
NO-SIMD128-SLOW-NEXT: i32.and $push7=, $15, $7 +; NO-SIMD128-SLOW-NEXT: i32.const $push47=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push8=, $7, $pop47 +; NO-SIMD128-SLOW-NEXT: i32.and $push9=, $23, $pop8 +; NO-SIMD128-SLOW-NEXT: i32.or $push10=, $pop7, $pop9 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop12), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push17=, 10 +; NO-SIMD128-SLOW-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-SLOW-NEXT: i32.and $push13=, $14, $6 +; NO-SIMD128-SLOW-NEXT: i32.const $push46=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push14=, $6, $pop46 +; NO-SIMD128-SLOW-NEXT: i32.and $push15=, $22, $pop14 +; NO-SIMD128-SLOW-NEXT: i32.or $push16=, $pop13, $pop15 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop18), $pop16 +; NO-SIMD128-SLOW-NEXT: i32.and $push19=, $13, $5 +; NO-SIMD128-SLOW-NEXT: i32.const $push45=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push20=, $5, $pop45 +; NO-SIMD128-SLOW-NEXT: i32.and $push21=, $21, $pop20 +; NO-SIMD128-SLOW-NEXT: i32.or $push22=, $pop19, $pop21 +; NO-SIMD128-SLOW-NEXT: i32.store16 8($0), $pop22 +; NO-SIMD128-SLOW-NEXT: i32.const $push27=, 6 +; NO-SIMD128-SLOW-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-SLOW-NEXT: i32.and $push23=, $12, $4 +; NO-SIMD128-SLOW-NEXT: i32.const $push44=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push24=, $4, $pop44 +; NO-SIMD128-SLOW-NEXT: i32.and $push25=, $20, $pop24 +; NO-SIMD128-SLOW-NEXT: i32.or $push26=, $pop23, $pop25 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-SLOW-NEXT: i32.and $push29=, $11, $3 +; NO-SIMD128-SLOW-NEXT: i32.const $push43=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push30=, $3, $pop43 +; NO-SIMD128-SLOW-NEXT: i32.and $push31=, $19, $pop30 +; NO-SIMD128-SLOW-NEXT: i32.or $push32=, $pop29, $pop31 +; NO-SIMD128-SLOW-NEXT: i32.store16 4($0), $pop32 +; NO-SIMD128-SLOW-NEXT: i32.and $push33=, $10, $2 +; NO-SIMD128-SLOW-NEXT: i32.const $push42=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push34=, $2, $pop42 +; NO-SIMD128-SLOW-NEXT: i32.and $push35=, $18, $pop34 +; 
NO-SIMD128-SLOW-NEXT: i32.or $push36=, $pop33, $pop35 +; NO-SIMD128-SLOW-NEXT: i32.store16 2($0), $pop36 +; NO-SIMD128-SLOW-NEXT: i32.and $push37=, $9, $1 +; NO-SIMD128-SLOW-NEXT: i32.const $push41=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push38=, $1, $pop41 +; NO-SIMD128-SLOW-NEXT: i32.and $push39=, $17, $pop38 +; NO-SIMD128-SLOW-NEXT: i32.or $push40=, $pop37, $pop39 +; NO-SIMD128-SLOW-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_v8i16: +; NO-SIMD128-FAST: .functype bitselect_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $9, $1 +; NO-SIMD128-FAST-NEXT: i32.const $push1=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $17, $pop2 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop47 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $18, $pop6 +; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop46 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $19, $pop10 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $4 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop45 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $20, $pop14 +; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15 +; NO-SIMD128-FAST-NEXT: 
i32.store16 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop44 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 +; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop43 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $22, $pop24 +; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop42 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $23, $pop30 +; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 +; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop41 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $24, $pop36 +; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <8 x i16> %v1, %c %inv_mask = xor <8 x i16> , @@ -468,52 +7346,228 @@ ; ============================================================================== ; 4 x i32 ; ============================================================================== -; CHECK-LABEL: add_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype add_v4i32 (v128, 
v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} + define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: add_v4i32: +; SIMD128: .functype add_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: i32x4.add $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: add_v4i32: +; NO-SIMD128-SLOW: .functype add_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.add $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.add $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.add $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: add_v4i32: +; NO-SIMD128-FAST: .functype add_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.add $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.add $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = add <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: sub_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype sub_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: sub_v4i32: +; 
SIMD128: .functype sub_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: i32x4.sub $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: sub_v4i32: +; NO-SIMD128-SLOW: .functype sub_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.sub $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.sub $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.sub $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sub_v4i32: +; NO-SIMD128-FAST: .functype sub_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.sub $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = sub <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: mul_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype mul_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: mul_v4i32: +; SIMD128: .functype mul_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: i32x4.mul $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: mul_v4i32: +; NO-SIMD128-SLOW: .functype mul_v4i32 (i32, i32, 
i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.mul $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.mul $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.mul $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.mul $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: mul_v4i32: +; NO-SIMD128-FAST: .functype mul_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.mul $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.mul $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = mul <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: neg_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype neg_v4i32 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @neg_v4i32(<4 x i32> %x) { +; SIMD128-LABEL: neg_v4i32: +; SIMD128: .functype neg_v4i32 (v128) -> (v128) +; SIMD128-NEXT: i32x4.neg $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: neg_v4i32: +; NO-SIMD128-SLOW: .functype neg_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push1=, $pop0, $4 +; NO-SIMD128-SLOW-NEXT: 
i32.store 0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: i32.const $push9=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push4=, $pop9, $3 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push8=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push5=, $pop8, $2 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 0 +; NO-SIMD128-SLOW-NEXT: i32.sub $push6=, $pop7, $1 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop6 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: neg_v4i32: +; NO-SIMD128-FAST: .functype neg_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop9, $2 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop8, $3 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop7, $4 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %a = sub <4 x i32> , %x ret <4 x i32> %a } -; CHECK-LABEL: shl_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shl_v4i32 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) { +; SIMD128-UNIMPL-LABEL: shl_v4i32: +; SIMD128-UNIMPL: .functype shl_v4i32 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i32x4.shl $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shl_v4i32: +; SIMD128-VM-SLOW: .functype shl_v4i32 (v128, i32) -> (v128) +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push2=, $0, 0 +; 
SIMD128-VM-SLOW-NEXT: i32.shl $push3=, $pop2, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.splat $push4=, $pop3 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push0=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i32.shl $push1=, $pop0, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push5=, $pop4, 1, $pop1 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push6=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i32.shl $push7=, $pop6, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push8=, $pop5, 2, $pop7 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push9=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i32.shl $push10=, $pop9, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push11=, $pop8, 3, $pop10 +; SIMD128-VM-SLOW-NEXT: return $pop11 +; +; SIMD128-VM-FAST-LABEL: shl_v4i32: +; SIMD128-VM-FAST: .functype shl_v4i32 (v128, i32) -> (v128) +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push3=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.shl $push4=, $pop3, $1 +; SIMD128-VM-FAST-NEXT: i32x4.splat $push5=, $pop4 +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push1=, $0, 1 +; SIMD128-VM-FAST-NEXT: i32.shl $push2=, $pop1, $1 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push6=, $pop5, 1, $pop2 +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push7=, $0, 2 +; SIMD128-VM-FAST-NEXT: i32.shl $push8=, $pop7, $1 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push9=, $pop6, 2, $pop8 +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push10=, $0, 3 +; SIMD128-VM-FAST-NEXT: i32.shl $push11=, $pop10, $1 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push0=, $pop9, 3, $pop11 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shl_v4i32: +; NO-SIMD128-SLOW: .functype shl_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.shl $push0=, $4, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.shl $push3=, $3, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.shl $push4=, $2, $5 +; 
NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.shl $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_v4i32: +; NO-SIMD128-FAST: .functype shl_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.shl $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $5 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $5 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, <4 x i32> @@ -521,41 +7575,235 @@ ret <4 x i32> %a } -; CHECK-LABEL: shl_const_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shl_const_v4i32 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5 -; SIMD128-NEXT: i32x4.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shl_const_v4i32(<4 x i32> %v) { +; SIMD128-UNIMPL-SLOW-LABEL: shl_const_v4i32: +; SIMD128-UNIMPL-SLOW: .functype shl_const_v4i32 (v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i32x4.shl $push1=, $0, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop1 +; +; SIMD128-UNIMPL-FAST-LABEL: shl_const_v4i32: +; SIMD128-UNIMPL-FAST: .functype shl_const_v4i32 (v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-UNIMPL-FAST-NEXT: i32x4.shl $push0=, $0, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shl_const_v4i32: +; SIMD128-VM-SLOW: .functype shl_const_v4i32 (v128) -> (v128) +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push3=, $0, 0 +; 
SIMD128-VM-SLOW-NEXT: i32.const $push1=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push4=, $pop3, $pop1 +; SIMD128-VM-SLOW-NEXT: i32x4.splat $push5=, $pop4 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push0=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i32.const $push15=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push2=, $pop0, $pop15 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push6=, $pop5, 1, $pop2 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push7=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i32.const $push14=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push8=, $pop7, $pop14 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push9=, $pop6, 2, $pop8 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push10=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i32.const $push13=, 5 +; SIMD128-VM-SLOW-NEXT: i32.shl $push11=, $pop10, $pop13 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push12=, $pop9, 3, $pop11 +; SIMD128-VM-SLOW-NEXT: return $pop12 +; +; SIMD128-VM-FAST-LABEL: shl_const_v4i32: +; SIMD128-VM-FAST: .functype shl_const_v4i32 (v128) -> (v128) +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push4=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.const $push2=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push5=, $pop4, $pop2 +; SIMD128-VM-FAST-NEXT: i32x4.splat $push6=, $pop5 +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push1=, $0, 1 +; SIMD128-VM-FAST-NEXT: i32.const $push15=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push3=, $pop1, $pop15 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push7=, $pop6, 1, $pop3 +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push8=, $0, 2 +; SIMD128-VM-FAST-NEXT: i32.const $push14=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push9=, $pop8, $pop14 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push10=, $pop7, 2, $pop9 +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push11=, $0, 3 +; SIMD128-VM-FAST-NEXT: i32.const $push13=, 5 +; SIMD128-VM-FAST-NEXT: i32.shl $push12=, $pop11, $pop13 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push0=, $pop10, 3, $pop12 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shl_const_v4i32: +; 
NO-SIMD128-SLOW: .functype shl_const_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push1=, $4, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: i32.const $push9=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push4=, $3, $pop9 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push8=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push5=, $2, $pop8 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, 5 +; NO-SIMD128-SLOW-NEXT: i32.shl $push6=, $1, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop6 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_const_v4i32: +; NO-SIMD128-FAST: .functype shl_const_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %a = shl <4 x i32> %v, ret <4 x i32> %a } -; CHECK-LABEL: shl_vec_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shl_vec_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.shl $push[[L2:[0-9]+]]=, $pop[[L0]], 
$pop[[L1]]{{$}} -; SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 2 lanes -; SIMD128: i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}} -; SIMD128-NEXT: i32.shl $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { +; SIMD128-SLOW-LABEL: shl_vec_v4i32: +; SIMD128-SLOW: .functype shl_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push4=, $0, 0 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push3=, $1, 0 +; SIMD128-SLOW-NEXT: i32.shl $push5=, $pop4, $pop3 +; SIMD128-SLOW-NEXT: i32x4.splat $push6=, $pop5 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push1=, $0, 1 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push0=, $1, 1 +; SIMD128-SLOW-NEXT: i32.shl $push2=, $pop1, $pop0 +; SIMD128-SLOW-NEXT: i32x4.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push9=, $0, 2 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push8=, $1, 2 +; SIMD128-SLOW-NEXT: i32.shl $push10=, $pop9, $pop8 +; SIMD128-SLOW-NEXT: i32x4.replace_lane $push11=, $pop7, 2, $pop10 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push13=, $0, 3 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push12=, $1, 3 +; SIMD128-SLOW-NEXT: i32.shl $push14=, $pop13, $pop12 +; SIMD128-SLOW-NEXT: i32x4.replace_lane $push15=, $pop11, 3, $pop14 +; SIMD128-SLOW-NEXT: return $pop15 +; +; SIMD128-FAST-LABEL: shl_vec_v4i32: +; SIMD128-FAST: .functype shl_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: i32x4.extract_lane $push5=, $0, 0 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push4=, $1, 0 +; SIMD128-FAST-NEXT: i32.shl $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: i32x4.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push2=, $0, 1 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push1=, $1, 1 +; 
SIMD128-FAST-NEXT: i32.shl $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push8=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push10=, $0, 2 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push9=, $1, 2 +; SIMD128-FAST-NEXT: i32.shl $push11=, $pop10, $pop9 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push12=, $pop8, 2, $pop11 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push14=, $0, 3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push13=, $1, 3 +; SIMD128-FAST-NEXT: i32.shl $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push0=, $pop12, 3, $pop15 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shl_vec_v4i32: +; NO-SIMD128-SLOW: .functype shl_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.shl $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.shl $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.shl $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.shl $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shl_vec_v4i32: +; NO-SIMD128-FAST: .functype shl_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.shl $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = shl <4 x i32> %v, %x ret <4 x i32> %a } -; 
CHECK-LABEL: shr_s_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shr_s_v4i32 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) { +; SIMD128-UNIMPL-LABEL: shr_s_v4i32: +; SIMD128-UNIMPL: .functype shr_s_v4i32 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i32x4.shr_s $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shr_s_v4i32: +; SIMD128-VM-SLOW: .functype shr_s_v4i32 (v128, i32) -> (v128) +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push2=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push3=, $pop2, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.splat $push4=, $pop3 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push0=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push1=, $pop0, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push5=, $pop4, 1, $pop1 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push6=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push7=, $pop6, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push8=, $pop5, 2, $pop7 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push9=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i32.shr_s $push10=, $pop9, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push11=, $pop8, 3, $pop10 +; SIMD128-VM-SLOW-NEXT: return $pop11 +; +; SIMD128-VM-FAST-LABEL: shr_s_v4i32: +; SIMD128-VM-FAST: .functype shr_s_v4i32 (v128, i32) -> (v128) +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push3=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push4=, $pop3, $1 +; SIMD128-VM-FAST-NEXT: i32x4.splat $push5=, $pop4 +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push1=, $0, 1 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push2=, $pop1, $1 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push6=, $pop5, 1, $pop2 +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push7=, $0, 2 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push8=, $pop7, $1 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push9=, $pop6, 2, $pop8 +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane 
$push10=, $0, 3 +; SIMD128-VM-FAST-NEXT: i32.shr_s $push11=, $pop10, $1 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push0=, $pop9, 3, $pop11 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_s_v4i32: +; NO-SIMD128-SLOW: .functype shr_s_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push0=, $4, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push3=, $3, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push4=, $2, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_v4i32: +; NO-SIMD128-FAST: .functype shr_s_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.shr_s $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push1=, $2, $5 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $5 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, <4 x i32> @@ -563,30 +7811,143 @@ ret <4 x i32> %a } -; CHECK-LABEL: shr_s_vec_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shr_s_vec_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: 
i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 2 lanes -; SIMD128: i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}} -; SIMD128-NEXT: i32.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { +; SIMD128-SLOW-LABEL: shr_s_vec_v4i32: +; SIMD128-SLOW: .functype shr_s_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push4=, $0, 0 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push3=, $1, 0 +; SIMD128-SLOW-NEXT: i32.shr_s $push5=, $pop4, $pop3 +; SIMD128-SLOW-NEXT: i32x4.splat $push6=, $pop5 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push1=, $0, 1 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push0=, $1, 1 +; SIMD128-SLOW-NEXT: i32.shr_s $push2=, $pop1, $pop0 +; SIMD128-SLOW-NEXT: i32x4.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push9=, $0, 2 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push8=, $1, 2 +; SIMD128-SLOW-NEXT: i32.shr_s $push10=, $pop9, $pop8 +; SIMD128-SLOW-NEXT: i32x4.replace_lane $push11=, $pop7, 2, $pop10 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push13=, $0, 3 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push12=, $1, 3 +; SIMD128-SLOW-NEXT: i32.shr_s $push14=, $pop13, $pop12 +; SIMD128-SLOW-NEXT: i32x4.replace_lane $push15=, $pop11, 3, $pop14 +; SIMD128-SLOW-NEXT: return $pop15 +; +; SIMD128-FAST-LABEL: shr_s_vec_v4i32: +; SIMD128-FAST: .functype shr_s_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: i32x4.extract_lane $push5=, $0, 0 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push4=, $1, 0 +; SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: i32x4.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push2=, $0, 1 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push1=, $1, 1 +; SIMD128-FAST-NEXT: 
i32.shr_s $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push8=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push10=, $0, 2 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push9=, $1, 2 +; SIMD128-FAST-NEXT: i32.shr_s $push11=, $pop10, $pop9 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push12=, $pop8, 2, $pop11 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push14=, $0, 3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push13=, $1, 3 +; SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push0=, $pop12, 3, $pop15 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_s_vec_v4i32: +; NO-SIMD128-SLOW: .functype shr_s_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.shr_s $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_s_vec_v4i32: +; NO-SIMD128-FAST: .functype shr_s_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.shr_s $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = ashr <4 x i32> %v, %x ret <4 x i32> 
%a } -; CHECK-LABEL: shr_u_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shr_u_v4i32 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i32x4.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) { +; SIMD128-UNIMPL-LABEL: shr_u_v4i32: +; SIMD128-UNIMPL: .functype shr_u_v4i32 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i32x4.shr_u $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: shr_u_v4i32: +; SIMD128-VM-SLOW: .functype shr_u_v4i32 (v128, i32) -> (v128) +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push2=, $0, 0 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push3=, $pop2, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.splat $push4=, $pop3 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push0=, $0, 1 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push1=, $pop0, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push5=, $pop4, 1, $pop1 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push6=, $0, 2 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push7=, $pop6, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push8=, $pop5, 2, $pop7 +; SIMD128-VM-SLOW-NEXT: i32x4.extract_lane $push9=, $0, 3 +; SIMD128-VM-SLOW-NEXT: i32.shr_u $push10=, $pop9, $1 +; SIMD128-VM-SLOW-NEXT: i32x4.replace_lane $push11=, $pop8, 3, $pop10 +; SIMD128-VM-SLOW-NEXT: return $pop11 +; +; SIMD128-VM-FAST-LABEL: shr_u_v4i32: +; SIMD128-VM-FAST: .functype shr_u_v4i32 (v128, i32) -> (v128) +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push3=, $0, 0 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push4=, $pop3, $1 +; SIMD128-VM-FAST-NEXT: i32x4.splat $push5=, $pop4 +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push1=, $0, 1 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push2=, $pop1, $1 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push6=, $pop5, 1, $pop2 +; SIMD128-VM-FAST-NEXT: i32x4.extract_lane $push7=, $0, 2 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push8=, $pop7, $1 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push9=, $pop6, 2, $pop8 +; SIMD128-VM-FAST-NEXT: 
i32x4.extract_lane $push10=, $0, 3 +; SIMD128-VM-FAST-NEXT: i32.shr_u $push11=, $pop10, $1 +; SIMD128-VM-FAST-NEXT: i32x4.replace_lane $push0=, $pop9, 3, $pop11 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_u_v4i32: +; NO-SIMD128-SLOW: .functype shr_u_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push0=, $4, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push3=, $3, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push4=, $2, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_v4i32: +; NO-SIMD128-FAST: .functype shr_u_v4i32 (i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.shr_u $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push1=, $2, $5 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $5 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> @@ -594,75 +7955,307 @@ ret <4 x i32> %a } -; CHECK-LABEL: shr_u_vec_v4i32: -; NO-SIMD128-NOT: i32x4 -; SIMD128-NEXT: .functype shr_u_vec_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i32.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; 
SIMD128-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; Skip 2 lanes -; SIMD128: i32x4.extract_lane $push[[L4:[0-9]+]]=, $0, 3{{$}} -; SIMD128-NEXT: i32x4.extract_lane $push[[L5:[0-9]+]]=, $1, 3{{$}} -; SIMD128-NEXT: i32.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $pop[[L7:[0-9]+]], 3, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { +; SIMD128-SLOW-LABEL: shr_u_vec_v4i32: +; SIMD128-SLOW: .functype shr_u_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push4=, $0, 0 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push3=, $1, 0 +; SIMD128-SLOW-NEXT: i32.shr_u $push5=, $pop4, $pop3 +; SIMD128-SLOW-NEXT: i32x4.splat $push6=, $pop5 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push1=, $0, 1 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push0=, $1, 1 +; SIMD128-SLOW-NEXT: i32.shr_u $push2=, $pop1, $pop0 +; SIMD128-SLOW-NEXT: i32x4.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push9=, $0, 2 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push8=, $1, 2 +; SIMD128-SLOW-NEXT: i32.shr_u $push10=, $pop9, $pop8 +; SIMD128-SLOW-NEXT: i32x4.replace_lane $push11=, $pop7, 2, $pop10 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push13=, $0, 3 +; SIMD128-SLOW-NEXT: i32x4.extract_lane $push12=, $1, 3 +; SIMD128-SLOW-NEXT: i32.shr_u $push14=, $pop13, $pop12 +; SIMD128-SLOW-NEXT: i32x4.replace_lane $push15=, $pop11, 3, $pop14 +; SIMD128-SLOW-NEXT: return $pop15 +; +; SIMD128-FAST-LABEL: shr_u_vec_v4i32: +; SIMD128-FAST: .functype shr_u_vec_v4i32 (v128, v128) -> (v128) +; SIMD128-FAST-NEXT: i32x4.extract_lane $push5=, $0, 0 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push4=, $1, 0 +; SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: i32x4.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push2=, $0, 1 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push1=, $1, 1 +; 
SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push8=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push10=, $0, 2 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push9=, $1, 2 +; SIMD128-FAST-NEXT: i32.shr_u $push11=, $pop10, $pop9 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push12=, $pop8, 2, $pop11 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push14=, $0, 3 +; SIMD128-FAST-NEXT: i32x4.extract_lane $push13=, $1, 3 +; SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: i32x4.replace_lane $push0=, $pop12, 3, $pop15 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: shr_u_vec_v4i32: +; NO-SIMD128-SLOW: .functype shr_u_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.shr_u $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: shr_u_vec_v4i32: +; NO-SIMD128-FAST: .functype shr_u_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.shr_u $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = lshr <4 x i32> %v, 
%x ret <4 x i32> %a } -; CHECK-LABEL: and_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype and_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: and_v4i32: +; SIMD128: .functype and_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: v128.and $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: and_v4i32: +; NO-SIMD128-SLOW: .functype and_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.and $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.and $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.and $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.and $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: and_v4i32: +; NO-SIMD128-FAST: .functype and_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = and <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: or_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype or_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return 
$pop[[R]]{{$}} define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: or_v4i32: +; SIMD128: .functype or_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: v128.or $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: or_v4i32: +; NO-SIMD128-SLOW: .functype or_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.or $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.or $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.or $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.or $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: or_v4i32: +; NO-SIMD128-FAST: .functype or_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.or $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.or $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = or <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: xor_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype xor_v4i32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) { +; SIMD128-LABEL: xor_v4i32: +; SIMD128: .functype xor_v4i32 (v128, v128) -> (v128) +; SIMD128-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; 
NO-SIMD128-SLOW-LABEL: xor_v4i32: +; NO-SIMD128-SLOW: .functype xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: i32.xor $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: i32.xor $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.xor $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: xor_v4i32: +; NO-SIMD128-FAST: .functype xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.xor $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = xor <4 x i32> %x, %y ret <4 x i32> %a } -; CHECK-LABEL: not_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype not_v4i32 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x i32> @not_v4i32(<4 x i32> %x) { +; SIMD128-LABEL: not_v4i32: +; SIMD128: .functype not_v4i32 (v128) -> (v128) +; SIMD128-NEXT: v128.not $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: not_v4i32: +; NO-SIMD128-SLOW: .functype not_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, -1 
+; NO-SIMD128-SLOW-NEXT: i32.xor $push1=, $4, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: i32.const $push9=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push4=, $3, $pop9 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push8=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push5=, $2, $pop8 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push7=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push6=, $1, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop6 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: not_v4i32: +; NO-SIMD128-FAST: .functype not_v4i32 (i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push8=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %a = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ret <4 x i32> %a } -; CHECK-LABEL: bitselect_v4i32: -; NO-SIMD128-NOT: v128 -; SIMD128-NEXT: .functype bitselect_v4i32 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.or -; SIMD128-FAST-NEXT: return define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { +; SIMD128-SLOW-LABEL: bitselect_v4i32: +; SIMD128-SLOW: 
.functype bitselect_v4i32 (v128, v128, v128) -> (v128) +; SIMD128-SLOW-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-SLOW-NEXT: return $pop0 +; +; SIMD128-FAST-LABEL: bitselect_v4i32: +; SIMD128-FAST: .functype bitselect_v4i32 (v128, v128, v128) -> (v128) +; SIMD128-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-FAST-NEXT: v128.and $push3=, $pop2, $2 +; SIMD128-FAST-NEXT: v128.and $push0=, $0, $1 +; SIMD128-FAST-NEXT: v128.or $push1=, $pop3, $pop0 +; SIMD128-FAST-NEXT: return $pop1 +; +; NO-SIMD128-SLOW-LABEL: bitselect_v4i32: +; NO-SIMD128-SLOW: .functype bitselect_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push5=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push2=, $4, $pop1 +; NO-SIMD128-SLOW-NEXT: i32.and $push3=, $pop2, $12 +; NO-SIMD128-SLOW-NEXT: i32.and $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: i32.or $push4=, $pop3, $pop0 +; NO-SIMD128-SLOW-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-SLOW-NEXT: i32.const $push21=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push8=, $3, $pop21 +; NO-SIMD128-SLOW-NEXT: i32.and $push9=, $pop8, $11 +; NO-SIMD128-SLOW-NEXT: i32.and $push7=, $3, $7 +; NO-SIMD128-SLOW-NEXT: i32.or $push10=, $pop9, $pop7 +; NO-SIMD128-SLOW-NEXT: i32.store 8($0), $pop10 +; NO-SIMD128-SLOW-NEXT: i32.const $push20=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push12=, $2, $pop20 +; NO-SIMD128-SLOW-NEXT: i32.and $push13=, $pop12, $10 +; NO-SIMD128-SLOW-NEXT: i32.and $push11=, $2, $6 +; NO-SIMD128-SLOW-NEXT: i32.or $push14=, $pop13, $pop11 +; NO-SIMD128-SLOW-NEXT: i32.store 4($0), $pop14 +; NO-SIMD128-SLOW-NEXT: i32.const $push19=, -1 +; NO-SIMD128-SLOW-NEXT: i32.xor $push16=, $1, $pop19 +; NO-SIMD128-SLOW-NEXT: i32.and $push17=, $pop16, $9 +; NO-SIMD128-SLOW-NEXT: i32.and $push15=, $1, $5 +; NO-SIMD128-SLOW-NEXT: i32.or $push18=, $pop17, $pop15 +; NO-SIMD128-SLOW-NEXT: i32.store 0($0), $pop18 +; 
NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: bitselect_v4i32: +; NO-SIMD128-FAST: .functype bitselect_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () +; NO-SIMD128-FAST-NEXT: i32.const $push1=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop1 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $pop2, $9 +; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop3, $pop0 +; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop21 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $10 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $6 +; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop7, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $11 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $7 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop11, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop19 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $12 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop15, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <4 x i32> %c, %v1 %inv_mask = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %c %masked_v2 = and <4 x i32> %inv_mask, %v2 @@ -673,56 +8266,145 @@ ; ============================================================================== ; 2 x i64 ; ============================================================================== -; CHECK-LABEL: add_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-VM-NOT: i64x2 -; SIMD128-NEXT: .functype add_v2i64 (v128, v128) -> 
(v128){{$}} -; SIMD128-NEXT: i64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} + define <2 x i64> @add_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-UNIMPL-LABEL: add_v2i64: +; SIMD128-UNIMPL: .functype add_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: i64x2.add $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: add_v2i64: +; NO-SIMD64-SLOW: .functype add_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.add $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.add $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: add_v2i64: +; NO-SIMD64-FAST: .functype add_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.add $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.add $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = add <2 x i64> %x, %y ret <2 x i64> %a } -; CHECK-LABEL: sub_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-VM-NOT: i64x2 -; SIMD128-NEXT: .functype sub_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @sub_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-UNIMPL-LABEL: sub_v2i64: +; SIMD128-UNIMPL: .functype sub_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: i64x2.sub $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: sub_v2i64: +; NO-SIMD64-SLOW: .functype sub_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.sub $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.sub $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: sub_v2i64: +; NO-SIMD64-FAST: .functype sub_v2i64 (i32, i64, i64, i64, i64) -> () +; 
NO-SIMD64-FAST-NEXT: i64.sub $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.sub $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = sub <2 x i64> %x, %y ret <2 x i64> %a } -; v2i64.mul is not in spec -; CHECK-LABEL: mul_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-VM-NOT: i64x2 -; SIMD128-NOT: i64x2.mul -; SIMD128: i64x2.extract_lane -; SIMD128: i64.mul define <2 x i64> @mul_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-UNIMPL-SLOW-LABEL: mul_v2i64: +; SIMD128-UNIMPL-SLOW: .functype mul_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push4=, $0, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push3=, $1, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i64.mul $push5=, $pop4, $pop3 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.splat $push6=, $pop5 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push1=, $0, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push0=, $1, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i64.mul $push2=, $pop1, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop7 +; +; SIMD128-UNIMPL-FAST-LABEL: mul_v2i64: +; SIMD128-UNIMPL-FAST: .functype mul_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push5=, $0, 0 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push4=, $1, 0 +; SIMD128-UNIMPL-FAST-NEXT: i64.mul $push6=, $pop5, $pop4 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.splat $push7=, $pop6 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push2=, $0, 1 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push1=, $1, 1 +; SIMD128-UNIMPL-FAST-NEXT: i64.mul $push3=, $pop2, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.replace_lane $push0=, $pop7, 1, $pop3 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: mul_v2i64: +; NO-SIMD64-SLOW: .functype mul_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.mul $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: i64.store 
8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.mul $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: mul_v2i64: +; NO-SIMD64-FAST: .functype mul_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.mul $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.mul $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = mul <2 x i64> %x, %y ret <2 x i64> %a } -; CHECK-LABEL: neg_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype neg_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @neg_v2i64(<2 x i64> %x) { +; SIMD128-UNIMPL-LABEL: neg_v2i64: +; SIMD128-UNIMPL: .functype neg_v2i64 (v128) -> (v128) +; SIMD128-UNIMPL-NEXT: i64x2.neg $push0=, $0 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: neg_v2i64: +; NO-SIMD64-SLOW: .functype neg_v2i64 (i32, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.const $push0=, 0 +; NO-SIMD64-SLOW-NEXT: i64.sub $push1=, $pop0, $2 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-SLOW-NEXT: i64.const $push3=, 0 +; NO-SIMD64-SLOW-NEXT: i64.sub $push2=, $pop3, $1 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop2 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: neg_v2i64: +; NO-SIMD64-FAST: .functype neg_v2i64 (i32, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.const $push0=, 0 +; NO-SIMD64-FAST-NEXT: i64.sub $push1=, $pop0, $1 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-FAST-NEXT: i64.const $push3=, 0 +; NO-SIMD64-FAST-NEXT: i64.sub $push2=, $pop3, $2 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop2 +; NO-SIMD64-FAST-NEXT: return %a = sub <2 x i64> <i64 0, i64 0>, %x ret <2 x i64> %a } -; CHECK-LABEL: shl_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shl_v2i64 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}} -; 
SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shl_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-UNIMPL-LABEL: shl_v2i64: +; SIMD128-UNIMPL: .functype shl_v2i64 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i64x2.shl $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-LABEL: shl_v2i64: +; NO-SIMD64: .functype shl_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD64-NEXT: i64.extend_i32_u $push3=, $3 +; NO-SIMD64-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD64-NEXT: i64.shl $push0=, $2, $pop2 +; NO-SIMD64-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-NEXT: i64.shl $push1=, $1, $4 +; NO-SIMD64-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-NEXT: return %x2 = zext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> @@ -730,12 +8412,21 @@ ret <2 x i64> %a } -; CHECK-LABEL: shl_sext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shl_sext_v2i64 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shl_sext_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-UNIMPL-LABEL: shl_sext_v2i64: +; SIMD128-UNIMPL: .functype shl_sext_v2i64 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i64x2.shl $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-LABEL: shl_sext_v2i64: +; NO-SIMD64: .functype shl_sext_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD64-NEXT: i64.extend_i32_s $push3=, $3 +; NO-SIMD64-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD64-NEXT: i64.shl $push0=, $2, $pop2 +; NO-SIMD64-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-NEXT: i64.shl $push1=, $1, $4 +; NO-SIMD64-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-NEXT: return %x2 = sext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> @@ -743,53 +8434,135 @@ ret <2 x i64> %a } -; CHECK-LABEL: shl_noext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shl_noext_v2i64 
(v128, i64) -> (v128){{$}} -; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}} -; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shl_noext_v2i64(<2 x i64> %v, i64 %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shl_noext_v2i64: +; SIMD128-UNIMPL-SLOW: .functype shl_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i32.wrap_i64 $push0=, $1 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.shl $push1=, $0, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop1 +; +; SIMD128-UNIMPL-FAST-LABEL: shl_noext_v2i64: +; SIMD128-UNIMPL-FAST: .functype shl_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i32.wrap_i64 $push1=, $1 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.shl $push0=, $0, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: shl_noext_v2i64: +; NO-SIMD64-SLOW: .functype shl_noext_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.shl $push0=, $2, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.shl $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: shl_noext_v2i64: +; NO-SIMD64-FAST: .functype shl_noext_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.shl $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.shl $push1=, $2, $3 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %t = insertelement <2 x i64> undef, i64 %x, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> <i32 0, i32 0> %a = shl <2 x i64> %v, %s ret <2 x i64> %a } -; CHECK-LABEL: shl_const_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shl_const_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}} -; SIMD128-NEXT: i64x2.shl $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shl_const_v2i64(<2 x i64> %v) { +; SIMD128-UNIMPL-SLOW-LABEL: 
shl_const_v2i64: +; SIMD128-UNIMPL-SLOW: .functype shl_const_v2i64 (v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.shl $push1=, $0, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop1 +; +; SIMD128-UNIMPL-FAST-LABEL: shl_const_v2i64: +; SIMD128-UNIMPL-FAST: .functype shl_const_v2i64 (v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.shl $push0=, $0, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: shl_const_v2i64: +; NO-SIMD64-SLOW: .functype shl_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.const $push0=, 5 +; NO-SIMD64-SLOW-NEXT: i64.shl $push1=, $2, $pop0 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-SLOW-NEXT: i64.const $push3=, 5 +; NO-SIMD64-SLOW-NEXT: i64.shl $push2=, $1, $pop3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop2 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: shl_const_v2i64: +; NO-SIMD64-FAST: .functype shl_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.const $push0=, 5 +; NO-SIMD64-FAST-NEXT: i64.shl $push1=, $1, $pop0 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-FAST-NEXT: i64.const $push3=, 5 +; NO-SIMD64-FAST-NEXT: i64.shl $push2=, $2, $pop3 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop2 +; NO-SIMD64-FAST-NEXT: return %a = shl <2 x i64> %v, <i64 5, i64 5> ret <2 x i64> %a } -; CHECK-LABEL: shl_vec_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shl_vec_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i64.shl $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}} -; SIMD128-NEXT: i64.shl $push[[L6:[0-9]+]]=, $pop[[L4]], 
$pop[[L5]]{{$}} -; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shl_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shl_vec_v2i64: +; SIMD128-UNIMPL-SLOW: .functype shl_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push4=, $0, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push3=, $1, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i64.shl $push5=, $pop4, $pop3 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.splat $push6=, $pop5 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push1=, $0, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push0=, $1, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i64.shl $push2=, $pop1, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop7 +; +; SIMD128-UNIMPL-FAST-LABEL: shl_vec_v2i64: +; SIMD128-UNIMPL-FAST: .functype shl_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push5=, $0, 0 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push4=, $1, 0 +; SIMD128-UNIMPL-FAST-NEXT: i64.shl $push6=, $pop5, $pop4 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.splat $push7=, $pop6 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push2=, $0, 1 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push1=, $1, 1 +; SIMD128-UNIMPL-FAST-NEXT: i64.shl $push3=, $pop2, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.replace_lane $push0=, $pop7, 1, $pop3 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: shl_vec_v2i64: +; NO-SIMD64-SLOW: .functype shl_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.shl $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.shl $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: shl_vec_v2i64: +; NO-SIMD64-FAST: .functype shl_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; 
NO-SIMD64-FAST-NEXT: i64.shl $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.shl $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = shl <2 x i64> %v, %x ret <2 x i64> %a } -; CHECK-LABEL: shr_s_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_s_v2i64 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_s_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-UNIMPL-LABEL: shr_s_v2i64: +; SIMD128-UNIMPL: .functype shr_s_v2i64 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i64x2.shr_s $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-LABEL: shr_s_v2i64: +; NO-SIMD64: .functype shr_s_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD64-NEXT: i64.extend_i32_u $push3=, $3 +; NO-SIMD64-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD64-NEXT: i64.shr_s $push0=, $2, $pop2 +; NO-SIMD64-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-NEXT: i64.shr_s $push1=, $1, $4 +; NO-SIMD64-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-NEXT: return %x2 = zext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> @@ -797,12 +8570,21 @@ ret <2 x i64> %a } -; CHECK-LABEL: shr_s_sext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_s_sext_v2i64 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_s_sext_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-UNIMPL-LABEL: shr_s_sext_v2i64: +; SIMD128-UNIMPL: .functype shr_s_sext_v2i64 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i64x2.shr_s $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-LABEL: shr_s_sext_v2i64: +; NO-SIMD64: .functype shr_s_sext_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD64-NEXT: i64.extend_i32_s $push3=, $3 +; NO-SIMD64-NEXT: local.tee $push2=, $4=, $pop3 
+; NO-SIMD64-NEXT: i64.shr_s $push0=, $2, $pop2 +; NO-SIMD64-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-NEXT: i64.shr_s $push1=, $1, $4 +; NO-SIMD64-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-NEXT: return %x2 = sext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> @@ -810,53 +8592,135 @@ ret <2 x i64> %a } -; CHECK-LABEL: shr_s_noext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_s_noext_v2i64 (v128, i64) -> (v128){{$}} -; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}} -; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_s_noext_v2i64(<2 x i64> %v, i64 %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shr_s_noext_v2i64: +; SIMD128-UNIMPL-SLOW: .functype shr_s_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i32.wrap_i64 $push0=, $1 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.shr_s $push1=, $0, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop1 +; +; SIMD128-UNIMPL-FAST-LABEL: shr_s_noext_v2i64: +; SIMD128-UNIMPL-FAST: .functype shr_s_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i32.wrap_i64 $push1=, $1 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.shr_s $push0=, $0, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: shr_s_noext_v2i64: +; NO-SIMD64-SLOW: .functype shr_s_noext_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.shr_s $push0=, $2, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.shr_s $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: shr_s_noext_v2i64: +; NO-SIMD64-FAST: .functype shr_s_noext_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.shr_s $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.shr_s $push1=, $2, $3 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %t = 
insertelement <2 x i64> undef, i64 %x, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> %a = ashr <2 x i64> %v, %s ret <2 x i64> %a } -; CHECK-LABEL: shr_s_const_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_s_const_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}} -; SIMD128-NEXT: i64x2.shr_s $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_s_const_v2i64(<2 x i64> %v) { +; SIMD128-UNIMPL-SLOW-LABEL: shr_s_const_v2i64: +; SIMD128-UNIMPL-SLOW: .functype shr_s_const_v2i64 (v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.shr_s $push1=, $0, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop1 +; +; SIMD128-UNIMPL-FAST-LABEL: shr_s_const_v2i64: +; SIMD128-UNIMPL-FAST: .functype shr_s_const_v2i64 (v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.shr_s $push0=, $0, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: shr_s_const_v2i64: +; NO-SIMD64-SLOW: .functype shr_s_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.const $push0=, 5 +; NO-SIMD64-SLOW-NEXT: i64.shr_s $push1=, $2, $pop0 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-SLOW-NEXT: i64.const $push3=, 5 +; NO-SIMD64-SLOW-NEXT: i64.shr_s $push2=, $1, $pop3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop2 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: shr_s_const_v2i64: +; NO-SIMD64-FAST: .functype shr_s_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.const $push0=, 5 +; NO-SIMD64-FAST-NEXT: i64.shr_s $push1=, $1, $pop0 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-FAST-NEXT: i64.const $push3=, 5 +; NO-SIMD64-FAST-NEXT: i64.shr_s $push2=, $2, $pop3 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop2 +; NO-SIMD64-FAST-NEXT: return %a = ashr <2 x i64> %v, ret <2 x i64> %a } -; CHECK-LABEL: shr_s_vec_v2i64: -; 
NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_s_vec_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i64.shr_s $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}} -; SIMD128-NEXT: i64.shr_s $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_s_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shr_s_vec_v2i64: +; SIMD128-UNIMPL-SLOW: .functype shr_s_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push4=, $0, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push3=, $1, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i64.shr_s $push5=, $pop4, $pop3 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.splat $push6=, $pop5 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push1=, $0, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push0=, $1, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i64.shr_s $push2=, $pop1, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop7 +; +; SIMD128-UNIMPL-FAST-LABEL: shr_s_vec_v2i64: +; SIMD128-UNIMPL-FAST: .functype shr_s_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push5=, $0, 0 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push4=, $1, 0 +; SIMD128-UNIMPL-FAST-NEXT: i64.shr_s $push6=, $pop5, $pop4 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.splat $push7=, $pop6 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push2=, $0, 1 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push1=, $1, 1 +; SIMD128-UNIMPL-FAST-NEXT: i64.shr_s $push3=, $pop2, $pop1 +; 
SIMD128-UNIMPL-FAST-NEXT: i64x2.replace_lane $push0=, $pop7, 1, $pop3 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: shr_s_vec_v2i64: +; NO-SIMD64-SLOW: .functype shr_s_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.shr_s $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.shr_s $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: shr_s_vec_v2i64: +; NO-SIMD64-FAST: .functype shr_s_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.shr_s $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.shr_s $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = ashr <2 x i64> %v, %x ret <2 x i64> %a } -; CHECK-LABEL: shr_u_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_u_v2i64 (v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_u_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-UNIMPL-LABEL: shr_u_v2i64: +; SIMD128-UNIMPL: .functype shr_u_v2i64 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i64x2.shr_u $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-LABEL: shr_u_v2i64: +; NO-SIMD64: .functype shr_u_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD64-NEXT: i64.extend_i32_u $push3=, $3 +; NO-SIMD64-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD64-NEXT: i64.shr_u $push0=, $2, $pop2 +; NO-SIMD64-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-NEXT: i64.shr_u $push1=, $1, $4 +; NO-SIMD64-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-NEXT: return %x2 = zext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> @@ -864,12 +8728,21 @@ ret <2 x i64> %a } -; CHECK-LABEL: shr_u_sext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_u_sext_v2i64 
(v128, i32) -> (v128){{$}} -; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_u_sext_v2i64(<2 x i64> %v, i32 %x) { +; SIMD128-UNIMPL-LABEL: shr_u_sext_v2i64: +; SIMD128-UNIMPL: .functype shr_u_sext_v2i64 (v128, i32) -> (v128) +; SIMD128-UNIMPL-NEXT: i64x2.shr_u $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-LABEL: shr_u_sext_v2i64: +; NO-SIMD64: .functype shr_u_sext_v2i64 (i32, i64, i64, i32) -> () +; NO-SIMD64-NEXT: i64.extend_i32_s $push3=, $3 +; NO-SIMD64-NEXT: local.tee $push2=, $4=, $pop3 +; NO-SIMD64-NEXT: i64.shr_u $push0=, $2, $pop2 +; NO-SIMD64-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-NEXT: i64.shr_u $push1=, $1, $4 +; NO-SIMD64-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-NEXT: return %x2 = sext i32 %x to i64 %t = insertelement <2 x i64> undef, i64 %x2, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> @@ -877,103 +8750,269 @@ ret <2 x i64> %a } -; CHECK-LABEL: shr_u_noext_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_u_noext_v2i64 (v128, i64) -> (v128){{$}} -; SIMD128-NEXT: i32.wrap_i64 $push[[L0:[0-9]+]]=, $1{{$}} -; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_u_noext_v2i64(<2 x i64> %v, i64 %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shr_u_noext_v2i64: +; SIMD128-UNIMPL-SLOW: .functype shr_u_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i32.wrap_i64 $push0=, $1 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.shr_u $push1=, $0, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop1 +; +; SIMD128-UNIMPL-FAST-LABEL: shr_u_noext_v2i64: +; SIMD128-UNIMPL-FAST: .functype shr_u_noext_v2i64 (v128, i64) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i32.wrap_i64 $push1=, $1 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.shr_u $push0=, $0, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: shr_u_noext_v2i64: +; NO-SIMD64-SLOW: .functype shr_u_noext_v2i64 (i32, i64, 
i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.shr_u $push0=, $2, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.shr_u $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: shr_u_noext_v2i64: +; NO-SIMD64-FAST: .functype shr_u_noext_v2i64 (i32, i64, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.shr_u $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.shr_u $push1=, $2, $3 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %t = insertelement <2 x i64> undef, i64 %x, i32 0 %s = shufflevector <2 x i64> %t, <2 x i64> undef, <2 x i32> %a = lshr <2 x i64> %v, %s ret <2 x i64> %a } -; CHECK-LABEL: shr_u_const_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_u_const_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: i32.const $push[[L0:[0-9]+]]=, 5{{$}} -; SIMD128-NEXT: i64x2.shr_u $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_u_const_v2i64(<2 x i64> %v) { +; SIMD128-UNIMPL-SLOW-LABEL: shr_u_const_v2i64: +; SIMD128-UNIMPL-SLOW: .functype shr_u_const_v2i64 (v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i32.const $push0=, 5 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.shr_u $push1=, $0, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop1 +; +; SIMD128-UNIMPL-FAST-LABEL: shr_u_const_v2i64: +; SIMD128-UNIMPL-FAST: .functype shr_u_const_v2i64 (v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i32.const $push1=, 5 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.shr_u $push0=, $0, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: shr_u_const_v2i64: +; NO-SIMD64-SLOW: .functype shr_u_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.const $push0=, 5 +; NO-SIMD64-SLOW-NEXT: i64.shr_u $push1=, $2, $pop0 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-SLOW-NEXT: i64.const $push3=, 5 +; NO-SIMD64-SLOW-NEXT: i64.shr_u $push2=, $1, $pop3 +; 
NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop2 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: shr_u_const_v2i64: +; NO-SIMD64-FAST: .functype shr_u_const_v2i64 (i32, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.const $push0=, 5 +; NO-SIMD64-FAST-NEXT: i64.shr_u $push1=, $1, $pop0 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-FAST-NEXT: i64.const $push3=, 5 +; NO-SIMD64-FAST-NEXT: i64.shr_u $push2=, $2, $pop3 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop2 +; NO-SIMD64-FAST-NEXT: return %a = lshr <2 x i64> %v, ret <2 x i64> %a } -; CHECK-LABEL: shr_u_vec_v2i64: -; NO-SIMD128-NOT: i64x2 -; SIMD128-NEXT: .functype shr_u_vec_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L0:[0-9]+]]=, $0, 0{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L1:[0-9]+]]=, $1, 0{{$}} -; SIMD128-NEXT: i64.shr_u $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; SIMD128-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L4:[0-9]+]]=, $0, 1{{$}} -; SIMD128-NEXT: i64x2.extract_lane $push[[L5:[0-9]+]]=, $1, 1{{$}} -; SIMD128-NEXT: i64.shr_u $push[[L6:[0-9]+]]=, $pop[[L4]], $pop[[L5]]{{$}} -; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $pop[[L3]], 1, $pop[[L6]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @shr_u_vec_v2i64(<2 x i64> %v, <2 x i64> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: shr_u_vec_v2i64: +; SIMD128-UNIMPL-SLOW: .functype shr_u_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push4=, $0, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push3=, $1, 0 +; SIMD128-UNIMPL-SLOW-NEXT: i64.shr_u $push5=, $pop4, $pop3 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.splat $push6=, $pop5 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push1=, $0, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.extract_lane $push0=, $1, 1 +; SIMD128-UNIMPL-SLOW-NEXT: i64.shr_u $push2=, $pop1, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: i64x2.replace_lane $push7=, $pop6, 1, $pop2 +; 
SIMD128-UNIMPL-SLOW-NEXT: return $pop7 +; +; SIMD128-UNIMPL-FAST-LABEL: shr_u_vec_v2i64: +; SIMD128-UNIMPL-FAST: .functype shr_u_vec_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push5=, $0, 0 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push4=, $1, 0 +; SIMD128-UNIMPL-FAST-NEXT: i64.shr_u $push6=, $pop5, $pop4 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.splat $push7=, $pop6 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push2=, $0, 1 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.extract_lane $push1=, $1, 1 +; SIMD128-UNIMPL-FAST-NEXT: i64.shr_u $push3=, $pop2, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: i64x2.replace_lane $push0=, $pop7, 1, $pop3 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: shr_u_vec_v2i64: +; NO-SIMD64-SLOW: .functype shr_u_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.shr_u $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.shr_u $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: shr_u_vec_v2i64: +; NO-SIMD64-FAST: .functype shr_u_vec_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.shr_u $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.shr_u $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = lshr <2 x i64> %v, %x ret <2 x i64> %a } -; CHECK-LABEL: and_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-VM-NOT: v128 -; SIMD128-NEXT: .functype and_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @and_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-UNIMPL-LABEL: and_v2i64: +; SIMD128-UNIMPL: .functype and_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: v128.and $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: and_v2i64: +; NO-SIMD64-SLOW: .functype 
and_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.and $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.and $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: and_v2i64: +; NO-SIMD64-FAST: .functype and_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.and $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.and $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = and <2 x i64> %x, %y ret <2 x i64> %a } -; CHECK-LABEL: or_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-VM-NOT: v128 -; SIMD128-NEXT: .functype or_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @or_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-UNIMPL-LABEL: or_v2i64: +; SIMD128-UNIMPL: .functype or_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: v128.or $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: or_v2i64: +; NO-SIMD64-SLOW: .functype or_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.or $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.or $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: or_v2i64: +; NO-SIMD64-FAST: .functype or_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.or $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.or $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = or <2 x i64> %x, %y ret <2 x i64> %a } -; CHECK-LABEL: xor_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-VM-NOT: v128 -; SIMD128-NEXT: .functype xor_v2i64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: v128.xor $push[[R:[0-9]+]]=, $0, 
$1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @xor_v2i64(<2 x i64> %x, <2 x i64> %y) { +; SIMD128-UNIMPL-LABEL: xor_v2i64: +; SIMD128-UNIMPL: .functype xor_v2i64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: v128.xor $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: xor_v2i64: +; NO-SIMD64-SLOW: .functype xor_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.xor $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: i64.xor $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: xor_v2i64: +; NO-SIMD64-FAST: .functype xor_v2i64 (i32, i64, i64, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.xor $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: i64.xor $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = xor <2 x i64> %x, %y ret <2 x i64> %a } -; CHECK-LABEL: not_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-VM-NOT: v128 -; SIMD128-NEXT: .functype not_v2i64 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.not $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x i64> @not_v2i64(<2 x i64> %x) { +; SIMD128-UNIMPL-LABEL: not_v2i64: +; SIMD128-UNIMPL: .functype not_v2i64 (v128) -> (v128) +; SIMD128-UNIMPL-NEXT: v128.not $push0=, $0 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: not_v2i64: +; NO-SIMD64-SLOW: .functype not_v2i64 (i32, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.const $push0=, -1 +; NO-SIMD64-SLOW-NEXT: i64.xor $push1=, $2, $pop0 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop1 +; NO-SIMD64-SLOW-NEXT: i64.const $push3=, -1 +; NO-SIMD64-SLOW-NEXT: i64.xor $push2=, $1, $pop3 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop2 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: not_v2i64: +; NO-SIMD64-FAST: .functype not_v2i64 (i32, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.const 
$push0=, -1 +; NO-SIMD64-FAST-NEXT: i64.xor $push1=, $1, $pop0 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-FAST-NEXT: i64.const $push3=, -1 +; NO-SIMD64-FAST-NEXT: i64.xor $push2=, $2, $pop3 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop2 +; NO-SIMD64-FAST-NEXT: return %a = xor <2 x i64> %x, ret <2 x i64> %a } -; CHECK-LABEL: bitselect_v2i64: -; NO-SIMD128-NOT: v128 -; SIMD128-VM-NOT: v128 -; SIMD128-NEXT: .functype bitselect_v2i64 (v128, v128, v128) -> (v128){{$}} -; SIMD128-SLOW-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $0{{$}} -; SIMD128-SLOW-NEXT: return $pop[[R]]{{$}} -; SIMD128-FAST-NEXT: v128.not -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.and -; SIMD128-FAST-NEXT: v128.or -; SIMD128-FAST-NEXT: return define <2 x i64> @bitselect_v2i64(<2 x i64> %c, <2 x i64> %v1, <2 x i64> %v2) { +; SIMD128-UNIMPL-SLOW-LABEL: bitselect_v2i64: +; SIMD128-UNIMPL-SLOW: .functype bitselect_v2i64 (v128, v128, v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: v128.bitselect $push0=, $1, $2, $0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop0 +; +; SIMD128-UNIMPL-FAST-LABEL: bitselect_v2i64: +; SIMD128-UNIMPL-FAST: .functype bitselect_v2i64 (v128, v128, v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: v128.not $push2=, $0 +; SIMD128-UNIMPL-FAST-NEXT: v128.and $push3=, $2, $pop2 +; SIMD128-UNIMPL-FAST-NEXT: v128.and $push0=, $1, $0 +; SIMD128-UNIMPL-FAST-NEXT: v128.or $push1=, $pop3, $pop0 +; SIMD128-UNIMPL-FAST-NEXT: return $pop1 +; +; NO-SIMD64-SLOW-LABEL: bitselect_v2i64: +; NO-SIMD64-SLOW: .functype bitselect_v2i64 (i32, i64, i64, i64, i64, i64, i64) -> () +; NO-SIMD64-SLOW-NEXT: i64.const $push1=, -1 +; NO-SIMD64-SLOW-NEXT: i64.xor $push2=, $2, $pop1 +; NO-SIMD64-SLOW-NEXT: i64.and $push3=, $6, $pop2 +; NO-SIMD64-SLOW-NEXT: i64.and $push0=, $4, $2 +; NO-SIMD64-SLOW-NEXT: i64.or $push4=, $pop3, $pop0 +; NO-SIMD64-SLOW-NEXT: i64.store 8($0), $pop4 +; NO-SIMD64-SLOW-NEXT: i64.const $push9=, -1 +; NO-SIMD64-SLOW-NEXT: i64.xor $push6=, $1, $pop9 +; 
NO-SIMD64-SLOW-NEXT: i64.and $push7=, $5, $pop6 +; NO-SIMD64-SLOW-NEXT: i64.and $push5=, $3, $1 +; NO-SIMD64-SLOW-NEXT: i64.or $push8=, $pop7, $pop5 +; NO-SIMD64-SLOW-NEXT: i64.store 0($0), $pop8 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: bitselect_v2i64: +; NO-SIMD64-FAST: .functype bitselect_v2i64 (i32, i64, i64, i64, i64, i64, i64) -> () +; NO-SIMD64-FAST-NEXT: i64.const $push1=, -1 +; NO-SIMD64-FAST-NEXT: i64.xor $push2=, $1, $pop1 +; NO-SIMD64-FAST-NEXT: i64.and $push3=, $5, $pop2 +; NO-SIMD64-FAST-NEXT: i64.and $push0=, $3, $1 +; NO-SIMD64-FAST-NEXT: i64.or $push4=, $pop3, $pop0 +; NO-SIMD64-FAST-NEXT: i64.store 0($0), $pop4 +; NO-SIMD64-FAST-NEXT: i64.const $push9=, -1 +; NO-SIMD64-FAST-NEXT: i64.xor $push6=, $2, $pop9 +; NO-SIMD64-FAST-NEXT: i64.and $push7=, $6, $pop6 +; NO-SIMD64-FAST-NEXT: i64.and $push5=, $4, $2 +; NO-SIMD64-FAST-NEXT: i64.or $push8=, $pop7, $pop5 +; NO-SIMD64-FAST-NEXT: i64.store 8($0), $pop8 +; NO-SIMD64-FAST-NEXT: return %masked_v1 = and <2 x i64> %v1, %c %inv_mask = xor <2 x i64> , %c %masked_v2 = and <2 x i64> %v2, %inv_mask @@ -984,135 +9023,479 @@ ; ============================================================================== ; 4 x float ; ============================================================================== -; CHECK-LABEL: neg_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype neg_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} + +; nsz makes this semantically equivalent to flipping sign bit define <4 x float> @neg_v4f32(<4 x float> %x) { - ; nsz makes this semantically equivalent to flipping sign bit +; SIMD128-LABEL: neg_v4f32: +; SIMD128: .functype neg_v4f32 (v128) -> (v128) +; SIMD128-NEXT: f32x4.neg $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: neg_v4f32: +; NO-SIMD128-SLOW: .functype neg_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 12 +; 
NO-SIMD128-SLOW-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-SLOW-NEXT: f32.neg $push2=, $4 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop1), $pop2 +; NO-SIMD128-SLOW-NEXT: f32.neg $push3=, $3 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: f32.neg $push4=, $2 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.neg $push5=, $1 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: neg_v4f32: +; NO-SIMD128-FAST: .functype neg_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.neg $push0=, $1 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.neg $push1=, $2 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.neg $push2=, $3 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.neg $push5=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = fsub nsz <4 x float> , %x ret <4 x float> %a } -; CHECK-LABEL: abs_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype abs_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.abs $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone define <4 x float> @abs_v4f32(<4 x float> %x) { +; SIMD128-LABEL: abs_v4f32: +; SIMD128: .functype abs_v4f32 (v128) -> (v128) +; SIMD128-NEXT: f32x4.abs $push0=, $0 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: abs_v4f32: +; NO-SIMD128-SLOW: .functype abs_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-SLOW-NEXT: f32.abs $push2=, $4 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop1), $pop2 +; NO-SIMD128-SLOW-NEXT: f32.abs $push3=, $3 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop3 +; 
NO-SIMD128-SLOW-NEXT: f32.abs $push4=, $2 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.abs $push5=, $1 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: abs_v4f32: +; NO-SIMD128-FAST: .functype abs_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.abs $push0=, $1 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.abs $push1=, $2 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.abs $push2=, $3 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.abs $push5=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x) ret <4 x float> %a } -; CHECK-LABEL: min_unordered_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2 -; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]] -; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @min_unordered_v4f32(<4 x float> %x) { +; SIMD128-SLOW-LABEL: min_unordered_v4f32: +; SIMD128-SLOW: .functype min_unordered_v4f32 (v128) -> (v128) +; SIMD128-SLOW-NEXT: f32.const $push0=, 0x1.4p2 +; SIMD128-SLOW-NEXT: f32x4.splat $push1=, $pop0 +; SIMD128-SLOW-NEXT: f32x4.min $push2=, $0, $pop1 +; SIMD128-SLOW-NEXT: return $pop2 +; +; SIMD128-FAST-LABEL: min_unordered_v4f32: +; SIMD128-FAST: .functype min_unordered_v4f32 (v128) -> (v128) +; SIMD128-FAST-NEXT: f32.const $push1=, 0x1.4p2 +; SIMD128-FAST-NEXT: f32x4.splat $push2=, $pop1 +; SIMD128-FAST-NEXT: f32x4.min $push0=, $0, $pop2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: min_unordered_v4f32: +; NO-SIMD128-SLOW: .functype 
min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.min $push1=, $4, $pop0 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.min $push4=, $3, $pop9 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.min $push5=, $2, $pop8 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-SLOW-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.min $push6=, $1, $pop7 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop6 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_unordered_v4f32: +; NO-SIMD128-FAST: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ule <4 x float> %x, %a = select <4 x i1> %cmps, <4 x float> %x, <4 x float> ret <4 x float> %a } -; CHECK-LABEL: max_unordered_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2 -; SIMD128-NEXT: f32x4.splat 
$push[[L1:[0-9]+]]=, $pop[[L0]] -; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @max_unordered_v4f32(<4 x float> %x) { +; SIMD128-SLOW-LABEL: max_unordered_v4f32: +; SIMD128-SLOW: .functype max_unordered_v4f32 (v128) -> (v128) +; SIMD128-SLOW-NEXT: f32.const $push0=, 0x1.4p2 +; SIMD128-SLOW-NEXT: f32x4.splat $push1=, $pop0 +; SIMD128-SLOW-NEXT: f32x4.max $push2=, $0, $pop1 +; SIMD128-SLOW-NEXT: return $pop2 +; +; SIMD128-FAST-LABEL: max_unordered_v4f32: +; SIMD128-FAST: .functype max_unordered_v4f32 (v128) -> (v128) +; SIMD128-FAST-NEXT: f32.const $push1=, 0x1.4p2 +; SIMD128-FAST-NEXT: f32x4.splat $push2=, $pop1 +; SIMD128-FAST-NEXT: f32x4.max $push0=, $0, $pop2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: max_unordered_v4f32: +; NO-SIMD128-SLOW: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.max $push1=, $4, $pop0 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.max $push4=, $3, $pop9 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.max $push5=, $2, $pop8 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-SLOW-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.max $push6=, $1, $pop7 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop6 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_unordered_v4f32: +; NO-SIMD128-FAST: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; 
NO-SIMD128-FAST-NEXT: f32.max $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp uge <4 x float> %x, %a = select <4 x i1> %cmps, <4 x float> %x, <4 x float> ret <4 x float> %a } -; CHECK-LABEL: min_ordered_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2 -; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]] -; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @min_ordered_v4f32(<4 x float> %x) { +; SIMD128-SLOW-LABEL: min_ordered_v4f32: +; SIMD128-SLOW: .functype min_ordered_v4f32 (v128) -> (v128) +; SIMD128-SLOW-NEXT: f32.const $push0=, 0x1.4p2 +; SIMD128-SLOW-NEXT: f32x4.splat $push1=, $pop0 +; SIMD128-SLOW-NEXT: f32x4.min $push2=, $0, $pop1 +; SIMD128-SLOW-NEXT: return $pop2 +; +; SIMD128-FAST-LABEL: min_ordered_v4f32: +; SIMD128-FAST: .functype min_ordered_v4f32 (v128) -> (v128) +; SIMD128-FAST-NEXT: f32.const $push1=, 0x1.4p2 +; SIMD128-FAST-NEXT: f32x4.splat $push2=, $pop1 +; SIMD128-FAST-NEXT: f32x4.min $push0=, $0, $pop2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: min_ordered_v4f32: +; NO-SIMD128-SLOW: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.min $push1=, $4, $pop0 +; NO-SIMD128-SLOW-NEXT: f32.store 
0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.min $push4=, $3, $pop9 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.min $push5=, $2, $pop8 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-SLOW-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.min $push6=, $1, $pop7 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop6 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_ordered_v4f32: +; NO-SIMD128-FAST: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.min $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ole <4 x float> , %x %a = select <4 x i1> %cmps, <4 x float> , <4 x float> %x ret <4 x float> %a } -; CHECK-LABEL: max_ordered_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2 -; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]] -; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @max_ordered_v4f32(<4 x float> %x) { +; SIMD128-SLOW-LABEL: max_ordered_v4f32: +; SIMD128-SLOW: .functype max_ordered_v4f32 (v128) -> (v128) +; 
SIMD128-SLOW-NEXT: f32.const $push0=, 0x1.4p2 +; SIMD128-SLOW-NEXT: f32x4.splat $push1=, $pop0 +; SIMD128-SLOW-NEXT: f32x4.max $push2=, $0, $pop1 +; SIMD128-SLOW-NEXT: return $pop2 +; +; SIMD128-FAST-LABEL: max_ordered_v4f32: +; SIMD128-FAST: .functype max_ordered_v4f32 (v128) -> (v128) +; SIMD128-FAST-NEXT: f32.const $push1=, 0x1.4p2 +; SIMD128-FAST-NEXT: f32x4.splat $push2=, $pop1 +; SIMD128-FAST-NEXT: f32x4.max $push0=, $0, $pop2 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: max_ordered_v4f32: +; NO-SIMD128-SLOW: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push2=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push3=, $0, $pop2 +; NO-SIMD128-SLOW-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.max $push1=, $4, $pop0 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop3), $pop1 +; NO-SIMD128-SLOW-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.max $push4=, $3, $pop9 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.max $push5=, $2, $pop8 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-SLOW-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-SLOW-NEXT: f32.max $push6=, $1, $pop7 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop6 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_ordered_v4f32: +; NO-SIMD128-FAST: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push1=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 +; NO-SIMD128-FAST-NEXT: 
i32.add $push5=, $0, $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.max $push6=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: return %cmps = fcmp oge <4 x float> , %x %a = select <4 x i1> %cmps, <4 x float> , <4 x float> %x ret <4 x float> %a } -; CHECK-LABEL: min_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype min_intrinsic_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>) define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: min_intrinsic_v4f32: +; SIMD128: .functype min_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: f32x4.min $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: min_intrinsic_v4f32: +; NO-SIMD128-SLOW: .functype min_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: f32.min $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: f32.min $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: f32.min $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.min $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: min_intrinsic_v4f32: +; NO-SIMD128-FAST: .functype min_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.min $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.min $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; 
NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.min $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a } -; CHECK-LABEL: minnum_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: minnum_intrinsic_v4f32: +; SIMD128: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: f32x4.min $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: minnum_intrinsic_v4f32: +; NO-SIMD128-SLOW: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: f32.call $push0=, fminf, $4, $8 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: f32.call $push3=, fminf, $3, $7 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: f32.call $push4=, fminf, $2, $6 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.call $push5=, fminf, $1, $5 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: minnum_intrinsic_v4f32: +; NO-SIMD128-FAST: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.call $push0=, fminf, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.call $push1=, fminf, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: 
f32.call $push2=, fminf, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.call $push5=, fminf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a } -; CHECK-LABEL: max_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype max_intrinsic_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: max_intrinsic_v4f32: +; SIMD128: .functype max_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: f32x4.max $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: max_intrinsic_v4f32: +; NO-SIMD128-SLOW: .functype max_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: f32.max $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: f32.max $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: f32.max $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.max $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: max_intrinsic_v4f32: +; NO-SIMD128-FAST: .functype max_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.max $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.max $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; 
NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.max $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a } -; CHECK-LABEL: maxnum_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: maxnum_intrinsic_v4f32: +; SIMD128: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: f32x4.max $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: maxnum_intrinsic_v4f32: +; NO-SIMD128-SLOW: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: f32.call $push0=, fmaxf, $4, $8 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: f32.call $push3=, fmaxf, $3, $7 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: f32.call $push4=, fmaxf, $2, $6 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.call $push5=, fmaxf, $1, $5 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32: +; NO-SIMD128-FAST: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.call $push0=, fmaxf, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.call 
$push1=, fmaxf, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.call $push2=, fmaxf, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.call $push5=, fmaxf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a } -; CHECK-LABEL: min_const_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}} -; SIMD128-NEXT: f32.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}} -; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @min_const_intrinsic_v4f32() { +; SIMD128-SLOW-LABEL: min_const_intrinsic_v4f32: +; SIMD128-SLOW: .functype min_const_intrinsic_v4f32 () -> (v128) +; SIMD128-SLOW-NEXT: f32.const $push0=, 0x1.4p2 +; SIMD128-SLOW-NEXT: f32x4.splat $push1=, $pop0 +; SIMD128-SLOW-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: min_const_intrinsic_v4f32: +; SIMD128-FAST: .functype min_const_intrinsic_v4f32 () -> (v128) +; SIMD128-FAST-NEXT: f32.const $push1=, 0x1.4p2 +; SIMD128-FAST-NEXT: f32x4.splat $push0=, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: min_const_intrinsic_v4f32: +; NO-SIMD128: .functype min_const_intrinsic_v4f32 (i32) -> () +; NO-SIMD128-NEXT: i64.const $push0=, 4656722015785320448 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 4656722015785320448 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return %a = call <4 x float> @llvm.minimum.v4f32( <4 x float> , <4 x float> @@ -1120,13 +9503,26 @@ ret <4 x float> %a } -; CHECK-LABEL: max_const_intrinsic_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}} -; SIMD128-NEXT: f32.const 
$push[[L:[0-9]+]]=, 0x1.5p5{{$}} -; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @max_const_intrinsic_v4f32() { +; SIMD128-SLOW-LABEL: max_const_intrinsic_v4f32: +; SIMD128-SLOW: .functype max_const_intrinsic_v4f32 () -> (v128) +; SIMD128-SLOW-NEXT: f32.const $push0=, 0x1.5p5 +; SIMD128-SLOW-NEXT: f32x4.splat $push1=, $pop0 +; SIMD128-SLOW-NEXT: return $pop1 +; +; SIMD128-FAST-LABEL: max_const_intrinsic_v4f32: +; SIMD128-FAST: .functype max_const_intrinsic_v4f32 () -> (v128) +; SIMD128-FAST-NEXT: f32.const $push1=, 0x1.5p5 +; SIMD128-FAST-NEXT: f32x4.splat $push0=, $pop1 +; SIMD128-FAST-NEXT: return $pop0 +; +; NO-SIMD128-LABEL: max_const_intrinsic_v4f32: +; NO-SIMD128: .functype max_const_intrinsic_v4f32 (i32) -> () +; NO-SIMD128-NEXT: i64.const $push0=, 4767060206681587712 +; NO-SIMD128-NEXT: i64.store 8($0), $pop0 +; NO-SIMD128-NEXT: i64.const $push1=, 4767060206681587712 +; NO-SIMD128-NEXT: i64.store 0($0), $pop1 +; NO-SIMD128-NEXT: return %a = call <4 x float> @llvm.maximum.v4f32( <4 x float> , <4 x float> @@ -1134,55 +9530,260 @@ ret <4 x float> %a } -; CHECK-LABEL: add_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype add_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: add_v4f32: +; SIMD128: .functype add_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: f32x4.add $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: add_v4f32: +; NO-SIMD128-SLOW: .functype add_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: f32.add $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: f32.add $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: f32.store 
8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: f32.add $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.add $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: add_v4f32: +; NO-SIMD128-FAST: .functype add_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.add $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.add $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.add $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.add $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = fadd <4 x float> %x, %y ret <4 x float> %a } -; CHECK-LABEL: sub_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype sub_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: sub_v4f32: +; SIMD128: .functype sub_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: f32x4.sub $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: sub_v4f32: +; NO-SIMD128-SLOW: .functype sub_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: f32.sub $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: f32.sub $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: f32.sub $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.sub $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), 
$pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sub_v4f32: +; NO-SIMD128-FAST: .functype sub_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.sub $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.sub $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.sub $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.sub $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = fsub <4 x float> %x, %y ret <4 x float> %a } -; CHECK-LABEL: div_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-VM-NOT: f32x4.div -; SIMD128-NEXT: .functype div_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.div $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-UNIMPL-LABEL: div_v4f32: +; SIMD128-UNIMPL: .functype div_v4f32 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: f32x4.div $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: div_v4f32: +; SIMD128-VM-SLOW: .functype div_v4f32 (v128, v128) -> (v128) +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push4=, $0, 0 +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push3=, $1, 0 +; SIMD128-VM-SLOW-NEXT: f32.div $push5=, $pop4, $pop3 +; SIMD128-VM-SLOW-NEXT: f32x4.splat $push6=, $pop5 +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push1=, $0, 1 +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push0=, $1, 1 +; SIMD128-VM-SLOW-NEXT: f32.div $push2=, $pop1, $pop0 +; SIMD128-VM-SLOW-NEXT: f32x4.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push9=, $0, 2 +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push8=, $1, 2 +; SIMD128-VM-SLOW-NEXT: f32.div $push10=, $pop9, $pop8 +; 
SIMD128-VM-SLOW-NEXT: f32x4.replace_lane $push11=, $pop7, 2, $pop10 +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push13=, $0, 3 +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push12=, $1, 3 +; SIMD128-VM-SLOW-NEXT: f32.div $push14=, $pop13, $pop12 +; SIMD128-VM-SLOW-NEXT: f32x4.replace_lane $push15=, $pop11, 3, $pop14 +; SIMD128-VM-SLOW-NEXT: return $pop15 +; +; SIMD128-VM-FAST-LABEL: div_v4f32: +; SIMD128-VM-FAST: .functype div_v4f32 (v128, v128) -> (v128) +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push5=, $0, 0 +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push4=, $1, 0 +; SIMD128-VM-FAST-NEXT: f32.div $push6=, $pop5, $pop4 +; SIMD128-VM-FAST-NEXT: f32x4.splat $push7=, $pop6 +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push2=, $0, 1 +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push1=, $1, 1 +; SIMD128-VM-FAST-NEXT: f32.div $push3=, $pop2, $pop1 +; SIMD128-VM-FAST-NEXT: f32x4.replace_lane $push8=, $pop7, 1, $pop3 +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push10=, $0, 2 +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push9=, $1, 2 +; SIMD128-VM-FAST-NEXT: f32.div $push11=, $pop10, $pop9 +; SIMD128-VM-FAST-NEXT: f32x4.replace_lane $push12=, $pop8, 2, $pop11 +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push14=, $0, 3 +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push13=, $1, 3 +; SIMD128-VM-FAST-NEXT: f32.div $push15=, $pop14, $pop13 +; SIMD128-VM-FAST-NEXT: f32x4.replace_lane $push0=, $pop12, 3, $pop15 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: div_v4f32: +; NO-SIMD128-SLOW: .functype div_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: f32.div $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: f32.div $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: f32.div $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop4 +; 
NO-SIMD128-SLOW-NEXT: f32.div $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: div_v4f32: +; NO-SIMD128-FAST: .functype div_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.div $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.div $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.div $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.div $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = fdiv <4 x float> %x, %y ret <4 x float> %a } -; CHECK-LABEL: mul_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-NEXT: .functype mul_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.mul $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { +; SIMD128-LABEL: mul_v4f32: +; SIMD128: .functype mul_v4f32 (v128, v128) -> (v128) +; SIMD128-NEXT: f32x4.mul $push0=, $0, $1 +; SIMD128-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: mul_v4f32: +; NO-SIMD128-SLOW: .functype mul_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push1=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push2=, $0, $pop1 +; NO-SIMD128-SLOW-NEXT: f32.mul $push0=, $4, $8 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop2), $pop0 +; NO-SIMD128-SLOW-NEXT: f32.mul $push3=, $3, $7 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: f32.mul $push4=, $2, $6 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.mul $push5=, $1, $5 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: mul_v4f32: +; NO-SIMD128-FAST: .functype mul_v4f32 
(i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.mul $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.mul $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.mul $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.mul $push5=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = fmul <4 x float> %x, %y ret <4 x float> %a } -; CHECK-LABEL: sqrt_v4f32: -; NO-SIMD128-NOT: f32x4 -; SIMD128-VM-NOT: f32x4.sqrt -; SIMD128-NEXT: .functype sqrt_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.sqrt $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) define <4 x float> @sqrt_v4f32(<4 x float> %x) { +; SIMD128-UNIMPL-LABEL: sqrt_v4f32: +; SIMD128-UNIMPL: .functype sqrt_v4f32 (v128) -> (v128) +; SIMD128-UNIMPL-NEXT: f32x4.sqrt $push0=, $0 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; SIMD128-VM-SLOW-LABEL: sqrt_v4f32: +; SIMD128-VM-SLOW: .functype sqrt_v4f32 (v128) -> (v128) +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push2=, $0, 0 +; SIMD128-VM-SLOW-NEXT: f32.sqrt $push3=, $pop2 +; SIMD128-VM-SLOW-NEXT: f32x4.splat $push4=, $pop3 +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push0=, $0, 1 +; SIMD128-VM-SLOW-NEXT: f32.sqrt $push1=, $pop0 +; SIMD128-VM-SLOW-NEXT: f32x4.replace_lane $push5=, $pop4, 1, $pop1 +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push6=, $0, 2 +; SIMD128-VM-SLOW-NEXT: f32.sqrt $push7=, $pop6 +; SIMD128-VM-SLOW-NEXT: f32x4.replace_lane $push8=, $pop5, 2, $pop7 +; SIMD128-VM-SLOW-NEXT: f32x4.extract_lane $push9=, $0, 3 +; SIMD128-VM-SLOW-NEXT: f32.sqrt $push10=, $pop9 +; SIMD128-VM-SLOW-NEXT: f32x4.replace_lane $push11=, $pop8, 3, $pop10 +; SIMD128-VM-SLOW-NEXT: return $pop11 +; +; 
SIMD128-VM-FAST-LABEL: sqrt_v4f32: +; SIMD128-VM-FAST: .functype sqrt_v4f32 (v128) -> (v128) +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push3=, $0, 0 +; SIMD128-VM-FAST-NEXT: f32.sqrt $push4=, $pop3 +; SIMD128-VM-FAST-NEXT: f32x4.splat $push5=, $pop4 +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push1=, $0, 1 +; SIMD128-VM-FAST-NEXT: f32.sqrt $push2=, $pop1 +; SIMD128-VM-FAST-NEXT: f32x4.replace_lane $push6=, $pop5, 1, $pop2 +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push7=, $0, 2 +; SIMD128-VM-FAST-NEXT: f32.sqrt $push8=, $pop7 +; SIMD128-VM-FAST-NEXT: f32x4.replace_lane $push9=, $pop6, 2, $pop8 +; SIMD128-VM-FAST-NEXT: f32x4.extract_lane $push10=, $0, 3 +; SIMD128-VM-FAST-NEXT: f32.sqrt $push11=, $pop10 +; SIMD128-VM-FAST-NEXT: f32x4.replace_lane $push0=, $pop9, 3, $pop11 +; SIMD128-VM-FAST-NEXT: return $pop0 +; +; NO-SIMD128-SLOW-LABEL: sqrt_v4f32: +; NO-SIMD128-SLOW: .functype sqrt_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-SLOW-NEXT: i32.const $push0=, 12 +; NO-SIMD128-SLOW-NEXT: i32.add $push1=, $0, $pop0 +; NO-SIMD128-SLOW-NEXT: f32.sqrt $push2=, $4 +; NO-SIMD128-SLOW-NEXT: f32.store 0($pop1), $pop2 +; NO-SIMD128-SLOW-NEXT: f32.sqrt $push3=, $3 +; NO-SIMD128-SLOW-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-SLOW-NEXT: f32.sqrt $push4=, $2 +; NO-SIMD128-SLOW-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-SLOW-NEXT: f32.sqrt $push5=, $1 +; NO-SIMD128-SLOW-NEXT: f32.store 0($0), $pop5 +; NO-SIMD128-SLOW-NEXT: return +; +; NO-SIMD128-FAST-LABEL: sqrt_v4f32: +; NO-SIMD128-FAST: .functype sqrt_v4f32 (i32, f32, f32, f32, f32) -> () +; NO-SIMD128-FAST-NEXT: f32.sqrt $push0=, $1 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.sqrt $push1=, $2 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.sqrt $push2=, $3 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 +; NO-SIMD128-FAST-NEXT: f32.sqrt $push5=, $4 +; 
NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) ret <4 x float> %a } @@ -1190,113 +9791,291 @@ ; ============================================================================== ; 2 x double ; ============================================================================== -; CHECK-LABEL: neg_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype neg_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.neg $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} + define <2 x double> @neg_v2f64(<2 x double> %x) { ; nsz makes this semantically equivalent to flipping sign bit +; SIMD128-UNIMPL-LABEL: neg_v2f64: +; SIMD128-UNIMPL: .functype neg_v2f64 (v128) -> (v128) +; SIMD128-UNIMPL-NEXT: f64x2.neg $push0=, $0 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: neg_v2f64: +; NO-SIMD64-SLOW: .functype neg_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.neg $push0=, $2 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: f64.neg $push1=, $1 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: neg_v2f64: +; NO-SIMD64-FAST: .functype neg_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.neg $push0=, $1 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: f64.neg $push1=, $2 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = fsub nsz <2 x double> , %x ret <2 x double> %a } -; CHECK-LABEL: abs_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype abs_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.abs $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <2 x double> @llvm.fabs.v2f64(<2 x double>) nounwind readnone define <2 x double> @abs_v2f64(<2 x double> %x) { +; SIMD128-UNIMPL-LABEL: abs_v2f64: +; SIMD128-UNIMPL: .functype abs_v2f64 (v128) -> (v128) +; SIMD128-UNIMPL-NEXT: f64x2.abs $push0=, $0 +; 
SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: abs_v2f64: +; NO-SIMD64-SLOW: .functype abs_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.abs $push0=, $2 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: f64.abs $push1=, $1 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: abs_v2f64: +; NO-SIMD64-FAST: .functype abs_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.abs $push0=, $1 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: f64.abs $push1=, $2 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x) ret <2 x double> %a } -; CHECK-LABEL: min_unordered_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2 -; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]] -; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @min_unordered_v2f64(<2 x double> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: min_unordered_v2f64: +; SIMD128-UNIMPL-SLOW: .functype min_unordered_v2f64 (v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: f64.const $push0=, 0x1.4p2 +; SIMD128-UNIMPL-SLOW-NEXT: f64x2.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: f64x2.min $push2=, $0, $pop1 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop2 +; +; SIMD128-UNIMPL-FAST-LABEL: min_unordered_v2f64: +; SIMD128-UNIMPL-FAST: .functype min_unordered_v2f64 (v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: f64.const $push1=, 0x1.4p2 +; SIMD128-UNIMPL-FAST-NEXT: f64x2.splat $push2=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: f64x2.min $push0=, $0, $pop2 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: min_unordered_v2f64: +; NO-SIMD64-SLOW: .functype min_unordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.const $push0=, 0x1.4p2 +; 
NO-SIMD64-SLOW-NEXT: f64.min $push1=, $2, $pop0 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-SLOW-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD64-SLOW-NEXT: f64.min $push2=, $1, $pop3 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop2 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: min_unordered_v2f64: +; NO-SIMD64-FAST: .functype min_unordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD64-FAST-NEXT: f64.min $push1=, $1, $pop0 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-FAST-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD64-FAST-NEXT: f64.min $push2=, $2, $pop3 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD64-FAST-NEXT: return %cmps = fcmp ule <2 x double> %x, %a = select <2 x i1> %cmps, <2 x double> %x, <2 x double> ret <2 x double> %a } -; CHECK-LABEL: max_unordered_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2 -; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]] -; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @max_unordered_v2f64(<2 x double> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: max_unordered_v2f64: +; SIMD128-UNIMPL-SLOW: .functype max_unordered_v2f64 (v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: f64.const $push0=, 0x1.4p2 +; SIMD128-UNIMPL-SLOW-NEXT: f64x2.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: f64x2.max $push2=, $0, $pop1 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop2 +; +; SIMD128-UNIMPL-FAST-LABEL: max_unordered_v2f64: +; SIMD128-UNIMPL-FAST: .functype max_unordered_v2f64 (v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: f64.const $push1=, 0x1.4p2 +; SIMD128-UNIMPL-FAST-NEXT: f64x2.splat $push2=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: f64x2.max $push0=, $0, $pop2 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: max_unordered_v2f64: +; NO-SIMD64-SLOW: 
.functype max_unordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD64-SLOW-NEXT: f64.max $push1=, $2, $pop0 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-SLOW-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD64-SLOW-NEXT: f64.max $push2=, $1, $pop3 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop2 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: max_unordered_v2f64: +; NO-SIMD64-FAST: .functype max_unordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD64-FAST-NEXT: f64.max $push1=, $1, $pop0 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-FAST-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD64-FAST-NEXT: f64.max $push2=, $2, $pop3 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD64-FAST-NEXT: return %cmps = fcmp uge <2 x double> %x, %a = select <2 x i1> %cmps, <2 x double> %x, <2 x double> ret <2 x double> %a } -; CHECK-LABEL: min_ordered_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2 -; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]] -; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @min_ordered_v2f64(<2 x double> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: min_ordered_v2f64: +; SIMD128-UNIMPL-SLOW: .functype min_ordered_v2f64 (v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: f64.const $push0=, 0x1.4p2 +; SIMD128-UNIMPL-SLOW-NEXT: f64x2.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: f64x2.min $push2=, $0, $pop1 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop2 +; +; SIMD128-UNIMPL-FAST-LABEL: min_ordered_v2f64: +; SIMD128-UNIMPL-FAST: .functype min_ordered_v2f64 (v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: f64.const $push1=, 0x1.4p2 +; SIMD128-UNIMPL-FAST-NEXT: f64x2.splat $push2=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: f64x2.min $push0=, $0, $pop2 +; 
SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: min_ordered_v2f64: +; NO-SIMD64-SLOW: .functype min_ordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD64-SLOW-NEXT: f64.min $push1=, $2, $pop0 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-SLOW-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD64-SLOW-NEXT: f64.min $push2=, $1, $pop3 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop2 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: min_ordered_v2f64: +; NO-SIMD64-FAST: .functype min_ordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD64-FAST-NEXT: f64.min $push1=, $1, $pop0 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-FAST-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD64-FAST-NEXT: f64.min $push2=, $2, $pop3 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD64-FAST-NEXT: return %cmps = fcmp ole <2 x double> , %x %a = select <2 x i1> %cmps, <2 x double> , <2 x double> %x ret <2 x double> %a } -; CHECK-LABEL: max_ordered_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2 -; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]] -; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @max_ordered_v2f64(<2 x double> %x) { +; SIMD128-UNIMPL-SLOW-LABEL: max_ordered_v2f64: +; SIMD128-UNIMPL-SLOW: .functype max_ordered_v2f64 (v128) -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: f64.const $push0=, 0x1.4p2 +; SIMD128-UNIMPL-SLOW-NEXT: f64x2.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: f64x2.max $push2=, $0, $pop1 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop2 +; +; SIMD128-UNIMPL-FAST-LABEL: max_ordered_v2f64: +; SIMD128-UNIMPL-FAST: .functype max_ordered_v2f64 (v128) -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: f64.const $push1=, 0x1.4p2 +; SIMD128-UNIMPL-FAST-NEXT: 
f64x2.splat $push2=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: f64x2.max $push0=, $0, $pop2 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: max_ordered_v2f64: +; NO-SIMD64-SLOW: .functype max_ordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD64-SLOW-NEXT: f64.max $push1=, $2, $pop0 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-SLOW-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD64-SLOW-NEXT: f64.max $push2=, $1, $pop3 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop2 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: max_ordered_v2f64: +; NO-SIMD64-FAST: .functype max_ordered_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.const $push0=, 0x1.4p2 +; NO-SIMD64-FAST-NEXT: f64.max $push1=, $1, $pop0 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-FAST-NEXT: f64.const $push3=, 0x1.4p2 +; NO-SIMD64-FAST-NEXT: f64.max $push2=, $2, $pop3 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop2 +; NO-SIMD64-FAST-NEXT: return %cmps = fcmp oge <2 x double> , %x %a = select <2 x i1> %cmps, <2 x double> , <2 x double> %x ret <2 x double> %a } -; CHECK-LABEL: min_intrinsic_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype min_intrinsic_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>) define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-UNIMPL-LABEL: min_intrinsic_v2f64: +; SIMD128-UNIMPL: .functype min_intrinsic_v2f64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: f64x2.min $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: min_intrinsic_v2f64: +; NO-SIMD64-SLOW: .functype min_intrinsic_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.min $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: f64.min $push1=, $1, $3 +; 
NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: min_intrinsic_v2f64: +; NO-SIMD64-FAST: .functype min_intrinsic_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.min $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: f64.min $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y) ret <2 x double> %a } -; CHECK-LABEL: max_intrinsic_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype max_intrinsic_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-UNIMPL-LABEL: max_intrinsic_v2f64: +; SIMD128-UNIMPL: .functype max_intrinsic_v2f64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: f64x2.max $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: max_intrinsic_v2f64: +; NO-SIMD64-SLOW: .functype max_intrinsic_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.max $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: f64.max $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: max_intrinsic_v2f64: +; NO-SIMD64-FAST: .functype max_intrinsic_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.max $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: f64.max $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> %y) ret <2 x double> %a } -; CHECK-LABEL: min_const_intrinsic_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: 
.functype min_const_intrinsic_v2f64 () -> (v128){{$}} -; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.4p2{{$}} -; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @min_const_intrinsic_v2f64() { +; SIMD128-UNIMPL-SLOW-LABEL: min_const_intrinsic_v2f64: +; SIMD128-UNIMPL-SLOW: .functype min_const_intrinsic_v2f64 () -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: f64.const $push0=, 0x1.4p2 +; SIMD128-UNIMPL-SLOW-NEXT: f64x2.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop1 +; +; SIMD128-UNIMPL-FAST-LABEL: min_const_intrinsic_v2f64: +; SIMD128-UNIMPL-FAST: .functype min_const_intrinsic_v2f64 () -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: f64.const $push1=, 0x1.4p2 +; SIMD128-UNIMPL-FAST-NEXT: f64x2.splat $push0=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-LABEL: min_const_intrinsic_v2f64: +; NO-SIMD64: .functype min_const_intrinsic_v2f64 (i32) -> () +; NO-SIMD64-NEXT: i64.const $push0=, 4617315517961601024 +; NO-SIMD64-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-NEXT: i64.const $push1=, 4617315517961601024 +; NO-SIMD64-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-NEXT: return %a = call <2 x double> @llvm.minimum.v2f64( <2 x double> , <2 x double> @@ -1304,13 +10083,26 @@ ret <2 x double> %a } -; CHECK-LABEL: max_const_intrinsic_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}} -; SIMD128-NEXT: f64.const $push[[L:[0-9]+]]=, 0x1.5p5{{$}} -; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[L]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @max_const_intrinsic_v2f64() { +; SIMD128-UNIMPL-SLOW-LABEL: max_const_intrinsic_v2f64: +; SIMD128-UNIMPL-SLOW: .functype max_const_intrinsic_v2f64 () -> (v128) +; SIMD128-UNIMPL-SLOW-NEXT: f64.const $push0=, 0x1.5p5 +; SIMD128-UNIMPL-SLOW-NEXT: f64x2.splat $push1=, $pop0 +; SIMD128-UNIMPL-SLOW-NEXT: return $pop1 +; +; SIMD128-UNIMPL-FAST-LABEL: max_const_intrinsic_v2f64: +; 
SIMD128-UNIMPL-FAST: .functype max_const_intrinsic_v2f64 () -> (v128) +; SIMD128-UNIMPL-FAST-NEXT: f64.const $push1=, 0x1.5p5 +; SIMD128-UNIMPL-FAST-NEXT: f64x2.splat $push0=, $pop1 +; SIMD128-UNIMPL-FAST-NEXT: return $pop0 +; +; NO-SIMD64-LABEL: max_const_intrinsic_v2f64: +; NO-SIMD64: .functype max_const_intrinsic_v2f64 (i32) -> () +; NO-SIMD64-NEXT: i64.const $push0=, 4631107791820423168 +; NO-SIMD64-NEXT: i64.store 8($0), $pop0 +; NO-SIMD64-NEXT: i64.const $push1=, 4631107791820423168 +; NO-SIMD64-NEXT: i64.store 0($0), $pop1 +; NO-SIMD64-NEXT: return %a = call <2 x double> @llvm.maximum.v2f64( <2 x double> , <2 x double> @@ -1318,57 +10110,128 @@ ret <2 x double> %a } -; CHECK-LABEL: add_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-VM-NOT: f62x2 -; SIMD128-NEXT: .functype add_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.add $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @add_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-UNIMPL-LABEL: add_v2f64: +; SIMD128-UNIMPL: .functype add_v2f64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: f64x2.add $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: add_v2f64: +; NO-SIMD64-SLOW: .functype add_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.add $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: f64.add $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: add_v2f64: +; NO-SIMD64-FAST: .functype add_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.add $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: f64.add $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = fadd <2 x double> %x, %y ret <2 x double> %a } -; CHECK-LABEL: sub_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-VM-NOT: f62x2 -; SIMD128-NEXT: .functype 
sub_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.sub $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @sub_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-UNIMPL-LABEL: sub_v2f64: +; SIMD128-UNIMPL: .functype sub_v2f64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: f64x2.sub $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: sub_v2f64: +; NO-SIMD64-SLOW: .functype sub_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.sub $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: f64.sub $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: sub_v2f64: +; NO-SIMD64-FAST: .functype sub_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.sub $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: f64.sub $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = fsub <2 x double> %x, %y ret <2 x double> %a } -; CHECK-LABEL: div_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-VM-NOT: f62x2 -; SIMD128-NEXT: .functype div_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.div $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @div_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-UNIMPL-LABEL: div_v2f64: +; SIMD128-UNIMPL: .functype div_v2f64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: f64x2.div $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: div_v2f64: +; NO-SIMD64-SLOW: .functype div_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.div $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: f64.div $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: div_v2f64: +; NO-SIMD64-FAST: .functype 
div_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.div $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: f64.div $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = fdiv <2 x double> %x, %y ret <2 x double> %a } -; CHECK-LABEL: mul_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-VM-NOT: f62x2 -; SIMD128-NEXT: .functype mul_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.mul $push[[R:[0-9]+]]=, $0, $1{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @mul_v2f64(<2 x double> %x, <2 x double> %y) { +; SIMD128-UNIMPL-LABEL: mul_v2f64: +; SIMD128-UNIMPL: .functype mul_v2f64 (v128, v128) -> (v128) +; SIMD128-UNIMPL-NEXT: f64x2.mul $push0=, $0, $1 +; SIMD128-UNIMPL-NEXT: return $pop0 +; +; NO-SIMD64-SLOW-LABEL: mul_v2f64: +; NO-SIMD64-SLOW: .functype mul_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.mul $push0=, $2, $4 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: f64.mul $push1=, $1, $3 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: mul_v2f64: +; NO-SIMD64-FAST: .functype mul_v2f64 (i32, f64, f64, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.mul $push0=, $1, $3 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: f64.mul $push1=, $2, $4 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = fmul <2 x double> %x, %y ret <2 x double> %a } -; CHECK-LABEL: sqrt_v2f64: -; NO-SIMD128-NOT: f64x2 -; SIMD128-NEXT: .functype sqrt_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.sqrt $push[[R:[0-9]+]]=, $0{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) define <2 x double> @sqrt_v2f64(<2 x double> %x) { +; SIMD128-UNIMPL-LABEL: sqrt_v2f64: +; SIMD128-UNIMPL: .functype sqrt_v2f64 (v128) -> (v128) +; SIMD128-UNIMPL-NEXT: f64x2.sqrt $push0=, $0 +; SIMD128-UNIMPL-NEXT: 
return $pop0 +; +; NO-SIMD64-SLOW-LABEL: sqrt_v2f64: +; NO-SIMD64-SLOW: .functype sqrt_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-SLOW-NEXT: f64.sqrt $push0=, $2 +; NO-SIMD64-SLOW-NEXT: f64.store 8($0), $pop0 +; NO-SIMD64-SLOW-NEXT: f64.sqrt $push1=, $1 +; NO-SIMD64-SLOW-NEXT: f64.store 0($0), $pop1 +; NO-SIMD64-SLOW-NEXT: return +; +; NO-SIMD64-FAST-LABEL: sqrt_v2f64: +; NO-SIMD64-FAST: .functype sqrt_v2f64 (i32, f64, f64) -> () +; NO-SIMD64-FAST-NEXT: f64.sqrt $push0=, $1 +; NO-SIMD64-FAST-NEXT: f64.store 0($0), $pop0 +; NO-SIMD64-FAST-NEXT: f64.sqrt $push1=, $2 +; NO-SIMD64-FAST-NEXT: f64.store 8($0), $pop1 +; NO-SIMD64-FAST-NEXT: return %a = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) ret <2 x double> %a }