Patch summary (text before the first "Index:" header is skipped by patch tools):
  * WebAssemblyExplicitLocals.cpp: drop `static` from WasmDisableExplicitLocals
    so that the option can be referenced from another translation unit.
  * WebAssemblyInstrSIMD.td: add patterns that select a bitconvert between two
    different 128-bit vector types to the unchanged V128 operand.
  * WebAssemblyRegStackify.cpp: return Def without creating a tee when
    WasmDisableExplicitLocals is set.
  * WebAssemblyUtilities.cpp: add TEE_V128 and TEE_V128_S next to the existing
    TEE_* cases that return true.
  * simd-conversions.ll: new test with one bitcast function per ordered pair of
    the six vector types.

Index: lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -35,7 +35,7 @@
 // for the purpose of testing with lit/llc ONLY.
 // This produces output which is not valid WebAssembly, and is not supported
 // by assemblers/disassemblers and other MC based tools.
-static cl::opt<bool> WasmDisableExplicitLocals(
+cl::opt<bool> WasmDisableExplicitLocals(
     "wasm-disable-explicit-locals", cl::Hidden,
     cl::desc("WebAssembly: output implicit locals in"
              " instruction output for test purposes only."),
Index: lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
===================================================================
--- lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -271,6 +271,17 @@
 def : Pat<(i32 (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx))),
           (EXTRACT_LANE_v8i16_u V128:$vec, (i32 LaneIdx8:$idx))>;
 
+// Bitcasts are nops
+// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
+foreach t1 = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
+foreach t2 = !foldl(
+  []<ValueType>, [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+  acc, cur, !if(!eq(!cast<string>(t1), !cast<string>(cur)),
+    acc, !listconcat(acc, [cur])
+  )
+) in
+def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;
+
 // arbitrary other BUILD_VECTOR patterns
 def : Pat<(v16i8 (build_vector (i32 I32:$x0), (i32 I32:$x1),
                                (i32 I32:$x2), (i32 I32:$x3),
Index: lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -562,6 +562,12 @@
     MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
   LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
 
+  // If explicit locals are disabled, the tee this function would produce will
+  // never be lowered to tee_local, so do not produce a tee.
+  extern cl::opt<bool> WasmDisableExplicitLocals;
+  if (WasmDisableExplicitLocals)
+    return Def;
+
   // Move Def into place.
   MBB.splice(Insert, &MBB, Def);
   LIS.handleMove(*Def);
Index: lib/Target/WebAssembly/WebAssemblyUtilities.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -79,6 +79,8 @@
   case WebAssembly::TEE_F32_S:
   case WebAssembly::TEE_F64:
   case WebAssembly::TEE_F64_S:
+  case WebAssembly::TEE_V128:
+  case WebAssembly::TEE_V128_S:
     return true;
   default:
     return false;
Index: test/CodeGen/WebAssembly/simd-conversions.ll
===================================================================
--- /dev/null
+++ test/CodeGen/WebAssembly/simd-conversions.ll
@@ -0,0 +1,224 @@
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-enable-unimplemented-simd -mattr=+simd128,+sign-ext --show-mc-encoding | FileCheck %s --check-prefixes CHECK,SIMD128
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+simd128,+sign-ext --show-mc-encoding | FileCheck %s --check-prefixes CHECK,SIMD128-VM
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-simd128,+sign-ext --show-mc-encoding | FileCheck %s --check-prefixes CHECK,NO-SIMD128
+
+; Test that bitcasts between vector types are lowered to zero instructions
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; CHECK-LABEL: v16i8_to_v16i8:
+define <16 x i8> @v16i8_to_v16i8(<16 x i8> %v) {
+  %res = bitcast <16 x i8> %v to <16 x i8>
+  ret <16 x i8> %res
+}
+
+; CHECK-LABEL: v16i8_to_v8i16:
+define <8 x i16> @v16i8_to_v8i16(<16 x i8> %v) {
+  %res = bitcast <16 x i8> %v to <8 x i16>
+  ret <8 x i16> %res
+}
+
+; CHECK-LABEL: v16i8_to_v4i32:
+define <4 x i32> @v16i8_to_v4i32(<16 x i8> %v) {
+  %res = bitcast <16 x i8> %v to <4 x i32>
+  ret <4 x i32> %res
+}
+
+; CHECK-LABEL: v16i8_to_v2i64:
+define <2 x i64> @v16i8_to_v2i64(<16 x i8> %v) {
+  %res = bitcast <16 x i8> %v to <2 x i64>
+  ret <2 x i64> %res
+}
+
+; CHECK-LABEL: v16i8_to_v4f32:
+define <4 x float> @v16i8_to_v4f32(<16 x i8> %v) {
+  %res = bitcast <16 x i8> %v to <4 x float>
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: v16i8_to_v2f64:
+define <2 x double> @v16i8_to_v2f64(<16 x i8> %v) {
+  %res = bitcast <16 x i8> %v to <2 x double>
+  ret <2 x double> %res
+}
+
+; CHECK-LABEL: v8i16_to_v16i8:
+define <16 x i8> @v8i16_to_v16i8(<8 x i16> %v) {
+  %res = bitcast <8 x i16> %v to <16 x i8>
+  ret <16 x i8> %res
+}
+
+; CHECK-LABEL: v8i16_to_v8i16:
+define <8 x i16> @v8i16_to_v8i16(<8 x i16> %v) {
+  %res = bitcast <8 x i16> %v to <8 x i16>
+  ret <8 x i16> %res
+}
+
+; CHECK-LABEL: v8i16_to_v4i32:
+define <4 x i32> @v8i16_to_v4i32(<8 x i16> %v) {
+  %res = bitcast <8 x i16> %v to <4 x i32>
+  ret <4 x i32> %res
+}
+
+; CHECK-LABEL: v8i16_to_v2i64:
+define <2 x i64> @v8i16_to_v2i64(<8 x i16> %v) {
+  %res = bitcast <8 x i16> %v to <2 x i64>
+  ret <2 x i64> %res
+}
+
+; CHECK-LABEL: v8i16_to_v4f32:
+define <4 x float> @v8i16_to_v4f32(<8 x i16> %v) {
+  %res = bitcast <8 x i16> %v to <4 x float>
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: v8i16_to_v2f64:
+define <2 x double> @v8i16_to_v2f64(<8 x i16> %v) {
+  %res = bitcast <8 x i16> %v to <2 x double>
+  ret <2 x double> %res
+}
+
+; CHECK-LABEL: v4i32_to_v16i8:
+define <16 x i8> @v4i32_to_v16i8(<4 x i32> %v) {
+  %res = bitcast <4 x i32> %v to <16 x i8>
+  ret <16 x i8> %res
+}
+
+; CHECK-LABEL: v4i32_to_v8i16:
+define <8 x i16> @v4i32_to_v8i16(<4 x i32> %v) {
+  %res = bitcast <4 x i32> %v to <8 x i16>
+  ret <8 x i16> %res
+}
+
+; CHECK-LABEL: v4i32_to_v4i32:
+define <4 x i32> @v4i32_to_v4i32(<4 x i32> %v) {
+  %res = bitcast <4 x i32> %v to <4 x i32>
+  ret <4 x i32> %res
+}
+
+; CHECK-LABEL: v4i32_to_v2i64:
+define <2 x i64> @v4i32_to_v2i64(<4 x i32> %v) {
+  %res = bitcast <4 x i32> %v to <2 x i64>
+  ret <2 x i64> %res
+}
+
+; CHECK-LABEL: v4i32_to_v4f32:
+define <4 x float> @v4i32_to_v4f32(<4 x i32> %v) {
+  %res = bitcast <4 x i32> %v to <4 x float>
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: v4i32_to_v2f64:
+define <2 x double> @v4i32_to_v2f64(<4 x i32> %v) {
+  %res = bitcast <4 x i32> %v to <2 x double>
+  ret <2 x double> %res
+}
+
+; CHECK-LABEL: v2i64_to_v16i8:
+define <16 x i8> @v2i64_to_v16i8(<2 x i64> %v) {
+  %res = bitcast <2 x i64> %v to <16 x i8>
+  ret <16 x i8> %res
+}
+
+; CHECK-LABEL: v2i64_to_v8i16:
+define <8 x i16> @v2i64_to_v8i16(<2 x i64> %v) {
+  %res = bitcast <2 x i64> %v to <8 x i16>
+  ret <8 x i16> %res
+}
+
+; CHECK-LABEL: v2i64_to_v4i32:
+define <4 x i32> @v2i64_to_v4i32(<2 x i64> %v) {
+  %res = bitcast <2 x i64> %v to <4 x i32>
+  ret <4 x i32> %res
+}
+
+; CHECK-LABEL: v2i64_to_v2i64:
+define <2 x i64> @v2i64_to_v2i64(<2 x i64> %v) {
+  %res = bitcast <2 x i64> %v to <2 x i64>
+  ret <2 x i64> %res
+}
+
+; CHECK-LABEL: v2i64_to_v4f32:
+define <4 x float> @v2i64_to_v4f32(<2 x i64> %v) {
+  %res = bitcast <2 x i64> %v to <4 x float>
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: v2i64_to_v2f64:
+define <2 x double> @v2i64_to_v2f64(<2 x i64> %v) {
+  %res = bitcast <2 x i64> %v to <2 x double>
+  ret <2 x double> %res
+}
+
+; CHECK-LABEL: v4f32_to_v16i8:
+define <16 x i8> @v4f32_to_v16i8(<4 x float> %v) {
+  %res = bitcast <4 x float> %v to <16 x i8>
+  ret <16 x i8> %res
+}
+
+; CHECK-LABEL: v4f32_to_v8i16:
+define <8 x i16> @v4f32_to_v8i16(<4 x float> %v) {
+  %res = bitcast <4 x float> %v to <8 x i16>
+  ret <8 x i16> %res
+}
+
+; CHECK-LABEL: v4f32_to_v4i32:
+define <4 x i32> @v4f32_to_v4i32(<4 x float> %v) {
+  %res = bitcast <4 x float> %v to <4 x i32>
+  ret <4 x i32> %res
+}
+
+; CHECK-LABEL: v4f32_to_v2i64:
+define <2 x i64> @v4f32_to_v2i64(<4 x float> %v) {
+  %res = bitcast <4 x float> %v to <2 x i64>
+  ret <2 x i64> %res
+}
+
+; CHECK-LABEL: v4f32_to_v4f32:
+define <4 x float> @v4f32_to_v4f32(<4 x float> %v) {
+  %res = bitcast <4 x float> %v to <4 x float>
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: v4f32_to_v2f64:
+define <2 x double> @v4f32_to_v2f64(<4 x float> %v) {
+  %res = bitcast <4 x float> %v to <2 x double>
+  ret <2 x double> %res
+}
+
+; CHECK-LABEL: v2f64_to_v16i8:
+define <16 x i8> @v2f64_to_v16i8(<2 x double> %v) {
+  %res = bitcast <2 x double> %v to <16 x i8>
+  ret <16 x i8> %res
+}
+
+; CHECK-LABEL: v2f64_to_v8i16:
+define <8 x i16> @v2f64_to_v8i16(<2 x double> %v) {
+  %res = bitcast <2 x double> %v to <8 x i16>
+  ret <8 x i16> %res
+}
+
+; CHECK-LABEL: v2f64_to_v4i32:
+define <4 x i32> @v2f64_to_v4i32(<2 x double> %v) {
+  %res = bitcast <2 x double> %v to <4 x i32>
+  ret <4 x i32> %res
+}
+
+; CHECK-LABEL: v2f64_to_v2i64:
+define <2 x i64> @v2f64_to_v2i64(<2 x double> %v) {
+  %res = bitcast <2 x double> %v to <2 x i64>
+  ret <2 x i64> %res
+}
+
+; CHECK-LABEL: v2f64_to_v4f32:
+define <4 x float> @v2f64_to_v4f32(<2 x double> %v) {
+  %res = bitcast <2 x double> %v to <4 x float>
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: v2f64_to_v2f64:
+define <2 x double> @v2f64_to_v2f64(<2 x double> %v) {
+  %res = bitcast <2 x double> %v to <2 x double>
+  ret <2 x double> %res
+}