diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp @@ -41,13 +41,57 @@ char WebAssemblyFixBrTableDefaults::ID = 0; +// Target independent selection dag assumes that it is ok to use PointerTy +// as the index for a "switch", whereas Wasm so far only has a 32-bit br_table. +// See e.g. SelectionDAGBuilder::visitJumpTableHeader +// We have a 64-bit br_table in the tablegen defs as a result, which does get +// selected, and thus we get incorrect truncates/extensions happening on +// wasm64. Here we fix that by switching MI to the 32-bit br_table form. +void fixBrTableIndex(MachineInstr &MI, MachineBasicBlock *MBB, + MachineFunction &MF) { + // Only happens on wasm64. + auto &WST = MF.getSubtarget<WebAssemblySubtarget>(); + if (!WST.hasAddr64()) + return; + + assert(MI.getDesc().getOpcode() == WebAssembly::BR_TABLE_I64 && + "64-bit br_table pseudo instruction expected"); + + // Get the header block, which contains the redundant extend op. + assert(MBB->pred_size() == 1 && "Expected a single guard predecessor"); + auto *HeaderMBB = *MBB->pred_begin(); + + // Find extension op, if any. It sits in the previous BB before the branch. + auto &Last = *HeaderMBB->getLastNonDebugInstr(); + assert(Last.getOpcode() == WebAssembly::BR_IF); + MachineInstr *Ext = Last.getPrevNode(); + if (Ext && Ext->getOpcode() == WebAssembly::I64_EXTEND_U_I32) { + // Unnecessarily extending a 32-bit value to 64, remove it. + assert(MI.getOperand(0).getReg() == Ext->getOperand(0).getReg()); + MI.getOperand(0).setReg(Ext->getOperand(1).getReg()); + Ext->eraseFromParent(); + } else { + // Incoming 64-bit value that needs to be truncated. 
+ Register Reg32 = + MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass); + BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(), + WST.getInstrInfo()->get(WebAssembly::I32_WRAP_I64), Reg32) + .addReg(MI.getOperand(0).getReg()); + MI.getOperand(0).setReg(Reg32); + } + + // We now have a 32-bit operand in all cases, so change the instruction + // accordingly. + MI.setDesc(WST.getInstrInfo()->get(WebAssembly::BR_TABLE_I32)); +} + // `MI` is a br_table instruction with a dummy default target argument. This // function finds and adds the default target argument and removes any redundant // range check preceding the br_table. Returns the MBB that the br_table is // moved into so it can be removed from further consideration, or nullptr if the // br_table cannot be optimized. -MachineBasicBlock *fixBrTable(MachineInstr &MI, MachineBasicBlock *MBB, - MachineFunction &MF) { +MachineBasicBlock *fixBrTableDefault(MachineInstr &MI, MachineBasicBlock *MBB, + MachineFunction &MF) { // Get the header block, which contains the redundant range check. 
assert(MBB->pred_size() == 1 && "Expected a single guard predecessor"); auto *HeaderMBB = *MBB->pred_begin(); @@ -125,7 +169,8 @@ MBBSet.erase(MBB); for (auto &MI : *MBB) { if (WebAssembly::isBrTable(MI)) { - auto *Fixed = fixBrTable(MI, MBB, MF); + fixBrTableIndex(MI, MBB, MF); + auto *Fixed = fixBrTableDefault(MI, MBB, MF); if (Fixed != nullptr) { MBBSet.erase(Fixed); Changed = true; diff --git a/llvm/test/CodeGen/WebAssembly/switch.ll b/llvm/test/CodeGen/WebAssembly/switch.ll --- a/llvm/test/CodeGen/WebAssembly/switch.ll +++ b/llvm/test/CodeGen/WebAssembly/switch.ll @@ -1,11 +1,9 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs | FileCheck %s +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs | FileCheck %s +; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs | FileCheck %s ; Test switch instructions. Block placement is disabled because it reorders ; the blocks in a way that isn't interesting here. 
-target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" -target triple = "wasm32-unknown-unknown" - declare void @foo0() declare void @foo1() declare void @foo2() @@ -14,28 +12,28 @@ declare void @foo5() ; CHECK-LABEL: bar32: -; CHECK: block {{$}} -; CHECK: block {{$}} -; CHECK: block {{$}} -; CHECK: block {{$}} -; CHECK: block {{$}} -; CHECK: block {{$}} -; CHECK: block {{$}} -; CHECK: br_table {{[^,]+}}, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6{{$}} -; CHECK: .LBB{{[0-9]+}}_1: -; CHECK: call foo0{{$}} -; CHECK: .LBB{{[0-9]+}}_2: -; CHECK: call foo1{{$}} -; CHECK: .LBB{{[0-9]+}}_3: -; CHECK: call foo2{{$}} -; CHECK: .LBB{{[0-9]+}}_4: -; CHECK: call foo3{{$}} -; CHECK: .LBB{{[0-9]+}}_5: -; CHECK: call foo4{{$}} -; CHECK: .LBB{{[0-9]+}}_6: -; CHECK: call foo5{{$}} -; CHECK: .LBB{{[0-9]+}}_7: -; CHECK: return{{$}} +; CHECK: block {{$}} +; CHECK: block {{$}} +; CHECK: block {{$}} +; CHECK: block {{$}} +; CHECK: block {{$}} +; CHECK: block {{$}} +; CHECK: block {{$}} +; CHECK-NEXT: br_table {{[^,]+}}, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6{{$}} +; CHECK: .LBB{{[0-9]+}}_1: +; CHECK: call foo0{{$}} +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: call foo1{{$}} +; CHECK: .LBB{{[0-9]+}}_3: +; CHECK: call foo2{{$}} +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK: call foo3{{$}} +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK: call foo4{{$}} +; CHECK: .LBB{{[0-9]+}}_6: +; CHECK: call foo5{{$}} +; CHECK: .LBB{{[0-9]+}}_7: +; CHECK: return{{$}} define void @bar32(i32 %n) { entry: switch i32 %n, label %sw.epilog [ @@ -94,32 +92,32 @@ } ; CHECK-LABEL: bar64: -; CHECK: block {{$}} -; CHECK: i64.const -; CHECK: i64.gt_u -; CHECK: br_if 0 -; CHECK: block {{$}} -; CHECK: block {{$}} -; CHECK: block {{$}} -; CHECK: block {{$}} -; CHECK: block {{$}} -; CHECK: block {{$}} -; CHECK: i32.wrap_i64 -; CHECK: br_table {{[^,]+}}, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 0{{$}} -; CHECK: .LBB{{[0-9]+}}_2: -; CHECK: call 
foo0{{$}} -; CHECK: .LBB{{[0-9]+}}_3: -; CHECK: call foo1{{$}} -; CHECK: .LBB{{[0-9]+}}_4: -; CHECK: call foo2{{$}} -; CHECK: .LBB{{[0-9]+}}_5: -; CHECK: call foo3{{$}} -; CHECK: .LBB{{[0-9]+}}_6: -; CHECK: call foo4{{$}} -; CHECK: .LBB{{[0-9]+}}_7: -; CHECK: call foo5{{$}} -; CHECK: .LBB{{[0-9]+}}_8: -; CHECK: return{{$}} +; CHECK: block {{$}} +; CHECK: i64.const +; CHECK: i64.gt_u +; CHECK: br_if 0 +; CHECK: block {{$}} +; CHECK: block {{$}} +; CHECK: block {{$}} +; CHECK: block {{$}} +; CHECK: block {{$}} +; CHECK: block {{$}} +; CHECK: i32.wrap_i64 +; CHECK-NEXT: br_table {{[^,]+}}, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5, 0{{$}} +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK: call foo0{{$}} +; CHECK: .LBB{{[0-9]+}}_3: +; CHECK: call foo1{{$}} +; CHECK: .LBB{{[0-9]+}}_4: +; CHECK: call foo2{{$}} +; CHECK: .LBB{{[0-9]+}}_5: +; CHECK: call foo3{{$}} +; CHECK: .LBB{{[0-9]+}}_6: +; CHECK: call foo4{{$}} +; CHECK: .LBB{{[0-9]+}}_7: +; CHECK: call foo5{{$}} +; CHECK: .LBB{{[0-9]+}}_8: +; CHECK: return{{$}} define void @bar64(i64 %n) { entry: switch i64 %n, label %sw.epilog [ @@ -178,24 +176,24 @@ } ; CHECK-LABEL: truncated: -; CHECK: block -; CHECK: block -; CHECK: block -; CHECK: i32.wrap_i64 -; CHECK: br_table {{[^,]+}}, 0, 1, 2{{$}} -; CHECK: .LBB{{[0-9]+}}_1 -; CHECK: end_block -; CHECK: call foo0{{$}} -; CHECK: return{{$}} -; CHECK: .LBB{{[0-9]+}}_2 -; CHECK: end_block -; CHECK: call foo1{{$}} -; CHECK: return{{$}} -; CHECK: .LBB{{[0-9]+}}_3 -; CHECK: end_block -; CHECK: call foo2{{$}} -; CHECK: return{{$}} -; CHECK: end_function +; CHECK: block +; CHECK: block +; CHECK: block +; CHECK: i32.wrap_i64 +; CHECK-NEXT: br_table {{[^,]+}}, 0, 1, 2{{$}} +; CHECK: .LBB{{[0-9]+}}_1 +; CHECK: end_block +; CHECK: call foo0{{$}} +; CHECK: return{{$}} +; CHECK: .LBB{{[0-9]+}}_2 +; CHECK: end_block +; CHECK: call foo1{{$}} +; CHECK: return{{$}} +; CHECK: .LBB{{[0-9]+}}_3 +; CHECK: end_block +; CHECK: call foo2{{$}} +; CHECK: return{{$}} +; CHECK: 
end_function define void @truncated(i64 %n) { entry: %m = trunc i64 %n to i32