Index: llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -460,6 +460,12 @@ // Expand indirectbr instructions to switches. addPass(createIndirectBrExpandPass()); + if (getOptLevel() >= CodeGenOpt::Default) { + addPass(createSeparateConstOffsetFromGEPPass()); + addPass(createEarlyCSEPass()); + addPass(createLICMPass()); + } + TargetPassConfig::addIRPasses(); } Index: llvm/test/CodeGen/WebAssembly/address-offsets.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/address-offsets.ll +++ llvm/test/CodeGen/WebAssembly/address-offsets.ll @@ -38,14 +38,12 @@ ; CHECK-LABEL: load_test1: ; CHECK: .functype load_test1 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push2=, g@GOT ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $0, $pop0 -; CHECK-NEXT: global.get $push2=, g@GOT -; CHECK-NEXT: i32.add $push3=, $pop1, $pop2 -; CHECK-NEXT: i32.const $push4=, 40 -; CHECK-NEXT: i32.add $push5=, $pop3, $pop4 -; CHECK-NEXT: i32.load $push6=, 0($pop5) -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i32.add $push3=, $pop2, $pop1 +; CHECK-NEXT: i32.load $push4=, 40($pop3) +; CHECK-NEXT: return $pop4 %add = add nsw i32 %n, 10 %arrayidx = getelementptr inbounds [0 x i32], ptr @g, i32 0, i32 %add %t = load i32, ptr %arrayidx, align 4 @@ -56,14 +54,12 @@ ; CHECK-LABEL: load_test2: ; CHECK: .functype load_test2 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push2=, g@GOT ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $0, $pop0 -; CHECK-NEXT: global.get $push2=, g@GOT -; CHECK-NEXT: i32.add $push3=, $pop1, $pop2 -; CHECK-NEXT: i32.const $push4=, 40 -; CHECK-NEXT: i32.add $push5=, $pop3, $pop4 -; CHECK-NEXT: i32.load $push6=, 0($pop5) -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i32.add $push3=, $pop2, $pop1 +; CHECK-NEXT: i32.load $push4=, 40($pop3) +; CHECK-NEXT: return $pop4 %add = add nsw i32 10, %n %arrayidx = getelementptr inbounds [0 x i32], ptr @g, i32 0, i32 %add %t = load i32, ptr %arrayidx, align 4 @@ -124,14 +120,12 @@ ; CHECK-LABEL: load_test6: ; CHECK: .functype load_test6 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push2=, g@GOT ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $0, $pop0 -; CHECK-NEXT: global.get $push2=, g@GOT -; CHECK-NEXT: i32.add $push3=, $pop1, $pop2 -; CHECK-NEXT: i32.const $push4=, 40 -; CHECK-NEXT: i32.add $push5=, $pop3, $pop4 -; CHECK-NEXT: i32.load $push6=, 0($pop5) -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i32.add $push3=, $pop2, $pop1 +; CHECK-NEXT: i32.load $push4=, 40($pop3) +; CHECK-NEXT: return $pop4 %add = add nsw i32 %n, 10 %add.ptr = getelementptr inbounds [0 x i32], ptr @g, i32 0, i32 %add %t = load i32, ptr %add.ptr, align 4 @@ -158,14 +152,12 @@ ; CHECK-LABEL: load_test8: ; CHECK: .functype load_test8 (i32) -> (i32) ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push2=, g@GOT ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $0, $pop0 -; CHECK-NEXT: global.get $push2=, g@GOT -; CHECK-NEXT: i32.add $push3=, $pop1, $pop2 -; CHECK-NEXT: i32.const $push4=, 40 -; CHECK-NEXT: i32.add $push5=, $pop3, $pop4 -; CHECK-NEXT: i32.load $push6=, 0($pop5) -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i32.add $push3=, $pop2, $pop1 +; CHECK-NEXT: i32.load $push4=, 40($pop3) +; CHECK-NEXT: return $pop4 %add = add nsw i32 10, %n %add.ptr = getelementptr inbounds [0 x i32], ptr @g, i32 0, i32 %add %t = load i32, ptr %add.ptr, align 4 @@ -233,11 +225,9 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0 -; CHECK-NEXT: i32.add $push2=, $pop1, $0 -; CHECK-NEXT: i32.const $push3=, 40 -; CHECK-NEXT: i32.add $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.load $push5=, 0($pop4) -; CHECK-NEXT: return $pop5 +; CHECK-NEXT: i32.add $push2=, $0, $pop1 +; CHECK-NEXT: i32.load $push3=, 40($pop2) +; CHECK-NEXT: return $pop3 %add = add nsw i32 %n, 10 %arrayidx = getelementptr inbounds i32, ptr %p, i32 %add %t = load i32, ptr %arrayidx, align 4 @@ -250,11 +240,9 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0 -; CHECK-NEXT: i32.add $push2=, $pop1, $0 -; CHECK-NEXT: i32.const $push3=, 40 -; CHECK-NEXT: i32.add $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.load $push5=, 0($pop4) -; CHECK-NEXT: return $pop5 +; CHECK-NEXT: i32.add $push2=, $0, $pop1 +; CHECK-NEXT: i32.load $push3=, 40($pop2) +; CHECK-NEXT: return $pop3 %add = add nsw i32 10, %n %arrayidx = getelementptr inbounds i32, ptr %p, i32 %add %t = load i32, ptr %arrayidx, align 4 @@ -316,11 +304,9 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0 -; CHECK-NEXT: i32.add $push2=, $pop1, $0 -; CHECK-NEXT: i32.const $push3=, 40 -; CHECK-NEXT: i32.add $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.load $push5=, 0($pop4) -; CHECK-NEXT: return $pop5 +; CHECK-NEXT: i32.add $push2=, $0, $pop1 +; CHECK-NEXT: i32.load $push3=, 40($pop2) +; CHECK-NEXT: return $pop3 %add = add nsw i32 %n, 10 %add.ptr = getelementptr inbounds i32, ptr %p, i32 %add %t = load i32, ptr %add.ptr, align 4 @@ -348,11 +334,9 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0 -; CHECK-NEXT: i32.add $push2=, $pop1, $0 -; CHECK-NEXT: i32.const $push3=, 40 -; CHECK-NEXT: i32.add $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.load $push5=, 0($pop4) -; CHECK-NEXT: return $pop5 +; CHECK-NEXT: i32.add $push2=, $0, $pop1 +; CHECK-NEXT: i32.load $push3=, 40($pop2) +; CHECK-NEXT: return $pop3 %add = add nsw i32 10, %n %add.ptr = getelementptr inbounds i32, ptr %p, i32 %add %t = load i32, ptr %add.ptr, align 4 @@ -415,13 +399,11 @@ ; CHECK-LABEL: store_test1: ; CHECK: .functype store_test1 (i32, i32) -> () ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push2=, g@GOT ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $0, $pop0 -; CHECK-NEXT: global.get $push2=, g@GOT -; CHECK-NEXT: i32.add $push3=, $pop1, $pop2 -; CHECK-NEXT: i32.const $push4=, 40 -; CHECK-NEXT: i32.add $push5=, $pop3, $pop4 -; CHECK-NEXT: i32.store 0($pop5), $1 +; CHECK-NEXT: i32.add $push3=, $pop2, $pop1 +; CHECK-NEXT: i32.store 40($pop3), $1 ; CHECK-NEXT: return %add = add nsw i32 %n, 10 %arrayidx = getelementptr inbounds [0 x i32], ptr @g, i32 0, i32 %add @@ -433,13 +415,11 @@ ; CHECK-LABEL: store_test2: ; CHECK: .functype store_test2 (i32, i32) -> () ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push2=, g@GOT ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $0, $pop0 -; CHECK-NEXT: global.get $push2=, g@GOT -; CHECK-NEXT: i32.add $push3=, $pop1, $pop2 -; CHECK-NEXT: i32.const $push4=, 40 -; CHECK-NEXT: i32.add $push5=, $pop3, $pop4 -; CHECK-NEXT: i32.store 0($pop5), $1 +; CHECK-NEXT: i32.add $push3=, $pop2, $pop1 +; CHECK-NEXT: i32.store 40($pop3), $1 ; CHECK-NEXT: return %add = add nsw i32 10, %n %arrayidx = getelementptr inbounds [0 x i32], ptr @g, i32 0, i32 %add @@ -501,13 +481,11 @@ ; CHECK-LABEL: store_test6: ; CHECK: .functype store_test6 (i32, i32) -> () ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push2=, g@GOT ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $0, $pop0 -; CHECK-NEXT: global.get $push2=, g@GOT -; CHECK-NEXT: i32.add $push3=, $pop1, $pop2 -; CHECK-NEXT: i32.const $push4=, 40 -; CHECK-NEXT: i32.add $push5=, $pop3, $pop4 -; CHECK-NEXT: i32.store 0($pop5), $1 +; CHECK-NEXT: i32.add $push3=, $pop2, $pop1 +; CHECK-NEXT: i32.store 40($pop3), $1 ; CHECK-NEXT: return %add = add nsw i32 %n, 10 %add.ptr = getelementptr inbounds [0 x i32], ptr @g, i32 0, i32 %add @@ -535,13 +513,11 @@ ; CHECK-LABEL: store_test8: ; CHECK: .functype store_test8 (i32, i32) -> () ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push2=, g@GOT ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $0, $pop0 -; CHECK-NEXT: global.get $push2=, g@GOT -; CHECK-NEXT: i32.add $push3=, $pop1, $pop2 -; CHECK-NEXT: i32.const $push4=, 40 -; CHECK-NEXT: i32.add $push5=, $pop3, $pop4 -; CHECK-NEXT: i32.store 0($pop5), $1 +; CHECK-NEXT: i32.add $push3=, $pop2, $pop1 +; CHECK-NEXT: i32.store 40($pop3), $1 ; CHECK-NEXT: return %add = add nsw i32 10, %n %add.ptr = getelementptr inbounds [0 x i32], ptr @g, i32 0, i32 %add @@ -610,10 +586,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0 -; CHECK-NEXT: i32.add $push2=, $pop1, $0 -; CHECK-NEXT: i32.const $push3=, 40 -; CHECK-NEXT: i32.add $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.store 0($pop4), $2 +; CHECK-NEXT: i32.add $push2=, $0, $pop1 +; CHECK-NEXT: i32.store 40($pop2), $2 ; CHECK-NEXT: return %add = add nsw i32 %n, 10 %arrayidx = getelementptr inbounds i32, ptr %p, i32 %add @@ -627,10 +601,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0 -; CHECK-NEXT: i32.add $push2=, $pop1, $0 -; CHECK-NEXT: i32.const $push3=, 40 -; CHECK-NEXT: i32.add $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.store 0($pop4), $2 +; CHECK-NEXT: i32.add $push2=, $0, $pop1 +; CHECK-NEXT: i32.store 40($pop2), $2 ; CHECK-NEXT: return %add = add nsw i32 10, %n %arrayidx = getelementptr inbounds i32, ptr %p, i32 %add @@ -693,10 +665,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0 -; CHECK-NEXT: i32.add $push2=, $pop1, $0 -; CHECK-NEXT: i32.const $push3=, 40 -; CHECK-NEXT: i32.add $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.store 0($pop4), $2 +; CHECK-NEXT: i32.add $push2=, $0, $pop1 +; CHECK-NEXT: i32.store 40($pop2), $2 ; CHECK-NEXT: return %add = add nsw i32 %n, 10 %add.ptr = getelementptr inbounds i32, ptr %p, i32 %add @@ -725,10 +695,8 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: i32.const $push0=, 2 ; CHECK-NEXT: i32.shl $push1=, $1, $pop0 -; CHECK-NEXT: i32.add $push2=, $pop1, $0 -; CHECK-NEXT: i32.const $push3=, 40 -; CHECK-NEXT: i32.add $push4=, $pop2, $pop3 -; CHECK-NEXT: i32.store 0($pop4), $2 +; CHECK-NEXT: i32.add $push2=, $0, $pop1 +; CHECK-NEXT: i32.store 40($pop2), $2 ; CHECK-NEXT: return %add = add nsw i32 10, %n %add.ptr = getelementptr inbounds i32, ptr %p, i32 %add Index: llvm/test/CodeGen/WebAssembly/cfg-stackify.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/cfg-stackify.ll +++ llvm/test/CodeGen/WebAssembly/cfg-stackify.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 | FileCheck %s +; RUN: llc < %s -start-after=licm -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 | FileCheck %s ; Test the CFG stackifier pass. Index: llvm/test/CodeGen/WebAssembly/debugtrap.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/debugtrap.ll +++ llvm/test/CodeGen/WebAssembly/debugtrap.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -start-after=licm -asm-verbose=false -verify-machineinstrs | FileCheck %s ; Test lowering of __builtin_debugtrap in cases where lowering it via ; the normal UNREACHABLE instruction would yield invalid Index: llvm/test/CodeGen/WebAssembly/fast-isel-br-i1.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/fast-isel-br-i1.ll +++ llvm/test/CodeGen/WebAssembly/fast-isel-br-i1.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -fast-isel -asm-verbose=false -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -start-after=licm -fast-isel -asm-verbose=false -wasm-keep-registers | FileCheck %s target triple = "wasm32-unknown-unknown" Index: llvm/test/CodeGen/WebAssembly/fpclamptosat.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -108,7 +108,8 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i64 @@ -127,7 +128,8 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_u +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui half %x to i64 @@ -140,11 +142,19 @@ define i32 @ustest_f16i32(half %x) { ; CHECK-LABEL: ustest_f16i32: ; CHECK: .functype ustest_f16i32 (f32) -> (i32) +; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.gt_s +; CHECK-NEXT: i64.select +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i64 @@ -357,18 +367,11 @@ define i16 @utesth_f16i16(half %x) { ; CHECK-LABEL: utesth_f16i16: ; CHECK: .functype utesth_f16i16 (f32) -> (i32) -; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: i32.lt_u -; CHECK-NEXT: i32.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui half %x to i32 @@ -388,12 +391,6 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: i32.lt_s -; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i32.const 0 @@ -664,7 +661,7 @@ define i64 @utesth_f16i64(half %x) { ; CHECK-LABEL: utesth_f16i64: ; CHECK: .functype utesth_f16i64 (f32) -> (i64) -; CHECK-NEXT: .local i32, i64, i64 +; CHECK-NEXT: .local i32, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer ; CHECK-NEXT: i32.const 16 @@ -677,22 +674,13 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add ; CHECK-NEXT: i64.load 0 ; CHECK-NEXT: local.set 2 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui half %x to i128 @@ -863,7 +851,8 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i64 @@ -880,7 +869,8 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_u +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui half %x to i64 @@ -892,11 +882,19 @@ define i32 @ustest_f16i32_mm(half %x) { ; CHECK-LABEL: ustest_f16i32_mm: ; CHECK: .functype ustest_f16i32_mm (f32) -> (i32) +; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64.gt_s +; CHECK-NEXT: i64.select +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i64 @@ -1095,18 +1093,11 @@ define i16 @utesth_f16i16_mm(half %x) { ; CHECK-LABEL: utesth_f16i16_mm: ; CHECK: .functype utesth_f16i16_mm (f32) -> (i32) -; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: i32.lt_u -; CHECK-NEXT: i32.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui half %x to i32 @@ -1125,12 +1116,6 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 65535 -; CHECK-NEXT: i32.lt_s -; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i32.const 0 @@ -1403,7 +1388,7 @@ define i64 @utesth_f16i64_mm(half %x) { ; CHECK-LABEL: utesth_f16i64_mm: ; CHECK: .functype utesth_f16i64_mm (f32) -> (i64) -; CHECK-NEXT: .local i32, i64, i64 +; CHECK-NEXT: .local i32, i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer ; CHECK-NEXT: i32.const 16 @@ -1416,27 +1401,13 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add ; CHECK-NEXT: i64.load 0 ; CHECK-NEXT: local.set 2 ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui half %x to i128 Index: llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -185,20 +185,24 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: # fallthrough-return entry: @@ -222,20 +226,24 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_u +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_u +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_u +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_u +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: # fallthrough-return entry: @@ -249,7 +257,16 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-LABEL: ustest_f16i32: ; CHECK: .functype ustest_f16i32 (f32, f32, f32, f32) -> (v128) +; CHECK-NEXT: .local v128, v128 ; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: local.set 3 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: local.set 2 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 @@ -257,21 +274,31 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: i32x4.splat +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: i32x4.replace_lane 1 +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i64x2.replace_lane 1 +; CHECK-NEXT: local.tee 4 +; CHECK-NEXT: v128.const 0, 0 +; CHECK-NEXT: local.tee 5 +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i64x2.gt_s +; CHECK-NEXT: v128.bitselect ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: i32x4.replace_lane 2 +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: i32x4.replace_lane 3 +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i64x2.replace_lane 1 +; CHECK-NEXT: local.tee 4 +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i64x2.gt_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -560,7 +587,7 @@ ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 ; CHECK-NEXT: local.tee 8 -; CHECK-NEXT: i32x4.min_u +; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat @@ -574,7 +601,7 @@ ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: local.get 8 -; CHECK-NEXT: i32x4.min_u +; CHECK-NEXT: v128.and ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return entry: @@ -628,12 +655,12 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 8 -; CHECK-NEXT: i32x4.min_s ; CHECK-NEXT: v128.const 0, 0, 0, 0 -; CHECK-NEXT: local.tee 9 +; CHECK-NEXT: local.tee 8 ; CHECK-NEXT: i32x4.max_s +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: local.tee 9 +; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat @@ -647,9 +674,9 @@ ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: local.get 8 -; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32x4.max_s +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: v128.and ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return entry: @@ -1342,7 +1369,7 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-LABEL: utesth_f16i64: ; CHECK: .functype utesth_f16i64 (f32, f32) -> (v128) -; CHECK-NEXT: .local i32, i64, i64, i64, i64 +; CHECK-NEXT: .local i32, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer ; CHECK-NEXT: i32.const 32 @@ -1350,13 +1377,6 @@ ; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.add -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: call __fixunssfti -; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 @@ -1364,37 +1384,20 @@ ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.load 16 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 5 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: v128.load64_splat 0 +; CHECK-NEXT: v128.load64_lane 16, 1 +; CHECK-NEXT: local.set 3 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui <2 x half> %x to <2 x i128> @@ -1683,20 +1686,24 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: # fallthrough-return entry: @@ -1718,20 +1725,24 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_u +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_u +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_u +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: i64.trunc_sat_f32_u +; CHECK-NEXT: i32.wrap_i64 ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: # fallthrough-return entry: @@ -1744,7 +1755,16 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-LABEL: ustest_f16i32_mm: ; CHECK: .functype ustest_f16i32_mm (f32, f32, f32, f32) -> (v128) +; CHECK-NEXT: .local v128, v128 ; CHECK-NEXT: # %bb.0: # %entry +; CHECK-NEXT: local.get 3 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: local.set 3 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: local.set 2 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 @@ -1752,21 +1772,31 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: i32x4.splat +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: i32x4.replace_lane 1 +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i64x2.replace_lane 1 +; CHECK-NEXT: local.tee 4 +; CHECK-NEXT: v128.const 0, 0 +; CHECK-NEXT: local.tee 5 +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i64x2.gt_s +; CHECK-NEXT: v128.bitselect ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: i32x4.replace_lane 2 +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i32.trunc_sat_f32_u -; CHECK-NEXT: i32x4.replace_lane 3 +; CHECK-NEXT: i64.trunc_sat_f32_s +; CHECK-NEXT: i64x2.replace_lane 1 +; CHECK-NEXT: local.tee 4 +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: local.get 4 +; CHECK-NEXT: local.get 5 +; CHECK-NEXT: i64x2.gt_s +; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -2041,7 +2071,7 @@ ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 ; CHECK-NEXT: local.tee 8 -; CHECK-NEXT: i32x4.min_u +; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.splat @@ -2055,7 +2085,7 @@ ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: local.get 8 -; CHECK-NEXT: i32x4.min_u +; CHECK-NEXT: v128.and ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return entry: @@ -2108,12 +2138,12 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 -; CHECK-NEXT: local.tee 8 -; CHECK-NEXT: i32x4.min_s ; CHECK-NEXT: v128.const 0, 0, 0, 0 -; CHECK-NEXT: local.tee 9 +; CHECK-NEXT: local.tee 8 ; CHECK-NEXT: i32x4.max_s +; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: local.tee 9 +; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 4 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.splat @@ -2127,9 +2157,9 @@ ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: local.get 8 -; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: local.get 9 ; CHECK-NEXT: i32x4.max_s +; CHECK-NEXT: local.get 9 +; CHECK-NEXT: v128.and ; CHECK-NEXT: i16x8.narrow_i32x4_u ; CHECK-NEXT: # fallthrough-return entry: @@ -2876,7 +2906,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: utesth_f16i64_mm: ; CHECK: .functype utesth_f16i64_mm (f32, f32) -> (v128) -; CHECK-NEXT: .local i32, i64, i64, i64, i64 +; CHECK-NEXT: .local i32, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: global.get __stack_pointer ; CHECK-NEXT: i32.const 32 @@ -2884,13 +2914,6 @@ ; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: global.set __stack_pointer ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.add -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: call __fixunssfti -; CHECK-NEXT: local.get 2 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 @@ -2898,47 +2921,20 @@ ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.load 16 -; CHECK-NEXT: local.set 4 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: call __fixunssfti ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 5 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 6 +; CHECK-NEXT: v128.load64_splat 0 +; CHECK-NEXT: v128.load64_lane 16, 1 +; CHECK-NEXT: local.set 3 ; CHECK-NEXT: local.get 2 ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i32.add ; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select -; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptoui <2 x half> %x to <2 x i128> Index: llvm/test/CodeGen/WebAssembly/function-addr-offset.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/function-addr-offset.ll +++ llvm/test/CodeGen/WebAssembly/function-addr-offset.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -start-after=licm -verify-machineinstrs | FileCheck %s ; Wasm does not currently support function addresses with offsets, so we ; shouldn't try to create a folded SDNode like (function + offset). This is a Index: llvm/test/CodeGen/WebAssembly/implicit-def.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/implicit-def.ll +++ llvm/test/CodeGen/WebAssembly/implicit-def.ll @@ -1,4 +1,4 @@ -; RUN: llc -o - %s -asm-verbose=false -wasm-keep-registers -disable-wasm-fallthrough-return-opt -mattr=+simd128 | FileCheck %s +; RUN: llc -o - %s -start-after=licm -asm-verbose=false -wasm-keep-registers -disable-wasm-fallthrough-return-opt -mattr=+simd128 | FileCheck %s target triple = "wasm32-unknown-unknown" Index: llvm/test/CodeGen/WebAssembly/ir-locals.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/ir-locals.ll +++ llvm/test/CodeGen/WebAssembly/ir-locals.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false | FileCheck %s +; RUN: llc < %s -start-after=licm --mtriple=wasm32-unknown-unknown -asm-verbose=false | FileCheck %s %i32_cell = type ptr addrspace(1) %i64_cell = type ptr addrspace(1) Index: llvm/test/CodeGen/WebAssembly/libcalls.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/libcalls.ll +++ llvm/test/CodeGen/WebAssembly/libcalls.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -start-after=early-cse -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck %s ; Test a subset of compiler-rt/libm libcalls expected to be emitted by the wasm backend Index: llvm/test/CodeGen/WebAssembly/load-store-pic.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/load-store-pic.ll +++ llvm/test/CodeGen/WebAssembly/load-store-pic.ll @@ -35,9 +35,7 @@ ; PIC: global.get $push[[L0:[0-9]+]]=, __memory_base{{$}} ; PIC-NEXT: [[PTR]].const $push[[L1:[0-9]+]]=, hidden_global_array@MBREL{{$}} ; PIC-NEXT: [[PTR]].add $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1:[0-9]+]]{{$}} -; PIC-NEXT: [[PTR]].const $push[[L3:[0-9]+]]=, 20{{$}} -; PIC-NEXT: [[PTR]].add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; PIC-NEXT: i32.load $push{{[0-9]+}}=, 0($pop[[L4]]){{$}} +; PIC-NEXT: i32.load $push{{[0-9]+}}=, 20($pop[[L2]]){{$}} ; CHECK-NEXT: end_function %1 = getelementptr [10 x i32], ptr @hidden_global_array, i32 0, i32 5 @@ -50,10 +48,10 @@ define void @store_hidden_global(i32 %n) { ; CHECK-LABEL: store_hidden_global: ; PIC: global.get $push[[L0:[0-9]+]]=, __memory_base{{$}} -; PIC-NEXT: [[PTR]].const $push[[L1:[0-9]+]]=, hidden_global@MBREL{{$}} +; PIC-NEXT: [[PTR]].const $push[[L1:[0-9]+]]=, hidden_global@MBREL ; PIC-NEXT: [[PTR]].add $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} ; PIC-NEXT: i32.store 0($pop[[L2]]), $0{{$}} -; CHECK-NEXT: end_function +; CHECK-NEXT: end_function store i32 %n, ptr @hidden_global ret void @@ -64,9 +62,7 @@ ; PIC: global.get $push[[L0:[0-9]+]]=, __memory_base{{$}} ; PIC-NEXT: [[PTR]].const $push[[L1:[0-9]+]]=, hidden_global_array@MBREL{{$}} ; PIC-NEXT: [[PTR]].add $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; PIC-NEXT: [[PTR]].const $push[[L3:[0-9]+]]=, 20{{$}} -; PIC-NEXT: [[PTR]].add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]]{{$}} -; PIC-NEXT: i32.store 0($pop[[L4]]), $0{{$}} +; PIC-NEXT: i32.store 20($pop[[L2]]), $0{{$}} ; CHECK-NEXT: end_function @@ -93,9 +89,7 @@ define i32 @load_external_global_offset() { ; CHECK-LABEL: load_external_global_offset: ; PIC: global.get $push[[L0:[0-9]+]]=, external_global_array@GOT{{$}} -; PIC-NEXT: [[PTR]].const $push[[L1:[0-9]+]]=, 20{{$}} -; PIC-NEXT: [[PTR]].add $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; PIC-NEXT: i32.load $push{{[0-9]+}}=, 0($pop[[L2]]){{$}} +; PIC-NEXT: i32.load $push{{[0-9]+}}=, 20($pop[[L0]]){{$}} ; CHECK-NEXT: end_function @@ -120,9 +114,7 @@ define void @store_external_global_offset(i32 %n) { ; CHECK-LABEL: store_external_global_offset: ; PIC: global.get $push[[L0:[0-9]+]]=, external_global_array@GOT{{$}} -; PIC-NEXT: [[PTR]].const $push[[L1:[0-9]+]]=, 20{{$}} -; PIC-NEXT: [[PTR]].add $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]{{$}} -; PIC-NEXT: i32.store 0($pop[[L2]]), $0{{$}} +; PIC-NEXT: i32.store 20($pop[[L0]]), $0{{$}} ; CHECK-NEXT: end_function Index: llvm/test/CodeGen/WebAssembly/pr59626.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/pr59626.ll +++ llvm/test/CodeGen/WebAssembly/pr59626.ll @@ -13,16 +13,16 @@ ; CHECK-32-NEXT: i32.const 0 ; CHECK-32-NEXT: i32.store16 0 ; CHECK-32-NEXT: local.get 1 -; CHECK-32-NEXT: local.get 0 -; CHECK-32-NEXT: i8x16.splat -; CHECK-32-NEXT: v128.store16_lane 0, 0 -; CHECK-32-NEXT: v128.const 0, 0 -; CHECK-32-NEXT: i32x4.extract_lane 0 +; CHECK-32-NEXT: i32.const 5 +; CHECK-32-NEXT: i32.store8 2 +; CHECK-32-NEXT: local.get 1 +; CHECK-32-NEXT: i32.const 769 +; CHECK-32-NEXT: i32.store16 0 +; CHECK-32-NEXT: i32.const 1 ; CHECK-32-NEXT: # fallthrough-return ; ; CHECK-64-LABEL: f: ; CHECK-64: .functype f (i64, i64) -> (i32) -; CHECK-64-NEXT: .local i32 ; CHECK-64-NEXT: # %bb.0: # %BB ; CHECK-64-NEXT: local.get 0 ; CHECK-64-NEXT: i32.const 0 @@ -31,12 +31,12 @@ ; CHECK-64-NEXT: i32.const 0 ; CHECK-64-NEXT: i32.store16 0 ; CHECK-64-NEXT: local.get 1 -; CHECK-64-NEXT: local.get 2 -; CHECK-64-NEXT: i8x16.splat -; CHECK-64-NEXT: v128.store16_lane 0, 0 -; CHECK-64-NEXT: drop -; CHECK-64-NEXT: v128.const 0, 0 -; CHECK-64-NEXT: i32x4.extract_lane 0 +; CHECK-64-NEXT: i32.const 5 +; CHECK-64-NEXT: i32.store8 2 +; CHECK-64-NEXT: local.get 1 +; CHECK-64-NEXT: i32.const 769 +; CHECK-64-NEXT: i32.store16 0 +; CHECK-64-NEXT: i32.const 1 ; CHECK-64-NEXT: # fallthrough-return BB: store <3 x i8> zeroinitializer, ptr %0 Index: llvm/test/CodeGen/WebAssembly/reg-stackify.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/reg-stackify.ll +++ llvm/test/CodeGen/WebAssembly/reg-stackify.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -verify-machineinstrs | FileCheck %s --check-prefix=NOREGS +; RUN: llc < %s -asm-verbose=false -start-after=licm -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -asm-verbose=false -start-after=licm -disable-wasm-fallthrough-return-opt -verify-machineinstrs | FileCheck %s --check-prefix=NOREGS ; Test the register stackifier pass. Index: llvm/test/CodeGen/WebAssembly/simd-comparisons.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/simd-comparisons.ll +++ llvm/test/CodeGen/WebAssembly/simd-comparisons.ll @@ -1100,10 +1100,8 @@ ; CHECK-LABEL: compare_ord_nnan_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype compare_ord_nnan_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.eq $push[[T0:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f32x4.eq $push[[T1:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} +; SIMD128-NEXT: v128.const $push0=, 1, 1, 1, 1 +; SIMD128-NEXT: return $pop0 define <4 x i1> @compare_ord_nnan_v4f32 (<4 x float> %x, <4 x float> %y) { %res = fcmp nnan ord <4 x float> %x, %y ret <4 x i1> %res @@ -1125,10 +1123,8 @@ ; CHECK-LABEL: compare_sext_ord_nnan_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype compare_sext_ord_nnan_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.eq $push[[T0:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f32x4.eq $push[[T1:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} +; SIMD128-NEXT: v128.const $push0=, -1, -1, -1, -1 +; SIMD128-NEXT: return $pop0 define <4 x i32> @compare_sext_ord_nnan_v4f32 (<4 x float> %x, <4 x float> %y) { %cmp = fcmp nnan ord <4 x float> %x, %y %res = sext <4 x i1> %cmp to <4 x i32> @@ -1416,10 +1412,8 @@ ; CHECK-LABEL: compare_uno_nnan_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype compare_uno_nnan_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.ne $push[[T0:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f32x4.ne $push[[T1:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} +; SIMD128-NEXT: v128.const $push0=, 0, 0, 0, 0 +; SIMD128-NEXT: return $pop0 define <4 x i1> @compare_uno_nnan_v4f32 (<4 x float> %x, <4 x float> %y) { %res = fcmp nnan uno <4 x float> %x, %y ret <4 x i1> %res @@ -1441,10 +1435,8 @@ ; CHECK-LABEL: compare_sext_uno_nnan_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype compare_sext_uno_nnan_v4f32 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f32x4.ne $push[[T0:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f32x4.ne $push[[T1:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} +; SIMD128-NEXT: v128.const $push0=, 0, 0, 0, 0 +; SIMD128-NEXT: return $pop0 define <4 x i32> @compare_sext_uno_nnan_v4f32 (<4 x float> %x, <4 x float> %y) { %cmp = fcmp nnan uno <4 x float> %x, %y %res = sext <4 x i1> %cmp to <4 x i32> @@ -1722,10 +1714,8 @@ ; CHECK-LABEL: compare_ord_nnan_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype compare_ord_nnan_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.eq $push[[T0:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f64x2.eq $push[[T1:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} +; SIMD128-NEXT: v128.const $push0=, 1, 1 +; SIMD128-NEXT: return $pop0 define <2 x i1> @compare_ord_nnan_v2f64 (<2 x double> %x, <2 x double> %y) { %res = fcmp nnan ord <2 x double> %x, %y ret <2 x i1> %res @@ -1747,10 +1737,8 @@ ; CHECK-LABEL: compare_sext_ord_nnan_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype compare_sext_ord_nnan_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.eq $push[[T0:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f64x2.eq $push[[T1:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.and $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} +; SIMD128-NEXT: v128.const $push0=, -1, -1 +; SIMD128-NEXT: return $pop0 define <2 x i64> @compare_sext_ord_nnan_v2f64 (<2 x double> %x, <2 x double> %y) { %cmp = fcmp nnan ord <2 x double> %x, %y %res = sext <2 x i1> %cmp to <2 x i64> @@ -2038,10 +2026,8 @@ ; CHECK-LABEL: compare_uno_nnan_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype compare_uno_nnan_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.ne $push[[T0:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f64x2.ne $push[[T1:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} +; SIMD128-NEXT: v128.const $push0=, 0, 0 +; SIMD128-NEXT: return $pop0 define <2 x i1> @compare_uno_nnan_v2f64 (<2 x double> %x, <2 x double> %y) { %res = fcmp nnan uno <2 x double> %x, %y ret <2 x i1> %res @@ -2063,10 +2049,8 @@ ; CHECK-LABEL: compare_sext_uno_nnan_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype compare_sext_uno_nnan_v2f64 (v128, v128) -> (v128){{$}} -; SIMD128-NEXT: f64x2.ne $push[[T0:[0-9]+]]=, $0, $0{{$}} -; SIMD128-NEXT: f64x2.ne $push[[T1:[0-9]+]]=, $1, $1{{$}} -; SIMD128-NEXT: v128.or $push[[R:[0-9]+]]=, $pop[[T0]], $pop[[T1]]{{$}} -; SIMD128-NEXT: return $pop[[R]]{{$}} +; SIMD128-NEXT: v128.const $push0=, 0, 0 +; SIMD128-NEXT: return $pop0 define <2 x i64> @compare_sext_uno_nnan_v2f64 (<2 x double> %x, <2 x double> %y) { %cmp = fcmp nnan uno <2 x double> %x, %y %res = sext <2 x i1> %cmp to <2 x i64> Index: llvm/test/CodeGen/WebAssembly/simd.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/simd.ll +++ llvm/test/CodeGen/WebAssembly/simd.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128,+sign-ext | FileCheck %s --check-prefixes CHECK,SIMD128 -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,NO-SIMD128 +; RUN: llc < %s -start-after=early-cse -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128,+sign-ext | FileCheck %s --check-prefixes CHECK,SIMD128 +; RUN: llc < %s -start-after=early-cse -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefixes CHECK,NO-SIMD128 ; Test that basic SIMD128 vector manipulation operations assemble as expected. Index: llvm/test/CodeGen/WebAssembly/userstack.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/userstack.ll +++ llvm/test/CodeGen/WebAssembly/userstack.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=32 %s -; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=64 %s +; RUN: llc < %s -start-after=early-cse --mtriple=wasm32-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=32 %s +; RUN: llc < %s -start-after=early-cse --mtriple=wasm64-unknown-unknown -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers | FileCheck -DPTR=64 %s declare void @ext_func(ptr %ptr) declare void @ext_func_i32(ptr %ptr) Index: llvm/test/MC/WebAssembly/stack-ptr-mclower.ll =================================================================== --- llvm/test/MC/WebAssembly/stack-ptr-mclower.ll +++ llvm/test/MC/WebAssembly/stack-ptr-mclower.ll @@ -1,5 +1,5 @@ -; RUN: llc --mtriple=wasm32-unknown-unknown -asm-verbose=false %s -o - | FileCheck --check-prefixes CHECK -DPTR=i32 %s -; RUN: llc --mtriple=wasm64-unknown-unknown -asm-verbose=false %s -o - | FileCheck --check-prefixes CHECK -DPTR=i64 %s +; RUN: llc -start-after=early-cse --mtriple=wasm32-unknown-unknown -asm-verbose=false %s -o - | FileCheck --check-prefixes CHECK -DPTR=i32 %s +; RUN: llc -start-after=early-cse --mtriple=wasm64-unknown-unknown -asm-verbose=false %s -o - | FileCheck --check-prefixes CHECK -DPTR=i64 %s ; empty function that does not use stack, to check WebAssemblyMCLowerPrePass ; works correctly. Index: llvm/test/MC/WebAssembly/stack-ptr.ll =================================================================== --- llvm/test/MC/WebAssembly/stack-ptr.ll +++ llvm/test/MC/WebAssembly/stack-ptr.ll @@ -1,5 +1,5 @@ -; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o - | obj2yaml | FileCheck --check-prefixes CHECK,CHK32 %s -; RUN: llc --mtriple=wasm64-unknown-unknown -filetype=obj %s -o - | obj2yaml | FileCheck --check-prefixes CHECK,CHK64 %s +; RUN: llc -start-after=early-cse --mtriple=wasm32-unknown-unknown -filetype=obj %s -o - | obj2yaml | FileCheck --check-prefixes CHECK,CHK32 %s +; RUN: llc -start-after=early-cse --mtriple=wasm64-unknown-unknown -filetype=obj %s -o - | obj2yaml | FileCheck --check-prefixes CHECK,CHK64 %s ; Function that uses explict stack, and should generate a reference to ; __stack_pointer, along with the corresponding reloction entry. Index: llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_generated_funcs.ll =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_generated_funcs.ll +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_generated_funcs.ll @@ -1,4 +1,4 @@ -; RUN: llc -enable-machine-outliner -mtriple=wasm32-unknown-linux < %s | FileCheck %s +; RUN: llc -O1 -enable-machine-outliner -mtriple=wasm32-unknown-linux < %s | FileCheck %s ; NOTE: Machine outliner doesn't run. @x = global i32 0, align 4 Index: llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_generated_funcs.ll.generated.expected =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_generated_funcs.ll.generated.expected +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_generated_funcs.ll.generated.expected @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --include-generated-funcs -; RUN: llc -enable-machine-outliner -mtriple=wasm32-unknown-linux < %s | FileCheck %s +; RUN: llc -O1 -enable-machine-outliner -mtriple=wasm32-unknown-linux < %s | FileCheck %s ; NOTE: Machine outliner doesn't run. @x = global i32 0, align 4 Index: llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_generated_funcs.ll.nogenerated.expected =================================================================== --- llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_generated_funcs.ll.nogenerated.expected +++ llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/wasm_generated_funcs.ll.nogenerated.expected @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -enable-machine-outliner -mtriple=wasm32-unknown-linux < %s | FileCheck %s +; RUN: llc -O1 -enable-machine-outliner -mtriple=wasm32-unknown-linux < %s | FileCheck %s ; NOTE: Machine outliner doesn't run. @x = global i32 0, align 4