diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -297,6 +297,23 @@ SDValue &Addr) { assert(N.getNumOperands() == 2 && "Attempting to fold in a non-binary op"); + // Fold target global addresses in an add into the offset. + if (!TM.isPositionIndependent()) { + for (size_t i = 0; i < 2; ++i) { + SDValue Op = N.getOperand(i); + SDValue OtherOp = N.getOperand(i == 0 ? 1 : 0); + + if (Op.getOpcode() == WebAssemblyISD::Wrapper) + Op = Op.getOperand(0); + + if (Op.getOpcode() == ISD::TargetGlobalAddress) { + Offset = Op; + Addr = OtherOp; + return true; + } + } + } + // WebAssembly constant offsets are performed as unsigned with infinite // precision, so we need to check for NoUnsignedWrap so that we don't fold an // offset for an add that needs wrapping. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -75,14 +75,19 @@ if (AddrOperandNum == FIOperandNum) { unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx( MI.getOpcode(), WebAssembly::OpName::off); - assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0); - int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset; - - if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) { - MI.getOperand(OffsetOperandNum).setImm(Offset); - MI.getOperand(FIOperandNum) - .ChangeToRegister(FrameRegister, /*isDef=*/false); - return false; + auto &OffsetOp = MI.getOperand(OffsetOperandNum); + // Don't fold offset in if offset is a global address to be resolved later + if (OffsetOp.isImm()) { + assert(FrameOffset >= 0 && OffsetOp.getImm() >= 0); + int64_t Offset = OffsetOp.getImm() + FrameOffset; 
+ + if (static_cast<uint64_t>(Offset) <= + std::numeric_limits<uint32_t>::max()) { + OffsetOp.setImm(Offset); + MI.getOperand(FIOperandNum) + .ChangeToRegister(FrameRegister, /*isDef=*/false); + return false; + } } } diff --git a/llvm/test/CodeGen/WebAssembly/eh-lsda.ll b/llvm/test/CodeGen/WebAssembly/eh-lsda.ll --- a/llvm/test/CodeGen/WebAssembly/eh-lsda.ll +++ b/llvm/test/CodeGen/WebAssembly/eh-lsda.ll @@ -66,9 +66,9 @@ ; CHECK-LABEL: test1: ; In static linking, we load GCC_except_table as a constant directly. -; NOPIC: i[[PTR]].const $push[[CONTEXT:.*]]=, __wasm_lpad_context +; NOPIC: i[[PTR]].const $push[[X:.*]]=, {{[48]}} ; NOPIC-NEXT: i[[PTR]].const $push[[EXCEPT_TABLE:.*]]=, GCC_except_table1 -; NOPIC-NEXT: i[[PTR]].store {{[48]}}($pop[[CONTEXT]]), $pop[[EXCEPT_TABLE]] +; NOPIC-NEXT: i[[PTR]].store __wasm_lpad_context($pop[[X]]), $pop[[EXCEPT_TABLE]] ; In case of PIC, we make GCC_except_table symbols a relative on based on ; __memory_base. diff --git a/llvm/test/CodeGen/WebAssembly/exception.ll b/llvm/test/CodeGen/WebAssembly/exception.ll --- a/llvm/test/CodeGen/WebAssembly/exception.ll +++ b/llvm/test/CodeGen/WebAssembly/exception.ll @@ -34,7 +34,7 @@ ; CHECK: call foo ; CHECK: catch $[[EXN:[0-9]+]]=, __cpp_exception ; CHECK: global.set __stack_pointer -; CHECK: i32.{{store|const}} {{.*}} __wasm_lpad_context +; CHECK: i32.{{store|const}} __wasm_lpad_context({{.*}}) ; CHECK: call $drop=, _Unwind_CallPersonality, $[[EXN]] ; CHECK: block ; CHECK: br_if 0 diff --git a/llvm/test/CodeGen/WebAssembly/negative-base-reg.ll b/llvm/test/CodeGen/WebAssembly/negative-base-reg.ll --- a/llvm/test/CodeGen/WebAssembly/negative-base-reg.ll +++ b/llvm/test/CodeGen/WebAssembly/negative-base-reg.ll @@ -17,9 +17,9 @@ for.body: ; preds = %for.body, %entry %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ] ; The offset should not be folded into the store. 
-; CHECK: i32.const $push{{[0-9]+}}=, args+128 -; CHECK: i32.add $push[[L1:[0-9]+]]=, -; CHECK: i32.store 0($pop[[L1]]) +; CHECK: local.get $push[[L1:[0-9]+]]=, 0 +; CHECK-NEXT: i32.const $push[[V:[0-9]+]]=, 1 +; CHECK-NEXT: i32.store args+128($pop[[L1]]), $pop[[V]] %arrayidx = getelementptr inbounds [32 x i32], ptr @args, i32 0, i32 %i.04 store i32 1, ptr %arrayidx, align 4, !tbaa !1 %inc = add nuw nsw i32 %i.04, 1 diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll --- a/llvm/test/CodeGen/WebAssembly/offset.ll +++ b/llvm/test/CodeGen/WebAssembly/offset.ll @@ -666,3 +666,29 @@ define {i64,i32,i16,i8} @aggregate_return_without_merge() { ret {i64,i32,i16,i8} zeroinitializer } + +;===---------------------------------------------------------------------------- +; Global address loads +;===---------------------------------------------------------------------------- + +@global_i32 = external global i32 +@global_i8 = external global i8 + +; CHECK-LABEL: load_i32_global_address_with_folded_offset: +; CHECK: i32.const $push0=, 2 +; CHECK: i32.shl $push1=, $0, $pop0 +; CHECK: i32.load $push2=, global_i32($pop1) +define i32 @load_i32_global_address_with_folded_offset(i32 %n) { + %s = getelementptr inbounds i32, i32* @global_i32, i32 %n + %t = load i32, i32* %s + ret i32 %t +} + +; CHECK-LABEL: load_i8_i32s_global_address_with_folded_offset: +; CHECK: i32.load8_s $push0=, global_i8($0) +define i32 @load_i8_i32s_global_address_with_folded_offset(i32 %n) { + %s = getelementptr inbounds i8, i8* @global_i8, i32 %n + %t = load i8, i8* %s + %u = sext i8 %t to i32 + ret i32 %u +} diff --git a/llvm/test/CodeGen/WebAssembly/userstack.ll b/llvm/test/CodeGen/WebAssembly/userstack.ll --- a/llvm/test/CodeGen/WebAssembly/userstack.ll +++ b/llvm/test/CodeGen/WebAssembly/userstack.ll @@ -329,9 +329,10 @@ ; We optimize the format of "frame offset + operand" by folding it, but this is ; only possible when that operand is an immediate. 
In this example it is a -; global address, so we should not fold it. +; global address, so we should fold the global address into the offset, but not +; the frame offset. ; CHECK-LABEL: frame_offset_with_global_address -; CHECK: i[[PTR]].const ${{.*}}=, str +; CHECK: i32.load8_u ${{.*}}=, str({{.*}}) @str = local_unnamed_addr global [3 x i8] c"abc", align 16 define i8 @frame_offset_with_global_address() { %1 = alloca i8, align 4