diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -297,9 +297,26 @@ SDValue &Addr) { assert(N.getNumOperands() == 2 && "Attempting to fold in a non-binary op"); - // WebAssembly constant offsets are performed as unsigned with infinite - // precision, so we need to check for NoUnsignedWrap so that we don't fold an - // offset for an add that needs wrapping. + // Fold target global addresses in an add into the offset. + if (!TM.isPositionIndependent()) { + for (size_t i = 0; i < 2; ++i) { + SDValue Op = N.getOperand(i); + SDValue OtherOp = N.getOperand(i == 0 ? 1 : 0); + + if (Op.getOpcode() == WebAssemblyISD::Wrapper) + Op = Op.getOperand(0); + + if (Op.getOpcode() == ISD::TargetGlobalAddress) { + Offset = Op; + Addr = OtherOp; + return true; + } + } + } + + // WebAssembly constant offsets are performed as unsigned with + // infinite precision, so we need to check for NoUnsignedWrap so + // that we don't fold an offset for an add that needs wrapping. if (N.getOpcode() == ISD::ADD && !N.getNode()->getFlags().hasNoUnsignedWrap()) return false; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -75,14 +75,19 @@ if (AddrOperandNum == FIOperandNum) { unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx( MI.getOpcode(), WebAssembly::OpName::off); - assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0); - int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset; - - if (static_cast(Offset) <= std::numeric_limits::max()) { - MI.getOperand(OffsetOperandNum).setImm(Offset); - MI.getOperand(FIOperandNum) - .ChangeToRegister(FrameRegister, /*isDef=*/false); - return false; + auto &OffsetOp = MI.getOperand(OffsetOperandNum); + // Don't fold offset in if offset is a global address to be resolved later + if (OffsetOp.isImm()) { + assert(FrameOffset >= 0 && OffsetOp.getImm() >= 0); + int64_t Offset = OffsetOp.getImm() + FrameOffset; + + if (static_cast(Offset) <= + std::numeric_limits::max()) { + OffsetOp.setImm(Offset); + MI.getOperand(FIOperandNum) + .ChangeToRegister(FrameRegister, /*isDef=*/false); + return false; + } } } diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll --- a/llvm/test/CodeGen/WebAssembly/offset.ll +++ b/llvm/test/CodeGen/WebAssembly/offset.ll @@ -666,3 +666,29 @@ define {i64,i32,i16,i8} @aggregate_return_without_merge() { ret {i64,i32,i16,i8} zeroinitializer } + +;===---------------------------------------------------------------------------- +; Global address loads +;===---------------------------------------------------------------------------- + +@global_i32 = external global i32 +@global_i8 = external global i8 + +; CHECK-LABEL: load_i32_global_address_with_folded_offset: +; CHECK: i32.const $push0=, 2 +; CHECK: i32.shl $push1=, $0, $pop0 +; CHECK: i32.load $push2=, global_i32($pop1) +define i32 @load_i32_global_address_with_folded_offset(i32 %n) { + %s = getelementptr inbounds i32, i32* @global_i32, i32 %n + %t = load i32, i32* %s + ret i32 %t +} + +; CHECK-LABEL: load_i8_i32s_global_address_with_folded_offset: +; CHECK: i32.load8_s $push0=, global_i8($0) +define i32 @load_i8_i32s_global_address_with_folded_offset(i32 %n) { + %s = getelementptr inbounds i8, i8* @global_i8, i32 %n + %t = load i8, i8* %s + %u = sext i8 %t to i32 + ret i32 %u +} diff --git a/llvm/test/CodeGen/WebAssembly/userstack.ll b/llvm/test/CodeGen/WebAssembly/userstack.ll --- a/llvm/test/CodeGen/WebAssembly/userstack.ll +++ b/llvm/test/CodeGen/WebAssembly/userstack.ll @@ -329,9 +329,10 @@ ; We optimize the format of "frame offset + operand" by folding it, but this is ; only possible when that operand is an immediate. In this example it is a -; global address, so we should not fold it. +; global address, so we should fold the global address into the offset, but not +; the frame offset. ; CHECK-LABEL: frame_offset_with_global_address -; CHECK: i[[PTR]].const ${{.*}}=, str +; CHECK: i32.load8_u ${{.*}}=, str({{.*}}) @str = local_unnamed_addr global [3 x i8] c"abc", align 16 define i8 @frame_offset_with_global_address() { %1 = alloca i8, align 4