Index: llvm/lib/Target/WebAssembly/CMakeLists.txt
===================================================================
--- llvm/lib/Target/WebAssembly/CMakeLists.txt
+++ llvm/lib/Target/WebAssembly/CMakeLists.txt
@@ -40,6 +40,7 @@
   WebAssemblyMCInstLower.cpp
   WebAssemblyMCLowerPrePass.cpp
   WebAssemblyNullifyDebugValueLists.cpp
+  WebAssemblyOptimizeGEPs.cpp
   WebAssemblyOptimizeLiveIntervals.cpp
   WebAssemblyOptimizeReturned.cpp
   WebAssemblyPeephole.cpp
Index: llvm/lib/Target/WebAssembly/WebAssembly.h
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssembly.h
+++ llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -23,6 +23,7 @@
 class WebAssemblyTargetMachine;
 class ModulePass;
 class FunctionPass;
+class Pass;
 
 // LLVM IR passes.
 ModulePass *createWebAssemblyLowerEmscriptenEHSjLj();
@@ -30,6 +31,7 @@
 ModulePass *createWebAssemblyFixFunctionBitcasts();
 FunctionPass *createWebAssemblyOptimizeReturned();
 FunctionPass *createWebAssemblyLowerRefTypesIntPtrConv();
+Pass *createWebAssemblyOptimizeGEPs();
 
 // ISel and immediate followup passes.
 FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
@@ -82,6 +84,7 @@
 void initializeWebAssemblyPeepholePass(PassRegistry &);
 void initializeWebAssemblyMCLowerPrePassPass(PassRegistry &);
 void initializeWebAssemblyLowerRefTypesIntPtrConvPass(PassRegistry &);
+void initializeWebAssemblyOptimizeGEPsPass(PassRegistry &);
 
 namespace WebAssembly {
 enum TargetIndex {
Index: llvm/lib/Target/WebAssembly/WebAssemblyOptimizeGEPs.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/WebAssembly/WebAssemblyOptimizeGEPs.cpp
@@ -0,0 +1,294 @@
+//===--- WebAssemblyOptimizeGEPs.cpp - GetElementPtr index processing ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Optimize inbounds GetElementPtr instructions so that LoopStrengthReduce and
+/// ScalarEvolutionExpander don't make modifications to the IR which lose the
+/// inbounds information.
+///
+/// To use immediate address offsets, the add operations need to be marked as
+/// nuw due to WebAssembly's infinite precision address offset calculation.
+/// Unfortunately, the inbounds information is easily lost when converting
+/// between IR -> SCEV -> IR during LSR. This pass finds inbounds GEPs, with the
+/// form: (getelementptr %base, (or %reg_offset, %constant)) which is what is
+/// generated after loop unrolling + instcombine. The GEPs are then converted
+/// to use a constant index and a shared base pointer which is calculated using
+/// raw pointer arithmetic.
+///
+/// NOTE(review): two soundness questions are open and need an answer before
+/// this lands:
+///  - The shared base is ptrtoint(%base) + %reg_offset, so %reg_offset is
+///    added as a byte count, while the GEP being replaced scales its index
+///    by the source element size.
+///  - (or %reg_offset, %constant) is treated as an addition without checking
+///    that the operands have no set bits in common.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <utility>
+
+#define DEBUG_TYPE "wasm-optimize-geps"
+#define PASS_DESC "Optimize inner-loop GEPs for Webassembly"
+
+using namespace llvm;
+
+namespace {
+
+/// The (base pointer, register offset) pair shared by a group of GEPs.
+using BaseOffsetPair = std::pair<Value *, Value *>;
+
+/// A GEP that is to be rewritten as (getelementptr %new_base, ImmOffset).
+class RebaseCandidate {
+public:
+  RebaseCandidate(GetElementPtrInst *GEP, ConstantInt *ImmOffset)
+      : GEP(GEP), ImmOffset(ImmOffset) {}
+
+  /// Make the GEP address \p NewBasePtr with ImmOffset as its only index.
+  void update(Value *NewBasePtr) const {
+    assert(NewBasePtr->getType()->isPointerTy() && "Expected PointerTy");
+    GEP->setOperand(0, NewBasePtr);
+    GEP->setOperand(1, ImmOffset);
+  }
+
+  GetElementPtrInst *getGEP() const { return GEP; }
+
+#ifndef NDEBUG
+  void dump() const {
+    LLVM_DEBUG(dbgs() << "Candidate:\n"
+                      << " ImmOffset: " << *ImmOffset << "\n"
+                      << " Base: " << *GEP->getPointerOperand() << "\n"
+                      << " GEP: " << *GEP << "\n");
+  }
+#endif
+
+private:
+  GetElementPtrInst *GEP;
+  ConstantInt *ImmOffset;
+};
+
+class WebAssemblyOptimizeGEPs : public LoopPass {
+public:
+  static char ID;
+
+  WebAssemblyOptimizeGEPs() : LoopPass(ID) {}
+
+  StringRef getPassName() const override { return PASS_DESC; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<TargetPassConfig>();
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+private:
+  using CandidateList = SmallVector<RebaseCandidate, 4>;
+
+  bool run(Loop *L, const DominatorTree &DT);
+  void addCandidate(GetElementPtrInst *GEP, Value *RegOffset,
+                    ConstantInt *ImmOffset);
+  // Create a new base address using pointer casting and add nuw. This should
+  // prevent LSR from messing with our inbound geps.
+  Value *createNewBaseAddr(const BaseOffsetPair &Key,
+                           const CandidateList &Group, Loop *L,
+                           const DominatorTree &DT);
+
+  // Candidates grouped by the address they share. MapVector keys on the
+  // values themselves, so unrelated groups cannot be merged by a hash
+  // collision, and it iterates in insertion order, so output is stable.
+  MapVector<BaseOffsetPair, CandidateList> Candidates;
+};
+
+} // end anonymous namespace
+
+char WebAssemblyOptimizeGEPs::ID = 0;
+INITIALIZE_PASS_BEGIN(WebAssemblyOptimizeGEPs, DEBUG_TYPE, PASS_DESC, false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(WebAssemblyOptimizeGEPs, DEBUG_TYPE, PASS_DESC, false,
+                    false)
+
+Pass *llvm::createWebAssemblyOptimizeGEPs() {
+  return new WebAssemblyOptimizeGEPs();
+}
+
+bool WebAssemblyOptimizeGEPs::runOnLoop(Loop *L, LPPassManager &) {
+  if (skipLoop(L))
+    return false;
+
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader)
+    return false;
+
+  const WebAssemblySubtarget &ST =
+      getAnalysis<TargetPassConfig>()
+          .getTM<TargetMachine>()
+          .getSubtarget<WebAssemblySubtarget>(*Preheader->getParent());
+
+  // Addresses are currently hardcoded to use i32.
+  if (ST.hasAddr64()) {
+    LLVM_DEBUG(dbgs() << "WasmOptGep: Currently only supporting wasm32.\n");
+    return false;
+  }
+
+  // LSR only operates on the inner most loops.
+  if (!L->isInnermost()) {
+    LLVM_DEBUG(dbgs() << "WasmOptGep: Not inner most loop.\n");
+    return false;
+  }
+
+  if (L->getNumBlocks() > 1) {
+    LLVM_DEBUG(dbgs() << "WasmOptGep: Only handling single-block loops.\n");
+    return false;
+  }
+
+  // The pass object is reused for every loop, so drop the previous state.
+  Candidates.clear();
+
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  return run(L, DT);
+}
+
+void WebAssemblyOptimizeGEPs::addCandidate(GetElementPtrInst *GEP,
+                                           Value *RegOffset,
+                                           ConstantInt *ImmOffset) {
+  BaseOffsetPair Key(GEP->getPointerOperand(), RegOffset);
+  Candidates[Key].emplace_back(GEP, ImmOffset);
+}
+
+Value *WebAssemblyOptimizeGEPs::createNewBaseAddr(const BaseOffsetPair &Key,
+                                                  const CandidateList &Group,
+                                                  Loop *L,
+                                                  const DominatorTree &DT) {
+  Value *BasePtr = Key.first;
+  Value *Offset = Key.second;
+
+  LLVM_DEBUG(dbgs() << "WasmOptGep: Creating new base addr.\n"
+                    << " with base address: " << *BasePtr << "\n"
+                    << " and reg offset: " << *Offset << "\n");
+
+  assert(BasePtr->getType()->isPointerTy() && "Expected PointerTy");
+  assert(!Group.empty() && "Expected at least one candidate");
+
+  // Choose an insertion point for the address calculation:
+  // - either in the preheader,
+  // - or just before the first gep.
+  Instruction *InsertPt = nullptr;
+  if (L->isLoopInvariant(BasePtr) && L->isLoopInvariant(Offset)) {
+    InsertPt = L->getLoopPreheader()->getTerminator();
+  } else {
+    // Candidates are not stored in program order (revisited GEPs are appended
+    // last), so look for the one that dominates all the others.
+    InsertPt = Group.front().getGEP();
+    for (const RebaseCandidate &Candidate : Group)
+      if (DT.dominates(Candidate.getGEP(), InsertPt))
+        InsertPt = Candidate.getGEP();
+  }
+
+  // Emitting at the insertion point keeps the three values in order and, as
+  // nothing is cast to Instruction, a constant-folded result is fine too.
+  IRBuilder<> Builder(InsertPt);
+  Value *BaseInt = Builder.CreatePtrToInt(BasePtr, Builder.getInt32Ty());
+  Value *Sum = Builder.CreateAdd(BaseInt, Offset, "", /*HasNUW=*/true);
+  Value *NewBase = Builder.CreateIntToPtr(Sum, BasePtr->getType());
+
+  LLVM_DEBUG(dbgs() << " new base addr: " << *NewBase << "\n");
+  return NewBase;
+}
+
+bool WebAssemblyOptimizeGEPs::run(Loop *L, const DominatorTree &DT) {
+  assert(L->getNumBlocks() == 1 && "Expected single block loop");
+
+  // An inbounds GetElementPtr with a single i32 index on an address space 0
+  // pointer. createNewBaseAddr() does its arithmetic in i32, so another
+  // index width would give an ill-typed add; other address spaces are left
+  // alone rather than being round-tripped through ptrtoint/inttoptr.
+  auto IsValidGEP = [](const GetElementPtrInst *GEP) {
+    return GEP->isInBounds() && GEP->getNumIndices() == 1 &&
+           GEP->getPointerAddressSpace() == 0 &&
+           GEP->getOperand(1)->getType()->isIntegerTy(32);
+  };
+
+  // Records the GEP when its index is an Or with a constant.
+  auto AddIfValidIndex = [this](GetElementPtrInst *GEP) {
+    auto *Index = dyn_cast<BinaryOperator>(GEP->getOperand(1));
+    if (!Index || Index->getOpcode() != Instruction::Or)
+      return false;
+    auto *ImmOffset = dyn_cast<ConstantInt>(Index->getOperand(1));
+    if (!ImmOffset)
+      return false;
+    addCandidate(GEP, Index->getOperand(0), ImmOffset);
+    return true;
+  };
+
+  // Search the loop for all the GEPs and indices that meet our requirements,
+  // and also record any other valid GEPs to revisit.
+  SmallVector<GetElementPtrInst *, 8> ToRevisit;
+  for (Instruction &I : *L->getHeader()) {
+    auto *GEP = dyn_cast<GetElementPtrInst>(&I);
+    if (!GEP || !IsValidGEP(GEP))
+      continue;
+    if (!AddIfValidIndex(GEP))
+      ToRevisit.push_back(GEP);
+  }
+
+  // Look for any GEPs that are already accessing an address that we are
+  // going to regenerate during the rebase.
+  LLVMContext &Ctx = L->getHeader()->getContext();
+  ConstantInt *Zero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
+  for (GetElementPtrInst *GEP : ToRevisit) {
+    Value *RegOffset = GEP->getOperand(1);
+    if (Candidates.count(BaseOffsetPair(GEP->getPointerOperand(), RegOffset)))
+      addCandidate(GEP, RegOffset, Zero);
+  }
+
+  if (Candidates.empty())
+    return false;
+
+  LLVM_DEBUG({
+    dbgs() << "WasmOptGep: Found rebase candidates:\n";
+    for (const auto &Group : Candidates)
+      for (const RebaseCandidate &Candidate : Group.second)
+        Candidate.dump();
+  });
+
+  // Refactor the common base components into a new base address, updating each
+  // GEP to use it, as well as an immediate index. A group of one gains nothing
+  // from a shared base, so it is skipped before any IR is created for it.
+  bool Changed = false;
+  for (const auto &Group : Candidates) {
+    const CandidateList &GEPs = Group.second;
+    if (GEPs.size() < 2)
+      continue;
+    Value *NewBase = createNewBaseAddr(Group.first, GEPs, L, DT);
+    for (const RebaseCandidate &Candidate : GEPs)
+      Candidate.update(NewBase);
+    Changed = true;
+  }
+  return Changed;
+}
Index: llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -80,6 +80,7 @@
   initializeWebAssemblyDebugFixupPass(PR);
   initializeWebAssemblyPeepholePass(PR);
   initializeWebAssemblyMCLowerPrePassPass(PR);
+  initializeWebAssemblyOptimizeGEPsPass(PR);
 }
 
 //===----------------------------------------------------------------------===//
@@ -450,6 +451,10 @@
   // Expand indirectbr instructions to switches.
   addPass(createIndirectBrExpandPass());
 
+  // Modify GetElementPtr before LoopStrengthReduce.
+ if (getOptLevel() != CodeGenOpt::None) + addPass(createWebAssemblyOptimizeGEPs()); + TargetPassConfig::addIRPasses(); } Index: llvm/test/CodeGen/WebAssembly/optimize-geps.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/WebAssembly/optimize-geps.ll @@ -0,0 +1,391 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=wasm32 -instcombine -wasm-optimize-geps -loop-reduce -S %s -o - | FileCheck %s + +target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" + +define hidden void @one_dim(ptr nocapture noundef readonly %arg, ptr nocapture noundef readonly %arg1, ptr nocapture noundef writeonly %arg2) { +; CHECK-LABEL: @one_dim( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[ARG3:%.*]] = ptrtoint ptr [[ARG:%.*]] to i32 +; CHECK-NEXT: [[ARG12:%.*]] = ptrtoint ptr [[ARG1:%.*]] to i32 +; CHECK-NEXT: [[ARG21:%.*]] = ptrtoint ptr [[ARG2:%.*]] to i32 +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; CHECK: bb4: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[I22:%.*]], [[BB4]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG3]], [[I]] +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[TMP0]] to ptr +; CHECK-NEXT: [[I5:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[I6:%.*]] = load i16, ptr [[I5]], align 2 +; CHECK-NEXT: [[I7:%.*]] = sext i16 [[I6]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[ARG12]], [[I]] +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr +; CHECK-NEXT: [[I8:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[I9:%.*]] = load i16, ptr [[I8]], align 2 +; CHECK-NEXT: [[I10:%.*]] = sext i16 [[I9]] to i32 +; CHECK-NEXT: [[I11:%.*]] = add nsw i32 [[I10]], [[I7]] +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[ARG21]], [[I]] +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP4]] to ptr +; CHECK-NEXT: [[I12:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 +; CHECK-NEXT: 
store i32 [[I11]], ptr [[I12]], align 4 +; CHECK-NEXT: [[I13:%.*]] = or i32 [[I]], 1 +; CHECK-NEXT: [[I14:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 1 +; CHECK-NEXT: [[I15:%.*]] = load i16, ptr [[I14]], align 2 +; CHECK-NEXT: [[I16:%.*]] = sext i16 [[I15]] to i32 +; CHECK-NEXT: [[I17:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 1 +; CHECK-NEXT: [[I18:%.*]] = load i16, ptr [[I17]], align 2 +; CHECK-NEXT: [[I19:%.*]] = sext i16 [[I18]] to i32 +; CHECK-NEXT: [[I20:%.*]] = add nsw i32 [[I19]], [[I16]] +; CHECK-NEXT: [[I21:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 1 +; CHECK-NEXT: store i32 [[I20]], ptr [[I21]], align 4 +; CHECK-NEXT: [[I22]] = add nuw nsw i32 [[I]], 2 +; CHECK-NEXT: [[I23:%.*]] = icmp eq i32 [[I22]], 10000 +; CHECK-NEXT: br i1 [[I23]], label [[BB3:%.*]], label [[BB4]] +; +bb: + br label %bb4 + +bb3: ; preds = %bb4 + ret void + +bb4: ; preds = %bb4, %bb + %i = phi i32 [ 0, %bb ], [ %i22, %bb4 ] + %i5 = getelementptr inbounds i16, ptr %arg, i32 %i + %i6 = load i16, ptr %i5, align 2 + %i7 = sext i16 %i6 to i32 + %i8 = getelementptr inbounds i16, ptr %arg1, i32 %i + %i9 = load i16, ptr %i8, align 2 + %i10 = sext i16 %i9 to i32 + %i11 = add nsw i32 %i10, %i7 + %i12 = getelementptr inbounds i32, ptr %arg2, i32 %i + store i32 %i11, ptr %i12, align 4 + %i13 = or i32 %i, 1 + %i14 = getelementptr inbounds i16, ptr %arg, i32 %i13 + %i15 = load i16, ptr %i14, align 2 + %i16 = sext i16 %i15 to i32 + %i17 = getelementptr inbounds i16, ptr %arg1, i32 %i13 + %i18 = load i16, ptr %i17, align 2 + %i19 = sext i16 %i18 to i32 + %i20 = add nsw i32 %i19, %i16 + %i21 = getelementptr inbounds i32, ptr %arg2, i32 %i13 + store i32 %i20, ptr %i21, align 4 + %i22 = add nuw nsw i32 %i, 2 + %i23 = icmp eq i32 %i22, 10000 + br i1 %i23, label %bb3, label %bb4 +} + +define hidden void @one_dim_no_inbound_loads(ptr nocapture noundef readonly %arg, ptr nocapture noundef readonly %arg1, ptr nocapture noundef writeonly %arg2) { +; CHECK-LABEL: 
@one_dim_no_inbound_loads( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[ARG21:%.*]] = ptrtoint ptr [[ARG2:%.*]] to i32 +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; CHECK: bb4: +; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i32 [ [[LSR_IV_NEXT3:%.*]], [[BB4]] ], [ 0, [[BB:%.*]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB4]] ], [ [[ARG21]], [[BB]] ] +; CHECK-NEXT: [[UGLYGEP8:%.*]] = getelementptr i8, ptr [[ARG:%.*]], i32 [[LSR_IV2]] +; CHECK-NEXT: [[I6:%.*]] = load i16, ptr [[UGLYGEP8]], align 2 +; CHECK-NEXT: [[I7:%.*]] = sext i16 [[I6]] to i32 +; CHECK-NEXT: [[UGLYGEP7:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i32 [[LSR_IV2]] +; CHECK-NEXT: [[I9:%.*]] = load i16, ptr [[UGLYGEP7]], align 2 +; CHECK-NEXT: [[I10:%.*]] = sext i16 [[I9]] to i32 +; CHECK-NEXT: [[I11:%.*]] = add nsw i32 [[I10]], [[I7]] +; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i32 [[LSR_IV]] to ptr +; CHECK-NEXT: [[I12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: store i32 [[I11]], ptr [[I12]], align 4 +; CHECK-NEXT: [[UGLYGEP5:%.*]] = getelementptr i8, ptr [[ARG]], i32 [[LSR_IV2]] +; CHECK-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, ptr [[UGLYGEP5]], i32 2 +; CHECK-NEXT: [[I15:%.*]] = load i16, ptr [[UGLYGEP6]], align 2 +; CHECK-NEXT: [[I16:%.*]] = sext i16 [[I15]] to i32 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[ARG1]], i32 [[LSR_IV2]] +; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i32 2 +; CHECK-NEXT: [[I18:%.*]] = load i16, ptr [[UGLYGEP4]], align 2 +; CHECK-NEXT: [[I19:%.*]] = sext i16 [[I18]] to i32 +; CHECK-NEXT: [[I20:%.*]] = add nsw i32 [[I19]], [[I16]] +; CHECK-NEXT: [[I21:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +; CHECK-NEXT: store i32 [[I20]], ptr [[I21]], align 4 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 2 +; CHECK-NEXT: [[LSR_IV_NEXT3]] = add nuw nsw i32 [[LSR_IV2]], 4 +; CHECK-NEXT: [[I23:%.*]] = icmp eq i32 [[LSR_IV_NEXT3]], 20000 +; CHECK-NEXT: br i1 [[I23]], 
label [[BB3:%.*]], label [[BB4]] +; +bb: + br label %bb4 + +bb3: ; preds = %bb4 + ret void + +bb4: ; preds = %bb4, %bb + %i = phi i32 [ 0, %bb ], [ %i22, %bb4 ] + %i5 = getelementptr i16, ptr %arg, i32 %i + %i6 = load i16, ptr %i5, align 2 + %i7 = sext i16 %i6 to i32 + %i8 = getelementptr i16, ptr %arg1, i32 %i + %i9 = load i16, ptr %i8, align 2 + %i10 = sext i16 %i9 to i32 + %i11 = add nsw i32 %i10, %i7 + %i12 = getelementptr inbounds i32, ptr %arg2, i32 %i + store i32 %i11, ptr %i12, align 4 + %i13 = or i32 %i, 1 + %i14 = getelementptr i16, ptr %arg, i32 %i13 + %i15 = load i16, ptr %i14, align 2 + %i16 = sext i16 %i15 to i32 + %i17 = getelementptr i16, ptr %arg1, i32 %i13 + %i18 = load i16, ptr %i17, align 2 + %i19 = sext i16 %i18 to i32 + %i20 = add nsw i32 %i19, %i16 + %i21 = getelementptr inbounds i32, ptr %arg2, i32 %i13 + store i32 %i20, ptr %i21, align 4 + %i22 = add nuw nsw i32 %i, 2 + %i23 = icmp eq i32 %i22, 10000 + br i1 %i23, label %bb3, label %bb4 +} + +define hidden void @two_dims(ptr nocapture noundef readonly %arg, ptr nocapture noundef readonly %arg1, ptr nocapture noundef %arg2) { +; CHECK-LABEL: @two_dims( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb3: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[I12:%.*]], [[BB11:%.*]] ] +; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i32 [[I]] +; CHECK-NEXT: [[I5:%.*]] = load ptr, ptr [[I4]], align 4 +; CHECK-NEXT: [[I54:%.*]] = ptrtoint ptr [[I5]] to i32 +; CHECK-NEXT: [[I6:%.*]] = getelementptr inbounds ptr, ptr [[ARG1:%.*]], i32 [[I]] +; CHECK-NEXT: [[I7:%.*]] = load ptr, ptr [[I6]], align 4 +; CHECK-NEXT: [[I71:%.*]] = ptrtoint ptr [[I7]] to i32 +; CHECK-NEXT: [[I8:%.*]] = getelementptr inbounds i32, ptr [[ARG2:%.*]], i32 [[I]] +; CHECK-NEXT: [[I9:%.*]] = load i32, ptr [[I8]], align 4 +; CHECK-NEXT: br label [[BB14:%.*]] +; CHECK: bb10: +; CHECK-NEXT: ret void +; CHECK: bb11: +; CHECK-NEXT: store i32 [[I51:%.*]], ptr [[I8]], align 4 +; 
CHECK-NEXT: [[I12]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[I13:%.*]] = icmp eq i32 [[I12]], 10000 +; CHECK-NEXT: br i1 [[I13]], label [[BB10:%.*]], label [[BB3]] +; CHECK: bb14: +; CHECK-NEXT: [[I15:%.*]] = phi i32 [ 0, [[BB3]] ], [ [[I52:%.*]], [[BB14]] ] +; CHECK-NEXT: [[I16:%.*]] = phi i32 [ [[I9]], [[BB3]] ], [ [[I51]], [[BB14]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I54]], [[I15]] +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[TMP0]] to ptr +; CHECK-NEXT: [[I17:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[I18:%.*]] = load i16, ptr [[I17]], align 2 +; CHECK-NEXT: [[I19:%.*]] = sext i16 [[I18]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[I71]], [[I15]] +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr +; CHECK-NEXT: [[I20:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[I21:%.*]] = load i16, ptr [[I20]], align 2 +; CHECK-NEXT: [[I22:%.*]] = sext i16 [[I21]] to i32 +; CHECK-NEXT: [[I23:%.*]] = add nsw i32 [[I22]], [[I19]] +; CHECK-NEXT: [[I24:%.*]] = add nsw i32 [[I23]], [[I16]] +; CHECK-NEXT: [[I25:%.*]] = or i32 [[I15]], 1 +; CHECK-NEXT: [[I26:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 1 +; CHECK-NEXT: [[I27:%.*]] = load i16, ptr [[I26]], align 2 +; CHECK-NEXT: [[I28:%.*]] = sext i16 [[I27]] to i32 +; CHECK-NEXT: [[I29:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 1 +; CHECK-NEXT: [[I30:%.*]] = load i16, ptr [[I29]], align 2 +; CHECK-NEXT: [[I31:%.*]] = sext i16 [[I30]] to i32 +; CHECK-NEXT: [[I32:%.*]] = add nsw i32 [[I31]], [[I28]] +; CHECK-NEXT: [[I33:%.*]] = add nsw i32 [[I32]], [[I24]] +; CHECK-NEXT: [[I34:%.*]] = or i32 [[I15]], 2 +; CHECK-NEXT: [[I35:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[I36:%.*]] = load i16, ptr [[I35]], align 2 +; CHECK-NEXT: [[I37:%.*]] = sext i16 [[I36]] to i32 +; CHECK-NEXT: [[I38:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[I39:%.*]] = load i16, ptr [[I38]], align 2 +; CHECK-NEXT: 
[[I40:%.*]] = sext i16 [[I39]] to i32 +; CHECK-NEXT: [[I41:%.*]] = add nsw i32 [[I40]], [[I37]] +; CHECK-NEXT: [[I42:%.*]] = add nsw i32 [[I41]], [[I33]] +; CHECK-NEXT: [[I43:%.*]] = or i32 [[I15]], 3 +; CHECK-NEXT: [[I44:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 3 +; CHECK-NEXT: [[I45:%.*]] = load i16, ptr [[I44]], align 2 +; CHECK-NEXT: [[I46:%.*]] = sext i16 [[I45]] to i32 +; CHECK-NEXT: [[I47:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 3 +; CHECK-NEXT: [[I48:%.*]] = load i16, ptr [[I47]], align 2 +; CHECK-NEXT: [[I49:%.*]] = sext i16 [[I48]] to i32 +; CHECK-NEXT: [[I50:%.*]] = add nsw i32 [[I49]], [[I46]] +; CHECK-NEXT: [[I51]] = add nsw i32 [[I50]], [[I42]] +; CHECK-NEXT: [[I52]] = add nuw nsw i32 [[I15]], 4 +; CHECK-NEXT: [[I53:%.*]] = icmp eq i32 [[I52]], 10000 +; CHECK-NEXT: br i1 [[I53]], label [[BB11]], label [[BB14]] +; +bb: + br label %bb3 + +bb3: ; preds = %bb11, %bb + %i = phi i32 [ 0, %bb ], [ %i12, %bb11 ] + %i4 = getelementptr inbounds ptr, ptr %arg, i32 %i + %i5 = load ptr, ptr %i4, align 4 + %i6 = getelementptr inbounds ptr, ptr %arg1, i32 %i + %i7 = load ptr, ptr %i6, align 4 + %i8 = getelementptr inbounds i32, ptr %arg2, i32 %i + %i9 = load i32, ptr %i8, align 4 + br label %bb14 + +bb10: ; preds = %bb11 + ret void + +bb11: ; preds = %bb14 + store i32 %i51, ptr %i8, align 4 + %i12 = add nuw nsw i32 %i, 1 + %i13 = icmp eq i32 %i12, 10000 + br i1 %i13, label %bb10, label %bb3 + +bb14: ; preds = %bb14, %bb3 + %i15 = phi i32 [ 0, %bb3 ], [ %i52, %bb14 ] + %i16 = phi i32 [ %i9, %bb3 ], [ %i51, %bb14 ] + %i17 = getelementptr inbounds i16, ptr %i5, i32 %i15 + %i18 = load i16, ptr %i17, align 2 + %i19 = sext i16 %i18 to i32 + %i20 = getelementptr inbounds i16, ptr %i7, i32 %i15 + %i21 = load i16, ptr %i20, align 2 + %i22 = sext i16 %i21 to i32 + %i23 = add nsw i32 %i22, %i19 + %i24 = add nsw i32 %i23, %i16 + %i25 = or i32 %i15, 1 + %i26 = getelementptr inbounds i16, ptr %i5, i32 %i25 + %i27 = load i16, ptr %i26, align 2 + %i28 = 
sext i16 %i27 to i32 + %i29 = getelementptr inbounds i16, ptr %i7, i32 %i25 + %i30 = load i16, ptr %i29, align 2 + %i31 = sext i16 %i30 to i32 + %i32 = add nsw i32 %i31, %i28 + %i33 = add nsw i32 %i32, %i24 + %i34 = or i32 %i15, 2 + %i35 = getelementptr inbounds i16, ptr %i5, i32 %i34 + %i36 = load i16, ptr %i35, align 2 + %i37 = sext i16 %i36 to i32 + %i38 = getelementptr inbounds i16, ptr %i7, i32 %i34 + %i39 = load i16, ptr %i38, align 2 + %i40 = sext i16 %i39 to i32 + %i41 = add nsw i32 %i40, %i37 + %i42 = add nsw i32 %i41, %i33 + %i43 = or i32 %i15, 3 + %i44 = getelementptr inbounds i16, ptr %i5, i32 %i43 + %i45 = load i16, ptr %i44, align 2 + %i46 = sext i16 %i45 to i32 + %i47 = getelementptr inbounds i16, ptr %i7, i32 %i43 + %i48 = load i16, ptr %i47, align 2 + %i49 = sext i16 %i48 to i32 + %i50 = add nsw i32 %i49, %i46 + %i51 = add nsw i32 %i50, %i42 + %i52 = add nuw nsw i32 %i15, 4 + %i53 = icmp eq i32 %i52, 10000 + br i1 %i53, label %bb11, label %bb14 +} + +define hidden void @runtime(ptr nocapture noundef readonly %arg, ptr nocapture noundef readonly %arg1, ptr nocapture noundef writeonly %arg2, i32 noundef %arg3) { +; CHECK-LABEL: @runtime( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[ARG4:%.*]] = ptrtoint ptr [[ARG:%.*]] to i32 +; CHECK-NEXT: [[ARG12:%.*]] = ptrtoint ptr [[ARG1:%.*]] to i32 +; CHECK-NEXT: [[ARG21:%.*]] = ptrtoint ptr [[ARG2:%.*]] to i32 +; CHECK-NEXT: [[I:%.*]] = icmp eq i32 [[ARG3:%.*]], 0 +; CHECK-NEXT: br i1 [[I]], label [[BB19:%.*]], label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[I5:%.*]] = and i32 [[ARG3]], 1 +; CHECK-NEXT: [[I6:%.*]] = icmp eq i32 [[ARG3]], 1 +; CHECK-NEXT: br i1 [[I6]], label [[BB9:%.*]], label [[BB7:%.*]] +; CHECK: bb7: +; CHECK-NEXT: [[I8:%.*]] = and i32 [[ARG3]], -2 +; CHECK-NEXT: br label [[BB20:%.*]] +; CHECK: bb9.loopexit: +; CHECK-NEXT: br label [[BB9]] +; CHECK: bb9: +; CHECK-NEXT: [[I10:%.*]] = phi i32 [ 0, [[BB4]] ], [ [[I36:%.*]], [[BB9_LOOPEXIT:%.*]] ] +; CHECK-NEXT: [[I11:%.*]] = icmp eq i32 [[I5]], 0 +; 
CHECK-NEXT: br i1 [[I11]], label [[BB19]], label [[BB12:%.*]] +; CHECK: bb12: +; CHECK-NEXT: [[I13:%.*]] = getelementptr inbounds float, ptr [[ARG]], i32 [[I10]] +; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[I13]], align 4 +; CHECK-NEXT: [[I15:%.*]] = getelementptr inbounds float, ptr [[ARG1]], i32 [[I10]] +; CHECK-NEXT: [[I16:%.*]] = load float, ptr [[I15]], align 4 +; CHECK-NEXT: [[I17:%.*]] = fadd float [[I14]], [[I16]] +; CHECK-NEXT: [[I18:%.*]] = getelementptr inbounds float, ptr [[ARG2]], i32 [[I10]] +; CHECK-NEXT: store float [[I17]], ptr [[I18]], align 4 +; CHECK-NEXT: br label [[BB19]] +; CHECK: bb19: +; CHECK-NEXT: ret void +; CHECK: bb20: +; CHECK-NEXT: [[I21:%.*]] = phi i32 [ 0, [[BB7]] ], [ [[I36]], [[BB20]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG4]], [[I21]] +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i32 [[TMP0]] to ptr +; CHECK-NEXT: [[I23:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[I24:%.*]] = load float, ptr [[I23]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[ARG12]], [[I21]] +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr +; CHECK-NEXT: [[I25:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[I26:%.*]] = load float, ptr [[I25]], align 4 +; CHECK-NEXT: [[I27:%.*]] = fadd float [[I24]], [[I26]] +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[ARG21]], [[I21]] +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP4]] to ptr +; CHECK-NEXT: [[I28:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 +; CHECK-NEXT: store float [[I27]], ptr [[I28]], align 4 +; CHECK-NEXT: [[I29:%.*]] = or i32 [[I21]], 1 +; CHECK-NEXT: [[I30:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +; CHECK-NEXT: [[I31:%.*]] = load float, ptr [[I30]], align 4 +; CHECK-NEXT: [[I32:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +; CHECK-NEXT: [[I33:%.*]] = load float, ptr [[I32]], align 4 +; CHECK-NEXT: [[I34:%.*]] = fadd float [[I31]], [[I33]] +; CHECK-NEXT: [[I35:%.*]] = getelementptr inbounds 
float, ptr [[TMP5]], i32 1 +; CHECK-NEXT: store float [[I34]], ptr [[I35]], align 4 +; CHECK-NEXT: [[I36]] = add i32 [[I21]], 2 +; CHECK-NEXT: [[I38:%.*]] = icmp eq i32 [[I8]], [[I36]] +; CHECK-NEXT: br i1 [[I38]], label [[BB9_LOOPEXIT]], label [[BB20]] +; +bb: + %i = icmp eq i32 %arg3, 0 + br i1 %i, label %bb19, label %bb4 + +bb4: ; preds = %bb + %i5 = and i32 %arg3, 1 + %i6 = icmp eq i32 %arg3, 1 + br i1 %i6, label %bb9, label %bb7 + +bb7: ; preds = %bb4 + %i8 = and i32 %arg3, -2 + br label %bb20 + +bb9: ; preds = %bb20, %bb4 + %i10 = phi i32 [ 0, %bb4 ], [ %i36, %bb20 ] + %i11 = icmp eq i32 %i5, 0 + br i1 %i11, label %bb19, label %bb12 + +bb12: ; preds = %bb9 + %i13 = getelementptr inbounds float, ptr %arg, i32 %i10 + %i14 = load float, ptr %i13, align 4 + %i15 = getelementptr inbounds float, ptr %arg1, i32 %i10 + %i16 = load float, ptr %i15, align 4 + %i17 = fadd float %i14, %i16 + %i18 = getelementptr inbounds float, ptr %arg2, i32 %i10 + store float %i17, ptr %i18, align 4 + br label %bb19 + +bb19: ; preds = %bb12, %bb9, %bb + ret void + +bb20: ; preds = %bb20, %bb7 + %i21 = phi i32 [ 0, %bb7 ], [ %i36, %bb20 ] + %i22 = phi i32 [ 0, %bb7 ], [ %i37, %bb20 ] + %i23 = getelementptr inbounds float, ptr %arg, i32 %i21 + %i24 = load float, ptr %i23, align 4 + %i25 = getelementptr inbounds float, ptr %arg1, i32 %i21 + %i26 = load float, ptr %i25, align 4 + %i27 = fadd float %i24, %i26 + %i28 = getelementptr inbounds float, ptr %arg2, i32 %i21 + store float %i27, ptr %i28, align 4 + %i29 = or i32 %i21, 1 + %i30 = getelementptr inbounds float, ptr %arg, i32 %i29 + %i31 = load float, ptr %i30, align 4 + %i32 = getelementptr inbounds float, ptr %arg1, i32 %i29 + %i33 = load float, ptr %i32, align 4 + %i34 = fadd float %i31, %i33 + %i35 = getelementptr inbounds float, ptr %arg2, i32 %i29 + store float %i34, ptr %i35, align 4 + %i36 = add nuw i32 %i21, 2 + %i37 = add i32 %i22, 2 + %i38 = icmp eq i32 %i37, %i8 + br i1 %i38, label %bb9, label %bb20 +} Index: 
llvm/test/CodeGen/WebAssembly/unrolled-mem-indices.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/unrolled-mem-indices.ll +++ llvm/test/CodeGen/WebAssembly/unrolled-mem-indices.ll @@ -6,50 +6,43 @@ define hidden void @one_dim(ptr nocapture noundef readonly %arg, ptr nocapture noundef readonly %arg1, ptr nocapture noundef writeonly %arg2) { ; CHECK-LABEL: one_dim: ; CHECK: .functype one_dim (i32, i32, i32) -> () -; CHECK-NEXT: .local i32, i32, i32 +; CHECK-NEXT: .local i32, i32, i32, i32 ; CHECK-NEXT: # %bb.0: # %bb -; CHECK-NEXT: i32.const $push22=, 0 -; CHECK-NEXT: local.set 3, $pop22 +; CHECK-NEXT: i32.const $push17=, 0 +; CHECK-NEXT: local.set 3, $pop17 ; CHECK-NEXT: .LBB0_1: # %bb4 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: loop # label0: -; CHECK-NEXT: local.get $push27=, 2 -; CHECK-NEXT: local.get $push24=, 1 -; CHECK-NEXT: local.get $push23=, 3 -; CHECK-NEXT: i32.add $push21=, $pop24, $pop23 -; CHECK-NEXT: local.tee $push20=, 4, $pop21 -; CHECK-NEXT: i32.load16_s $push1=, 0($pop20) -; CHECK-NEXT: local.get $push26=, 0 -; CHECK-NEXT: local.get $push25=, 3 -; CHECK-NEXT: i32.add $push19=, $pop26, $pop25 -; CHECK-NEXT: local.tee $push18=, 5, $pop19 -; CHECK-NEXT: i32.load16_s $push0=, 0($pop18) +; CHECK-NEXT: local.get $push19=, 2 +; CHECK-NEXT: local.get $push18=, 3 +; CHECK-NEXT: i32.add $push16=, $pop19, $pop18 +; CHECK-NEXT: local.tee $push15=, 4, $pop16 +; CHECK-NEXT: local.get $push21=, 1 +; CHECK-NEXT: local.get $push20=, 3 +; CHECK-NEXT: i32.add $push14=, $pop21, $pop20 +; CHECK-NEXT: local.tee $push13=, 5, $pop14 +; CHECK-NEXT: i32.load16_s $push1=, 0($pop13) +; CHECK-NEXT: local.get $push23=, 0 +; CHECK-NEXT: local.get $push22=, 3 +; CHECK-NEXT: i32.add $push12=, $pop23, $pop22 +; CHECK-NEXT: local.tee $push11=, 6, $pop12 +; CHECK-NEXT: i32.load16_s $push0=, 0($pop11) ; CHECK-NEXT: i32.add $push2=, $pop1, $pop0 -; CHECK-NEXT: i32.store 0($pop27), $pop2 -; CHECK-NEXT: 
local.get $push28=, 2 -; CHECK-NEXT: i32.const $push17=, 4 -; CHECK-NEXT: i32.add $push8=, $pop28, $pop17 -; CHECK-NEXT: local.get $push29=, 4 -; CHECK-NEXT: i32.const $push16=, 2 -; CHECK-NEXT: i32.add $push5=, $pop29, $pop16 -; CHECK-NEXT: i32.load16_s $push6=, 0($pop5) -; CHECK-NEXT: local.get $push30=, 5 -; CHECK-NEXT: i32.const $push15=, 2 -; CHECK-NEXT: i32.add $push3=, $pop30, $pop15 -; CHECK-NEXT: i32.load16_s $push4=, 0($pop3) -; CHECK-NEXT: i32.add $push7=, $pop6, $pop4 -; CHECK-NEXT: i32.store 0($pop8), $pop7 -; CHECK-NEXT: local.get $push32=, 2 -; CHECK-NEXT: i32.const $push14=, 8 -; CHECK-NEXT: i32.add $push31=, $pop32, $pop14 -; CHECK-NEXT: local.set 2, $pop31 -; CHECK-NEXT: local.get $push33=, 3 -; CHECK-NEXT: i32.const $push13=, 4 -; CHECK-NEXT: i32.add $push12=, $pop33, $pop13 -; CHECK-NEXT: local.tee $push11=, 3, $pop12 -; CHECK-NEXT: i32.const $push10=, 20000 -; CHECK-NEXT: i32.ne $push9=, $pop11, $pop10 -; CHECK-NEXT: br_if 0, $pop9 # 0: up to label0 +; CHECK-NEXT: i32.store 0($pop15), $pop2 +; CHECK-NEXT: local.get $push26=, 4 +; CHECK-NEXT: local.get $push24=, 5 +; CHECK-NEXT: i32.load16_s $push4=, 2($pop24) +; CHECK-NEXT: local.get $push25=, 6 +; CHECK-NEXT: i32.load16_s $push3=, 2($pop25) +; CHECK-NEXT: i32.add $push5=, $pop4, $pop3 +; CHECK-NEXT: i32.store 4($pop26), $pop5 +; CHECK-NEXT: local.get $push27=, 3 +; CHECK-NEXT: i32.const $push10=, 2 +; CHECK-NEXT: i32.add $push9=, $pop27, $pop10 +; CHECK-NEXT: local.tee $push8=, 3, $pop9 +; CHECK-NEXT: i32.const $push7=, 10000 +; CHECK-NEXT: i32.ne $push6=, $pop8, $pop7 +; CHECK-NEXT: br_if 0, $pop6 # 0: up to label0 ; CHECK-NEXT: # %bb.2: # %bb3 ; CHECK-NEXT: end_loop ; CHECK-NEXT: # fallthrough-return @@ -90,48 +83,46 @@ ; CHECK: .functype one_dim_no_inbound_loads (i32, i32, i32) -> () ; CHECK-NEXT: .local i32, i32, i32 ; CHECK-NEXT: # %bb.0: # %bb -; CHECK-NEXT: i32.const $push22=, 0 -; CHECK-NEXT: local.set 3, $pop22 +; CHECK-NEXT: i32.const $push20=, 0 +; CHECK-NEXT: local.set 3, $pop20 ; 
CHECK-NEXT: .LBB1_1: # %bb4 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: loop # label1: -; CHECK-NEXT: local.get $push27=, 2 -; CHECK-NEXT: local.get $push24=, 1 +; CHECK-NEXT: local.get $push25=, 2 +; CHECK-NEXT: local.get $push22=, 1 +; CHECK-NEXT: local.get $push21=, 3 +; CHECK-NEXT: i32.add $push19=, $pop22, $pop21 +; CHECK-NEXT: local.tee $push18=, 4, $pop19 +; CHECK-NEXT: i32.load16_s $push1=, 0($pop18) +; CHECK-NEXT: local.get $push24=, 0 ; CHECK-NEXT: local.get $push23=, 3 -; CHECK-NEXT: i32.add $push21=, $pop24, $pop23 -; CHECK-NEXT: local.tee $push20=, 4, $pop21 -; CHECK-NEXT: i32.load16_s $push1=, 0($pop20) -; CHECK-NEXT: local.get $push26=, 0 -; CHECK-NEXT: local.get $push25=, 3 -; CHECK-NEXT: i32.add $push19=, $pop26, $pop25 -; CHECK-NEXT: local.tee $push18=, 5, $pop19 -; CHECK-NEXT: i32.load16_s $push0=, 0($pop18) +; CHECK-NEXT: i32.add $push17=, $pop24, $pop23 +; CHECK-NEXT: local.tee $push16=, 5, $pop17 +; CHECK-NEXT: i32.load16_s $push0=, 0($pop16) ; CHECK-NEXT: i32.add $push2=, $pop1, $pop0 -; CHECK-NEXT: i32.store 0($pop27), $pop2 +; CHECK-NEXT: i32.store 0($pop25), $pop2 ; CHECK-NEXT: local.get $push28=, 2 -; CHECK-NEXT: i32.const $push17=, 4 -; CHECK-NEXT: i32.add $push8=, $pop28, $pop17 -; CHECK-NEXT: local.get $push29=, 4 -; CHECK-NEXT: i32.const $push16=, 2 -; CHECK-NEXT: i32.add $push5=, $pop29, $pop16 -; CHECK-NEXT: i32.load16_s $push6=, 0($pop5) -; CHECK-NEXT: local.get $push30=, 5 +; CHECK-NEXT: local.get $push26=, 4 ; CHECK-NEXT: i32.const $push15=, 2 -; CHECK-NEXT: i32.add $push3=, $pop30, $pop15 +; CHECK-NEXT: i32.add $push5=, $pop26, $pop15 +; CHECK-NEXT: i32.load16_s $push6=, 0($pop5) +; CHECK-NEXT: local.get $push27=, 5 +; CHECK-NEXT: i32.const $push14=, 2 +; CHECK-NEXT: i32.add $push3=, $pop27, $pop14 ; CHECK-NEXT: i32.load16_s $push4=, 0($pop3) ; CHECK-NEXT: i32.add $push7=, $pop6, $pop4 -; CHECK-NEXT: i32.store 0($pop8), $pop7 -; CHECK-NEXT: local.get $push32=, 2 -; CHECK-NEXT: i32.const $push14=, 8 -; 
CHECK-NEXT: i32.add $push31=, $pop32, $pop14 -; CHECK-NEXT: local.set 2, $pop31 -; CHECK-NEXT: local.get $push33=, 3 -; CHECK-NEXT: i32.const $push13=, 4 -; CHECK-NEXT: i32.add $push12=, $pop33, $pop13 -; CHECK-NEXT: local.tee $push11=, 3, $pop12 -; CHECK-NEXT: i32.const $push10=, 20000 -; CHECK-NEXT: i32.ne $push9=, $pop11, $pop10 -; CHECK-NEXT: br_if 0, $pop9 # 0: up to label1 +; CHECK-NEXT: i32.store 4($pop28), $pop7 +; CHECK-NEXT: local.get $push30=, 2 +; CHECK-NEXT: i32.const $push13=, 2 +; CHECK-NEXT: i32.add $push29=, $pop30, $pop13 +; CHECK-NEXT: local.set 2, $pop29 +; CHECK-NEXT: local.get $push31=, 3 +; CHECK-NEXT: i32.const $push12=, 4 +; CHECK-NEXT: i32.add $push11=, $pop31, $pop12 +; CHECK-NEXT: local.tee $push10=, 3, $pop11 +; CHECK-NEXT: i32.const $push9=, 20000 +; CHECK-NEXT: i32.ne $push8=, $pop10, $pop9 +; CHECK-NEXT: br_if 0, $pop8 # 0: up to label1 ; CHECK-NEXT: # %bb.2: # %bb3 ; CHECK-NEXT: end_loop ; CHECK-NEXT: # fallthrough-return @@ -172,101 +163,89 @@ ; CHECK: .functype two_dims (i32, i32, i32) -> () ; CHECK-NEXT: .local i32, i32, i32, i32, i32, i32, i32, i32 ; CHECK-NEXT: # %bb.0: # %bb -; CHECK-NEXT: i32.const $push48=, 0 -; CHECK-NEXT: local.set 3, $pop48 +; CHECK-NEXT: i32.const $push36=, 0 +; CHECK-NEXT: local.set 3, $pop36 ; CHECK-NEXT: .LBB2_1: # %bb3 ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB2_2 Depth 2 ; CHECK-NEXT: loop # label2: -; CHECK-NEXT: local.get $push50=, 2 -; CHECK-NEXT: local.get $push49=, 3 -; CHECK-NEXT: i32.const $push29=, 2 -; CHECK-NEXT: i32.shl $push28=, $pop49, $pop29 -; CHECK-NEXT: local.tee $push27=, 4, $pop28 -; CHECK-NEXT: i32.add $push26=, $pop50, $pop27 -; CHECK-NEXT: local.tee $push25=, 5, $pop26 -; CHECK-NEXT: i32.load $push51=, 0($pop25) -; CHECK-NEXT: local.set 6, $pop51 -; CHECK-NEXT: local.get $push53=, 1 -; CHECK-NEXT: local.get $push52=, 4 -; CHECK-NEXT: i32.add $push0=, $pop53, $pop52 -; CHECK-NEXT: i32.load $push54=, 0($pop0) -; CHECK-NEXT: local.set 7, $pop54 -; 
CHECK-NEXT: local.get $push56=, 0 -; CHECK-NEXT: local.get $push55=, 4 -; CHECK-NEXT: i32.add $push1=, $pop56, $pop55 -; CHECK-NEXT: i32.load $push57=, 0($pop1) -; CHECK-NEXT: local.set 8, $pop57 -; CHECK-NEXT: i32.const $push58=, 0 -; CHECK-NEXT: local.set 4, $pop58 +; CHECK-NEXT: local.get $push38=, 2 +; CHECK-NEXT: local.get $push37=, 3 +; CHECK-NEXT: i32.const $push23=, 2 +; CHECK-NEXT: i32.shl $push22=, $pop37, $pop23 +; CHECK-NEXT: local.tee $push21=, 4, $pop22 +; CHECK-NEXT: i32.add $push20=, $pop38, $pop21 +; CHECK-NEXT: local.tee $push19=, 5, $pop20 +; CHECK-NEXT: i32.load $push39=, 0($pop19) +; CHECK-NEXT: local.set 6, $pop39 +; CHECK-NEXT: local.get $push41=, 1 +; CHECK-NEXT: local.get $push40=, 4 +; CHECK-NEXT: i32.add $push0=, $pop41, $pop40 +; CHECK-NEXT: i32.load $push42=, 0($pop0) +; CHECK-NEXT: local.set 7, $pop42 +; CHECK-NEXT: local.get $push44=, 0 +; CHECK-NEXT: local.get $push43=, 4 +; CHECK-NEXT: i32.add $push1=, $pop44, $pop43 +; CHECK-NEXT: i32.load $push45=, 0($pop1) +; CHECK-NEXT: local.set 8, $pop45 +; CHECK-NEXT: i32.const $push46=, 0 +; CHECK-NEXT: local.set 4, $pop46 ; CHECK-NEXT: .LBB2_2: # %bb14 ; CHECK-NEXT: # Parent Loop BB2_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: loop # label3: -; CHECK-NEXT: local.get $push60=, 7 -; CHECK-NEXT: local.get $push59=, 4 -; CHECK-NEXT: i32.add $push43=, $pop60, $pop59 -; CHECK-NEXT: local.tee $push42=, 9, $pop43 -; CHECK-NEXT: i32.const $push41=, 6 -; CHECK-NEXT: i32.add $push20=, $pop42, $pop41 -; CHECK-NEXT: i32.load16_s $push21=, 0($pop20) -; CHECK-NEXT: local.get $push62=, 8 -; CHECK-NEXT: local.get $push61=, 4 -; CHECK-NEXT: i32.add $push40=, $pop62, $pop61 -; CHECK-NEXT: local.tee $push39=, 10, $pop40 -; CHECK-NEXT: i32.const $push38=, 6 -; CHECK-NEXT: i32.add $push18=, $pop39, $pop38 -; CHECK-NEXT: i32.load16_s $push19=, 0($pop18) -; CHECK-NEXT: i32.add $push22=, $pop21, $pop19 -; CHECK-NEXT: local.get $push63=, 9 -; CHECK-NEXT: i32.const $push37=, 4 -; 
CHECK-NEXT: i32.add $push14=, $pop63, $pop37 -; CHECK-NEXT: i32.load16_s $push15=, 0($pop14) -; CHECK-NEXT: local.get $push64=, 10 -; CHECK-NEXT: i32.const $push36=, 4 -; CHECK-NEXT: i32.add $push12=, $pop64, $pop36 -; CHECK-NEXT: i32.load16_s $push13=, 0($pop12) -; CHECK-NEXT: i32.add $push16=, $pop15, $pop13 -; CHECK-NEXT: local.get $push65=, 9 -; CHECK-NEXT: i32.const $push35=, 2 -; CHECK-NEXT: i32.add $push8=, $pop65, $pop35 -; CHECK-NEXT: i32.load16_s $push9=, 0($pop8) -; CHECK-NEXT: local.get $push66=, 10 -; CHECK-NEXT: i32.const $push34=, 2 -; CHECK-NEXT: i32.add $push6=, $pop66, $pop34 -; CHECK-NEXT: i32.load16_s $push7=, 0($pop6) -; CHECK-NEXT: i32.add $push10=, $pop9, $pop7 -; CHECK-NEXT: local.get $push67=, 9 -; CHECK-NEXT: i32.load16_s $push3=, 0($pop67) -; CHECK-NEXT: local.get $push68=, 10 -; CHECK-NEXT: i32.load16_s $push2=, 0($pop68) +; CHECK-NEXT: local.get $push48=, 7 +; CHECK-NEXT: local.get $push47=, 4 +; CHECK-NEXT: i32.add $push31=, $pop48, $pop47 +; CHECK-NEXT: local.tee $push30=, 9, $pop31 +; CHECK-NEXT: i32.load16_s $push15=, 6($pop30) +; CHECK-NEXT: local.get $push50=, 8 +; CHECK-NEXT: local.get $push49=, 4 +; CHECK-NEXT: i32.add $push29=, $pop50, $pop49 +; CHECK-NEXT: local.tee $push28=, 10, $pop29 +; CHECK-NEXT: i32.load16_s $push14=, 6($pop28) +; CHECK-NEXT: i32.add $push16=, $pop15, $pop14 +; CHECK-NEXT: local.get $push51=, 9 +; CHECK-NEXT: i32.load16_s $push11=, 4($pop51) +; CHECK-NEXT: local.get $push52=, 10 +; CHECK-NEXT: i32.load16_s $push10=, 4($pop52) +; CHECK-NEXT: i32.add $push12=, $pop11, $pop10 +; CHECK-NEXT: local.get $push53=, 9 +; CHECK-NEXT: i32.load16_s $push3=, 2($pop53) +; CHECK-NEXT: local.get $push54=, 10 +; CHECK-NEXT: i32.load16_s $push2=, 2($pop54) ; CHECK-NEXT: i32.add $push4=, $pop3, $pop2 -; CHECK-NEXT: local.get $push69=, 6 -; CHECK-NEXT: i32.add $push5=, $pop4, $pop69 -; CHECK-NEXT: i32.add $push11=, $pop10, $pop5 -; CHECK-NEXT: i32.add $push17=, $pop16, $pop11 -; CHECK-NEXT: i32.add $push70=, $pop22, $pop17 
-; CHECK-NEXT: local.set 6, $pop70 -; CHECK-NEXT: local.get $push71=, 4 -; CHECK-NEXT: i32.const $push33=, 8 -; CHECK-NEXT: i32.add $push32=, $pop71, $pop33 -; CHECK-NEXT: local.tee $push31=, 4, $pop32 -; CHECK-NEXT: i32.const $push30=, 20000 -; CHECK-NEXT: i32.ne $push23=, $pop31, $pop30 -; CHECK-NEXT: br_if 0, $pop23 # 0: up to label3 +; CHECK-NEXT: local.get $push55=, 9 +; CHECK-NEXT: i32.load16_s $push6=, 0($pop55) +; CHECK-NEXT: local.get $push56=, 10 +; CHECK-NEXT: i32.load16_s $push5=, 0($pop56) +; CHECK-NEXT: i32.add $push7=, $pop6, $pop5 +; CHECK-NEXT: local.get $push57=, 6 +; CHECK-NEXT: i32.add $push8=, $pop7, $pop57 +; CHECK-NEXT: i32.add $push9=, $pop4, $pop8 +; CHECK-NEXT: i32.add $push13=, $pop12, $pop9 +; CHECK-NEXT: i32.add $push58=, $pop16, $pop13 +; CHECK-NEXT: local.set 6, $pop58 +; CHECK-NEXT: local.get $push59=, 4 +; CHECK-NEXT: i32.const $push27=, 4 +; CHECK-NEXT: i32.add $push26=, $pop59, $pop27 +; CHECK-NEXT: local.tee $push25=, 4, $pop26 +; CHECK-NEXT: i32.const $push24=, 10000 +; CHECK-NEXT: i32.ne $push17=, $pop25, $pop24 +; CHECK-NEXT: br_if 0, $pop17 # 0: up to label3 ; CHECK-NEXT: # %bb.3: # %bb11 ; CHECK-NEXT: # in Loop: Header=BB2_1 Depth=1 ; CHECK-NEXT: end_loop -; CHECK-NEXT: local.get $push73=, 5 -; CHECK-NEXT: local.get $push72=, 6 -; CHECK-NEXT: i32.store 0($pop73), $pop72 -; CHECK-NEXT: local.get $push74=, 3 -; CHECK-NEXT: i32.const $push47=, 1 -; CHECK-NEXT: i32.add $push46=, $pop74, $pop47 -; CHECK-NEXT: local.tee $push45=, 3, $pop46 -; CHECK-NEXT: i32.const $push44=, 10000 -; CHECK-NEXT: i32.ne $push24=, $pop45, $pop44 -; CHECK-NEXT: br_if 0, $pop24 # 0: up to label2 +; CHECK-NEXT: local.get $push61=, 5 +; CHECK-NEXT: local.get $push60=, 6 +; CHECK-NEXT: i32.store 0($pop61), $pop60 +; CHECK-NEXT: local.get $push62=, 3 +; CHECK-NEXT: i32.const $push35=, 1 +; CHECK-NEXT: i32.add $push34=, $pop62, $pop35 +; CHECK-NEXT: local.tee $push33=, 3, $pop34 +; CHECK-NEXT: i32.const $push32=, 10000 +; CHECK-NEXT: i32.ne $push18=, 
$pop33, $pop32 +; CHECK-NEXT: br_if 0, $pop18 # 0: up to label2 ; CHECK-NEXT: # %bb.4: # %bb10 ; CHECK-NEXT: end_loop ; CHECK-NEXT: # fallthrough-return @@ -341,99 +320,84 @@ ; CHECK-NEXT: .local i32, i32, i32, i32, i32 ; CHECK-NEXT: # %bb.0: # %bb ; CHECK-NEXT: block -; CHECK-NEXT: local.get $push32=, 3 -; CHECK-NEXT: i32.eqz $push64=, $pop32 -; CHECK-NEXT: br_if 0, $pop64 # 0: down to label4 +; CHECK-NEXT: local.get $push29=, 3 +; CHECK-NEXT: i32.eqz $push55=, $pop29 +; CHECK-NEXT: br_if 0, $pop55 # 0: down to label4 ; CHECK-NEXT: # %bb.1: # %bb4 -; CHECK-NEXT: local.get $push34=, 3 +; CHECK-NEXT: local.get $push31=, 3 ; CHECK-NEXT: i32.const $push0=, 1 -; CHECK-NEXT: i32.and $push33=, $pop34, $pop0 -; CHECK-NEXT: local.set 4, $pop33 -; CHECK-NEXT: i32.const $push35=, 0 -; CHECK-NEXT: local.set 5, $pop35 +; CHECK-NEXT: i32.and $push30=, $pop31, $pop0 +; CHECK-NEXT: local.set 4, $pop30 +; CHECK-NEXT: i32.const $push32=, 0 +; CHECK-NEXT: local.set 5, $pop32 ; CHECK-NEXT: block -; CHECK-NEXT: local.get $push36=, 3 -; CHECK-NEXT: i32.const $push20=, 1 -; CHECK-NEXT: i32.eq $push1=, $pop36, $pop20 +; CHECK-NEXT: local.get $push33=, 3 +; CHECK-NEXT: i32.const $push17=, 1 +; CHECK-NEXT: i32.eq $push1=, $pop33, $pop17 ; CHECK-NEXT: br_if 0, $pop1 # 0: down to label5 ; CHECK-NEXT: # %bb.2: # %bb7 -; CHECK-NEXT: local.get $push38=, 3 +; CHECK-NEXT: local.get $push35=, 3 ; CHECK-NEXT: i32.const $push2=, -2 -; CHECK-NEXT: i32.and $push37=, $pop38, $pop2 -; CHECK-NEXT: local.set 6, $pop37 -; CHECK-NEXT: i32.const $push39=, 0 -; CHECK-NEXT: local.set 5, $pop39 -; CHECK-NEXT: local.get $push40=, 0 -; CHECK-NEXT: local.set 3, $pop40 -; CHECK-NEXT: local.get $push41=, 1 -; CHECK-NEXT: local.set 7, $pop41 -; CHECK-NEXT: local.get $push42=, 2 -; CHECK-NEXT: local.set 8, $pop42 +; CHECK-NEXT: i32.and $push34=, $pop35, $pop2 +; CHECK-NEXT: local.set 6, $pop34 +; CHECK-NEXT: i32.const $push36=, 0 +; CHECK-NEXT: local.set 5, $pop36 ; CHECK-NEXT: .LBB3_3: # %bb20 ; CHECK-NEXT: # =>This 
Inner Loop Header: Depth=1 ; CHECK-NEXT: loop # label6: -; CHECK-NEXT: local.get $push45=, 8 -; CHECK-NEXT: local.get $push43=, 3 -; CHECK-NEXT: f32.load $push4=, 0($pop43) -; CHECK-NEXT: local.get $push44=, 7 -; CHECK-NEXT: f32.load $push3=, 0($pop44) +; CHECK-NEXT: local.get $push38=, 2 +; CHECK-NEXT: local.get $push37=, 5 +; CHECK-NEXT: i32.add $push26=, $pop38, $pop37 +; CHECK-NEXT: local.tee $push25=, 3, $pop26 +; CHECK-NEXT: local.get $push40=, 0 +; CHECK-NEXT: local.get $push39=, 5 +; CHECK-NEXT: i32.add $push24=, $pop40, $pop39 +; CHECK-NEXT: local.tee $push23=, 7, $pop24 +; CHECK-NEXT: f32.load $push4=, 0($pop23) +; CHECK-NEXT: local.get $push42=, 1 +; CHECK-NEXT: local.get $push41=, 5 +; CHECK-NEXT: i32.add $push22=, $pop42, $pop41 +; CHECK-NEXT: local.tee $push21=, 8, $pop22 +; CHECK-NEXT: f32.load $push3=, 0($pop21) ; CHECK-NEXT: f32.add $push5=, $pop4, $pop3 -; CHECK-NEXT: f32.store 0($pop45), $pop5 -; CHECK-NEXT: local.get $push46=, 8 -; CHECK-NEXT: i32.const $push29=, 4 -; CHECK-NEXT: i32.add $push11=, $pop46, $pop29 -; CHECK-NEXT: local.get $push47=, 3 -; CHECK-NEXT: i32.const $push28=, 4 -; CHECK-NEXT: i32.add $push8=, $pop47, $pop28 -; CHECK-NEXT: f32.load $push9=, 0($pop8) -; CHECK-NEXT: local.get $push48=, 7 -; CHECK-NEXT: i32.const $push27=, 4 -; CHECK-NEXT: i32.add $push6=, $pop48, $pop27 -; CHECK-NEXT: f32.load $push7=, 0($pop6) -; CHECK-NEXT: f32.add $push10=, $pop9, $pop7 -; CHECK-NEXT: f32.store 0($pop11), $pop10 -; CHECK-NEXT: local.get $push50=, 3 -; CHECK-NEXT: i32.const $push26=, 8 -; CHECK-NEXT: i32.add $push49=, $pop50, $pop26 -; CHECK-NEXT: local.set 3, $pop49 -; CHECK-NEXT: local.get $push52=, 7 -; CHECK-NEXT: i32.const $push25=, 8 -; CHECK-NEXT: i32.add $push51=, $pop52, $pop25 -; CHECK-NEXT: local.set 7, $pop51 -; CHECK-NEXT: local.get $push54=, 8 -; CHECK-NEXT: i32.const $push24=, 8 -; CHECK-NEXT: i32.add $push53=, $pop54, $pop24 -; CHECK-NEXT: local.set 8, $pop53 -; CHECK-NEXT: local.get $push56=, 6 -; CHECK-NEXT: local.get 
$push55=, 5 -; CHECK-NEXT: i32.const $push23=, 2 -; CHECK-NEXT: i32.add $push22=, $pop55, $pop23 -; CHECK-NEXT: local.tee $push21=, 5, $pop22 -; CHECK-NEXT: i32.ne $push12=, $pop56, $pop21 -; CHECK-NEXT: br_if 0, $pop12 # 0: up to label6 +; CHECK-NEXT: f32.store 0($pop25), $pop5 +; CHECK-NEXT: local.get $push45=, 3 +; CHECK-NEXT: local.get $push43=, 7 +; CHECK-NEXT: f32.load $push7=, 4($pop43) +; CHECK-NEXT: local.get $push44=, 8 +; CHECK-NEXT: f32.load $push6=, 4($pop44) +; CHECK-NEXT: f32.add $push8=, $pop7, $pop6 +; CHECK-NEXT: f32.store 4($pop45), $pop8 +; CHECK-NEXT: local.get $push47=, 6 +; CHECK-NEXT: local.get $push46=, 5 +; CHECK-NEXT: i32.const $push20=, 2 +; CHECK-NEXT: i32.add $push19=, $pop46, $pop20 +; CHECK-NEXT: local.tee $push18=, 5, $pop19 +; CHECK-NEXT: i32.ne $push9=, $pop47, $pop18 +; CHECK-NEXT: br_if 0, $pop9 # 0: up to label6 ; CHECK-NEXT: .LBB3_4: # %bb9 ; CHECK-NEXT: end_loop ; CHECK-NEXT: end_block # label5: -; CHECK-NEXT: local.get $push57=, 4 -; CHECK-NEXT: i32.eqz $push65=, $pop57 -; CHECK-NEXT: br_if 0, $pop65 # 0: down to label4 +; CHECK-NEXT: local.get $push48=, 4 +; CHECK-NEXT: i32.eqz $push56=, $pop48 +; CHECK-NEXT: br_if 0, $pop56 # 0: down to label4 ; CHECK-NEXT: # %bb.5: # %bb12 -; CHECK-NEXT: local.get $push59=, 2 -; CHECK-NEXT: local.get $push58=, 5 -; CHECK-NEXT: i32.const $push13=, 2 -; CHECK-NEXT: i32.shl $push31=, $pop58, $pop13 -; CHECK-NEXT: local.tee $push30=, 3, $pop31 -; CHECK-NEXT: i32.add $push19=, $pop59, $pop30 -; CHECK-NEXT: local.get $push61=, 0 -; CHECK-NEXT: local.get $push60=, 3 -; CHECK-NEXT: i32.add $push16=, $pop61, $pop60 -; CHECK-NEXT: f32.load $push17=, 0($pop16) -; CHECK-NEXT: local.get $push63=, 1 -; CHECK-NEXT: local.get $push62=, 3 -; CHECK-NEXT: i32.add $push14=, $pop63, $pop62 -; CHECK-NEXT: f32.load $push15=, 0($pop14) -; CHECK-NEXT: f32.add $push18=, $pop17, $pop15 -; CHECK-NEXT: f32.store 0($pop19), $pop18 +; CHECK-NEXT: local.get $push50=, 2 +; CHECK-NEXT: local.get $push49=, 5 +; CHECK-NEXT: 
i32.const $push10=, 2 +; CHECK-NEXT: i32.shl $push28=, $pop49, $pop10 +; CHECK-NEXT: local.tee $push27=, 5, $pop28 +; CHECK-NEXT: i32.add $push16=, $pop50, $pop27 +; CHECK-NEXT: local.get $push52=, 0 +; CHECK-NEXT: local.get $push51=, 5 +; CHECK-NEXT: i32.add $push13=, $pop52, $pop51 +; CHECK-NEXT: f32.load $push14=, 0($pop13) +; CHECK-NEXT: local.get $push54=, 1 +; CHECK-NEXT: local.get $push53=, 5 +; CHECK-NEXT: i32.add $push11=, $pop54, $pop53 +; CHECK-NEXT: f32.load $push12=, 0($pop11) +; CHECK-NEXT: f32.add $push15=, $pop14, $pop12 +; CHECK-NEXT: f32.store 0($pop16), $pop15 ; CHECK-NEXT: .LBB3_6: # %bb19 ; CHECK-NEXT: end_block # label4: ; CHECK-NEXT: # fallthrough-return Index: llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn +++ llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn @@ -57,6 +57,7 @@ "WebAssemblyMachineFunctionInfo.cpp", "WebAssemblyMemIntrinsicResults.cpp", "WebAssemblyNullifyDebugValueLists.cpp", + "WebAssemblyOptimizeGEPs.cpp", "WebAssemblyOptimizeLiveIntervals.cpp", "WebAssemblyOptimizeReturned.cpp", "WebAssemblyPeephole.cpp",