diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp --- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp @@ -22,8 +22,10 @@ /// and small in size. //===----------------------------------------------------------------------===// +#include "flang/Common/Fortran.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/LowLevelIntrinsics.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIRType.h" @@ -33,13 +35,17 @@ #include "flang/Runtime/entry-names.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Matchers.h" -#include "mlir/IR/TypeUtilities.h" +#include "mlir/IR/Operation.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/RegionUtils.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include #include #include #include @@ -94,6 +100,8 @@ void simplifyLogicalDim1Reduction(fir::CallOp call, const fir::KindMapping &kindMap, GenReductionBodyTy genBodyFunc); + void simplifyMinlocReduction(fir::CallOp call, + const fir::KindMapping &kindMap); void simplifyReductionBody(fir::CallOp call, const fir::KindMapping &kindMap, GenReductionBodyTy genBodyFunc, fir::FirOpBuilder &builder, @@ -141,6 +149,104 @@ {elementType}); } +template +Op expectOp(mlir::Value val) { + if (Op op = mlir::dyn_cast_or_null(val.getDefiningOp())) + return op; + LLVM_DEBUG(llvm::dbgs() << "Didn't find expected " << Op::getOperationName() + << '\n'); + return nullptr; +} + +static bool isOperandAbsent(mlir::Value val) { + if (auto op = expectOp(val)) { + assert(op->getOperands().size() != 0); + return mlir::isa_and_nonnull( + 
op->getOperand(0).getDefiningOp()); + } + return false; +} + +static bool isTrueOrNotConstant(mlir::Value val) { + if (auto op = expectOp(val)) { + return !mlir::matchPattern(val, mlir::m_Zero()); + } + return true; +} + +static bool isZero(mlir::Value val) { + if (auto op = expectOp(val)) { + assert(op->getOperands().size() != 0); + if (mlir::Operation *defOp = op->getOperand(0).getDefiningOp()) + return mlir::matchPattern(defOp, mlir::m_Zero()); + } + return false; +} + +static mlir::Value findBoxDef(mlir::Value val) { + if (auto op = expectOp(val)) { + assert(op->getOperands().size() != 0); + if (auto box = mlir::dyn_cast_or_null( + op->getOperand(0).getDefiningOp())) + return box.getResult(); + if (auto box = mlir::dyn_cast_or_null( + op->getOperand(0).getDefiningOp())) + return box.getResult(); + } + return {}; +} + +static unsigned getDimCount(mlir::Value val) { + // In order to find the dimensions count, we look for EmboxOp/ReboxOp + // and take the count from its *result* type. Note that in case + // of sliced emboxing the operand and the result of EmboxOp/ReboxOp + // have different types. + // Actually, we can take the box type from the operand of + // the first ConvertOp that has non-opaque box type that we meet + // going through the ConvertOp chain. + if (mlir::Value emboxVal = findBoxDef(val)) + if (auto boxTy = emboxVal.getType().dyn_cast()) + if (auto seqTy = boxTy.getEleTy().dyn_cast()) + return seqTy.getDimension(); + return 0; +} + +static mlir::Value findMaskDef(mlir::Value val) { + if (auto op = expectOp(val)) { + assert(op->getOperands().size() != 0); + if (auto box = expectOp(op->getOperand(0))) + return box.getResult(); + if (auto box = expectOp(op->getOperand(0))) + return box.getResult(); + if (auto absent = expectOp(op->getOperand(0))) + return absent.getResult(); + } + return {}; +} + +/// Given the call operation's box argument \p val, discover +/// the element type of the underlying array object. 
+/// \returns the element type or std::nullopt if the type cannot +/// be reliably found. +/// We expect that the argument is a result of fir.convert +/// with the destination type of !fir.box. +static std::optional getArgElementType(mlir::Value val) { + mlir::Operation *defOp; + do { + defOp = val.getDefiningOp(); + // Analyze only sequences of convert operations. + if (!mlir::isa(defOp)) + return std::nullopt; + val = defOp->getOperand(0); + // The convert operation is expected to convert from one + // box type to another box type. + auto boxType = val.getType().cast(); + auto elementType = fir::unwrapSeqOrBoxedSeqType(boxType); + if (!elementType.isa()) + return elementType; + } while (true); +} + using BodyOpGeneratorTy = llvm::function_ref; @@ -186,7 +292,7 @@ mlir::Type resultType = funcOp.getResultTypes()[0]; mlir::Value init = initVal(builder, loc, resultType); - llvm::SmallVector bounds; + llvm::SmallVector bounds; assert(rank > 0 && "rank cannot be zero"); mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); @@ -210,7 +316,7 @@ // array's element. // The loops are generated such that the innermost loop processes // the 0 dimension. - llvm::SmallVector indices; + llvm::SmallVector indices; for (unsigned i = rank; 0 < i; --i) { mlir::Value step = one; mlir::Value loopCount = bounds[i - 1]; @@ -254,9 +360,202 @@ // Return the reduction value from the function. 
builder.create(loc, results[resultIndex]); } +using MinlocBodyOpGeneratorTy = llvm::function_ref &)>; + +static void +genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp, + InitValGeneratorTy initVal, + MinlocBodyOpGeneratorTy genBody, unsigned rank, + mlir::Type elementType, mlir::Location loc, bool hasMask, + mlir::Type maskElemType, mlir::Value resultArr) { + + mlir::IndexType idxTy = builder.getIndexType(); + + mlir::Block::BlockArgListType args = funcOp.front().getArguments(); + mlir::Value arg = args[1]; + + mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0); + + fir::SequenceType::Shape flatShape(rank, + fir::SequenceType::getUnknownExtent()); + mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType); + mlir::Type boxArrTy = fir::BoxType::get(arrTy); + mlir::Value array = builder.create(loc, boxArrTy, arg); + + mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType()); + mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1); + mlir::Value zero = builder.createIntegerConstant(loc, resultElemType, 0); + mlir::Value flagRef = builder.createTemporary(loc, resultElemType); + builder.create(loc, zero, flagRef); + + mlir::Value mask; + if (hasMask) { + mlir::Type maskTy = fir::SequenceType::get(flatShape, maskElemType); + mlir::Type boxMaskTy = fir::BoxType::get(maskTy); + mask = builder.create(loc, boxMaskTy, args[2]); + } + + mlir::Value init = initVal(builder, loc, elementType); + llvm::SmallVector bounds; + + assert(rank > 0 && "rank cannot be zero"); + mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); + + // Compute all the upper bounds before the loop nest. + // It is not strictly necessary for performance, since the loop nest + // does not have any store operations and any LICM optimization + // should be able to optimize the redundancy. 
+ for (unsigned i = 0; i < rank; ++i) { + mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i); + auto dims = + builder.create(loc, idxTy, idxTy, idxTy, array, dimIdx); + mlir::Value len = dims.getResult(1); + // We use C indexing here, so len-1 as loopcount + mlir::Value loopCount = builder.create(loc, len, one); + bounds.push_back(loopCount); + } + // Create a loop nest consisting of OP operations. + // Collect the loops' induction variables into indices array, + // which will be used in the innermost loop to load the input + // array's element. + // The loops are generated such that the innermost loop processes + // the 0 dimension. + llvm::SmallVector indices; + for (unsigned i = rank; 0 < i; --i) { + mlir::Value step = one; + mlir::Value loopCount = bounds[i - 1]; + auto loop = + builder.create(loc, zeroIdx, loopCount, step, false, + /*finalCountValue=*/false, init); + init = loop.getRegionIterArgs()[0]; + indices.push_back(loop.getInductionVar()); + // Set insertion point to the loop body so that the next loop + // is inserted inside the current one. + builder.setInsertionPointToStart(loop.getBody()); + } + + // Reverse the indices such that they are ordered as: + // <dim-0-idx, dim-1-idx, ...> + std::reverse(indices.begin(), indices.end()); + // We are in the innermost loop: generate the reduction body. + if (hasMask) { + mlir::Type logicalRef = builder.getRefType(maskElemType); + mlir::Value maskAddr = + builder.create(loc, logicalRef, mask, indices); + mlir::Value maskElem = builder.create(loc, maskAddr); + + // fir::IfOp requires argument to be I1 - won't accept logical or any other + // Integer.
+ mlir::Type ifCompatType = builder.getI1Type(); + mlir::Value ifCompatElem = + builder.create(loc, ifCompatType, maskElem); + + llvm::SmallVector resultsTy = {elementType, elementType}; + fir::IfOp ifOp = builder.create(loc, elementType, ifCompatElem, + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + } + + // Set flag that mask was true at some point + builder.create(loc, flagSet, flagRef); + mlir::Type eleRefTy = builder.getRefType(elementType); + mlir::Value addr = + builder.create(loc, eleRefTy, array, indices); + mlir::Value elem = builder.create(loc, addr); + + mlir::Value reductionVal = + genBody(builder, loc, elementType, elem, init, indices); + + if (hasMask) { + fir::IfOp ifOp = + mlir::dyn_cast(builder.getBlock()->getParentOp()); + builder.create(loc, reductionVal); + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + builder.create(loc, init); + reductionVal = ifOp.getResult(0); + builder.setInsertionPointAfter(ifOp); + } + + // Unwind the loop nest and insert ResultOp on each level + // to return the updated value of the reduction to the enclosing + // loops. + for (unsigned i = 0; i < rank; ++i) { + auto result = builder.create(loc, reductionVal); + // Proceed to the outer loop. + auto loop = mlir::cast(result->getParentOp()); + reductionVal = loop.getResult(0); + // Set insertion point after the loop operation that we have + // just processed. + builder.setInsertionPointAfter(loop.getOperation()); + } + // End of loop nest. The insertion point is after the outermost loop. + if (mlir::isa(builder.getBlock()->getParentOp())) { + builder.create(loc, reductionVal); + fir::IfOp ifOp = + mlir::dyn_cast(builder.getBlock()->getParentOp()); + builder.setInsertionPointAfter(ifOp); + // Redefine flagSet to escape scope of ifOp + flagSet = builder.createIntegerConstant(loc, resultElemType, 1); + reductionVal = ifOp.getResult(0); + } + + // Check for case where array was full of max values. 
+ // flag will be 0 if mask was never true, 1 if mask was true at some point, + // this is needed to avoid catching cases where we didn't access any elements + // e.g. mask=.FALSE. + mlir::Value flagValue = + builder.create(loc, resultElemType, flagRef); + mlir::Value flagCmp = builder.create( + loc, mlir::arith::CmpIPredicate::eq, flagValue, flagSet); + fir::IfOp ifMaskTrueOp = + builder.create(loc, flagCmp, /*withElseRegion=*/false); + builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front()); + + mlir::Value testInit = initVal(builder, loc, elementType); + fir::IfOp ifMinSetOp; + if (elementType.isa()) { + mlir::Value cmp = builder.create( + loc, mlir::arith::CmpFPredicate::OEQ, testInit, reductionVal); + ifMinSetOp = builder.create(loc, cmp, + /*withElseRegion*/ false); + } else { + mlir::Value cmp = builder.create( + loc, mlir::arith::CmpIPredicate::eq, testInit, reductionVal); + ifMinSetOp = builder.create(loc, cmp, + /*withElseRegion*/ false); + } + builder.setInsertionPointToStart(&ifMinSetOp.getThenRegion().front()); + + // Load output array with 1s instead of 0s + for (unsigned int i = 0; i < rank; ++i) { + mlir::Type resultRefTy = builder.getRefType(resultElemType); + // mlir::Value one = builder.createIntegerConstant(loc, resultElemType, 1); + mlir::Value index = builder.createIntegerConstant(loc, idxTy, i); + mlir::Value resultElemAddr = + builder.create(loc, resultRefTy, resultArr, index); + builder.create(loc, flagSet, resultElemAddr); + } + builder.setInsertionPointAfter(ifMaskTrueOp); + // Store newly created output array to the reference passed in + fir::SequenceType::Shape resultShape(1, rank); + mlir::Type outputArrTy = fir::SequenceType::get(resultShape, resultElemType); + mlir::Type outputHeapTy = fir::HeapType::get(outputArrTy); + mlir::Type outputBoxTy = fir::BoxType::get(outputHeapTy); + mlir::Type outputRefTy = builder.getRefType(outputBoxTy); + + mlir::Value outputArrNone = args[0]; + mlir::Value outputArr = 
builder.create(loc, outputRefTy, outputArrNone); + + // Store newly created array to output array + builder.create(loc, resultArr, outputArr); + builder.create(loc); +} static llvm::SmallVector nopLoopCond(fir::FirOpBuilder &builder, - mlir::Location, + mlir::Location loc, mlir::Value reductionVal) { return {reductionVal}; } @@ -441,6 +740,151 @@ loc); } +static mlir::FunctionType genRuntimeMinlocType(fir::FirOpBuilder &builder, + unsigned int rank) { + mlir::Type boxType = fir::BoxType::get(builder.getNoneType()); + mlir::Type boxRefType = builder.getRefType(boxType); + + return mlir::FunctionType::get(builder.getContext(), + {boxRefType, boxType, boxType}, {}); +} + +static void genRuntimeMinlocBody(fir::FirOpBuilder &builder, + mlir::func::FuncOp &funcOp, unsigned rank, + int maskRank, mlir::Type elementType, + mlir::Type maskElemType, + mlir::Type resultElemTy) { + auto init = [](fir::FirOpBuilder builder, mlir::Location loc, + mlir::Type elementType) { + if (auto ty = elementType.dyn_cast()) { + const llvm::fltSemantics &sem = ty.getFloatSemantics(); + return builder.createRealConstant( + loc, elementType, llvm::APFloat::getLargest(sem, /*Negative=*/false)); + } + unsigned bits = elementType.getIntOrFloatBitWidth(); + int64_t maxInt = llvm::APInt::getSignedMaxValue(bits).getSExtValue(); + return builder.createIntegerConstant(loc, elementType, maxInt); + }; + + mlir::Location loc = mlir::UnknownLoc::get(builder.getContext()); + builder.setInsertionPointToEnd(funcOp.addEntryBlock()); + + mlir::Value mask = funcOp.front().getArgument(2); + + // Set up result array in case of early exit / 0 length array + mlir::IndexType idxTy = builder.getIndexType(); + mlir::Type resultTy = fir::SequenceType::get(rank, resultElemTy); + mlir::Type resultHeapTy = fir::HeapType::get(resultTy); + mlir::Type resultBoxTy = fir::BoxType::get(resultHeapTy); + + mlir::Value returnValue = builder.createIntegerConstant(loc, resultElemTy, 0); + mlir::Value resultArrSize = 
builder.createIntegerConstant(loc, idxTy, rank); + + mlir::Value resultArrInit = builder.create(loc, resultTy); + mlir::Value resultArrShape = builder.create(loc, resultArrSize); + mlir::Value resultArr = builder.create( + loc, resultBoxTy, resultArrInit, resultArrShape); + + mlir::Type resultRefTy = builder.getRefType(resultElemTy); + + for (unsigned int i = 0; i < rank; ++i) { + mlir::Value index = builder.createIntegerConstant(loc, idxTy, i); + mlir::Value resultElemAddr = + builder.create(loc, resultRefTy, resultArr, index); + builder.create(loc, returnValue, resultElemAddr); + } + + auto genBodyOp = + [&rank, &resultArr]( + fir::FirOpBuilder builder, mlir::Location loc, mlir::Type elementType, + mlir::Value elem1, mlir::Value elem2, + llvm::SmallVector indices) + -> mlir::Value { + fir::IfOp ifOp; + + if (elementType.isa()) { + mlir::Value cmp = builder.create( + loc, mlir::arith::CmpFPredicate::OLT, elem1, elem2); + ifOp = builder.create(loc, elementType, cmp, + /*withElseRegion*/ true); + } else if (elementType.isa()) { + mlir::Value cmp = builder.create( + loc, mlir::arith::CmpIPredicate::slt, elem1, elem2); + ifOp = builder.create(loc, elementType, cmp, + /*withElseRegion*/ true); + } else { + llvm_unreachable("unsupported type"); + } + + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType()); + mlir::Type returnRefTy = builder.getRefType(resultElemTy); + mlir::IndexType idxTy = builder.getIndexType(); + + mlir::Value one = builder.createIntegerConstant(loc, resultElemTy, 1); + + for (unsigned int i = 0; i < rank; ++i) { + mlir::Value index = builder.createIntegerConstant(loc, idxTy, i); + mlir::Value resultElemAddr = + builder.create(loc, returnRefTy, resultArr, index); + mlir::Value convert = + builder.create(loc, resultElemTy, indices[i]); + mlir::Value fortranIndex = + builder.create(loc, convert, one); + builder.create(loc, fortranIndex, resultElemAddr); + } + 
builder.create(loc, elem1); + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + builder.create(loc, elem2); + builder.setInsertionPointAfter(ifOp); + return ifOp.getResult(0); + }; + + bool hasMask = true; + // if mask is a logical scalar, we can check its value before the main loop + // and either ignore the fact it is there or exit early. + if (maskRank == 0) { + hasMask = false; + + mlir::Type logical = builder.getI1Type(); + mlir::IndexType idxTy = builder.getIndexType(); + + fir::SequenceType::Shape singleElement(1, 1); + mlir::Type arrTy = fir::SequenceType::get(singleElement, logical); + mlir::Type boxArrTy = fir::BoxType::get(arrTy); + mlir::Value array = builder.create(loc, boxArrTy, mask); + + mlir::Value indx = builder.createIntegerConstant(loc, idxTy, 0); + mlir::Type logicalRefTy = builder.getRefType(logical); + mlir::Value condAddr = + builder.create(loc, logicalRefTy, array, indx); + mlir::Value cond = builder.create(loc, condAddr); + + fir::IfOp ifOp = builder.create(loc, elementType, cond, + /*withElseRegion=*/true); + + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + mlir::Value basicValue; + if (elementType.isa()) { + basicValue = builder.createIntegerConstant(loc, elementType, 0); + } else { + basicValue = builder.createRealConstant(loc, elementType, 0); + } + builder.create(loc, basicValue); + + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + } + + // bit of a hack - maskRank is set to -1 for absent mask arg, so don't + // generate high level mask or element by element mask. + if (maskRank == -1) { + hasMask = false; + } + + genMinlocReductionLoop(builder, funcOp, init, genBodyOp, rank, elementType, + loc, hasMask, maskElemType, resultArr); +} + /// Generate function type for the simplified version of RTNAME(DotProduct) /// operating on the given \p elementType. 
static mlir::FunctionType genRuntimeDotType(fir::FirOpBuilder &builder, @@ -594,83 +1038,6 @@ return newFunc; } -fir::ConvertOp expectConvertOp(mlir::Value val) { - if (fir::ConvertOp op = - mlir::dyn_cast_or_null(val.getDefiningOp())) - return op; - LLVM_DEBUG(llvm::dbgs() << "Didn't find expected fir::ConvertOp\n"); - return nullptr; -} - -static bool isOperandAbsent(mlir::Value val) { - if (auto op = expectConvertOp(val)) { - assert(op->getOperands().size() != 0); - return mlir::isa_and_nonnull( - op->getOperand(0).getDefiningOp()); - } - return false; -} - -static bool isZero(mlir::Value val) { - if (auto op = expectConvertOp(val)) { - assert(op->getOperands().size() != 0); - if (mlir::Operation *defOp = op->getOperand(0).getDefiningOp()) - return mlir::matchPattern(defOp, mlir::m_Zero()); - } - return false; -} - -static mlir::Value findBoxDef(mlir::Value val) { - if (auto op = expectConvertOp(val)) { - assert(op->getOperands().size() != 0); - if (auto box = mlir::dyn_cast_or_null( - op->getOperand(0).getDefiningOp())) - return box.getResult(); - if (auto box = mlir::dyn_cast_or_null( - op->getOperand(0).getDefiningOp())) - return box.getResult(); - } - return {}; -} - -static unsigned getDimCount(mlir::Value val) { - // In order to find the dimensions count, we look for EmboxOp/ReboxOp - // and take the count from its *result* type. Note that in case - // of sliced emboxing the operand and the result of EmboxOp/ReboxOp - // have different types. - // Actually, we can take the box type from the operand of - // the first ConvertOp that has non-opaque box type that we meet - // going through the ConvertOp chain. - if (mlir::Value emboxVal = findBoxDef(val)) - if (auto boxTy = emboxVal.getType().dyn_cast()) - if (auto seqTy = boxTy.getEleTy().dyn_cast()) - return seqTy.getDimension(); - return 0; -} - -/// Given the call operation's box argument \p val, discover -/// the element type of the underlying array object. 
-/// \returns the element type or std::nullopt if the type cannot -/// be reliably found. -/// We expect that the argument is a result of fir.convert -/// with the destination type of !fir.box. -static std::optional getArgElementType(mlir::Value val) { - mlir::Operation *defOp; - do { - defOp = val.getDefiningOp(); - // Analyze only sequences of convert operations. - if (!mlir::isa(defOp)) - return std::nullopt; - val = defOp->getOperand(0); - // The convert operation is expected to convert from one - // box type to another box type. - auto boxType = val.getType().cast(); - auto elementType = fir::unwrapSeqOrBoxedSeqType(boxType); - if (!elementType.isa()) - return elementType; - } while (true); -} - void SimplifyIntrinsicsPass::simplifyIntOrFloatReduction( fir::CallOp call, const fir::KindMapping &kindMap, GenReductionBodyTy genBodyFunc) { @@ -741,8 +1108,7 @@ // Treating logicals as integers makes things a lot easier fir::LogicalType logicalType = {elementType.dyn_cast()}; fir::KindTy kind = logicalType.getFKind(); - mlir::Type intElementType = - mlir::IntegerType::get(builder.getContext(), kind * 8); + mlir::Type intElementType = builder.getIntegerType(kind * 8); // Mangle kind into function name as it is not done by default std::string funcName = @@ -777,8 +1143,7 @@ // Treating logicals as integers makes things a lot easier fir::LogicalType logicalType = {elementType.dyn_cast()}; fir::KindTy kind = logicalType.getFKind(); - mlir::Type intElementType = - mlir::IntegerType::get(builder.getContext(), kind * 8); + mlir::Type intElementType = builder.getIntegerType(kind * 8); // Mangle kind into function name as it is not done by default std::string funcName = @@ -790,6 +1155,87 @@ intElementType); } +void SimplifyIntrinsicsPass::simplifyMinlocReduction( + fir::CallOp call, const fir::KindMapping &kindMap) { + + mlir::Operation::operand_range args = call.getArgs(); + + mlir::Value back = args[6]; + if (isTrueOrNotConstant(back)) + return; + + mlir::Value mask = 
args[5]; + mlir::Value maskDef = findMaskDef(mask); + + // maskDef is set to NULL by expectOp() when it's not a ConvertOp. + // This tends to be because it is set by a selectOp, in which case let the + // runtime deal with it. + if (maskDef == NULL) + return; + + mlir::SymbolRefAttr callee = call.getCalleeAttr(); + mlir::StringRef funcNameBase = callee.getLeafReference().getValue(); + unsigned rank = getDimCount(args[1]); + if (funcNameBase.ends_with("Dim") || !(rank > 0)) + return; + + fir::FirOpBuilder builder{getSimplificationBuilder(call, kindMap)}; + mlir::Location loc = call.getLoc(); + auto inputBox = findBoxDef(args[1]); + mlir::Type inputType = hlfir::getFortranElementType(inputBox.getType()); + + if (inputType.isa()) + return; + + int maskRank; + fir::KindTy kind = 0; + mlir::Type logicalConvertType = builder.getI1Type(); + if (isOperandAbsent(mask)) { + maskRank = -1; + } else { + maskRank = getDimCount(mask); + mlir::Type maskElemTy = hlfir::getFortranElementType(maskDef.getType()); + fir::LogicalType maskLogiTy = {maskElemTy.dyn_cast()}; + kind = maskLogiTy.getFKind(); + logicalConvertType = builder.getIntegerType(kind * 8); + } + + mlir::Operation *outputDef = args[0].getDefiningOp(); + mlir::Value outputAlloc = outputDef->getOperand(0); + mlir::Type outType = hlfir::getFortranElementType(outputAlloc.getType()); + + std::string fmfString{getFastMathFlagsString(builder)}; + std::string funcName = + (mlir::Twine{callee.getLeafReference().getValue(), "x"} + + mlir::Twine{rank} + + (maskRank >= 0 + ? 
"_Logical" + mlir::Twine{kind} + "x" + mlir::Twine{maskRank} + : "") + + "_") + .str(); + + llvm::raw_string_ostream nameOS(funcName); + outType.print(nameOS); + nameOS << '_' << fmfString; + + auto typeGenerator = [&rank](fir::FirOpBuilder &builder) { + return genRuntimeMinlocType(builder, rank); + }; + auto bodyGenerator = [&rank, &maskRank, &inputType, &logicalConvertType, + &outType](fir::FirOpBuilder &builder, + mlir::func::FuncOp &funcOp) { + genRuntimeMinlocBody(builder, funcOp, rank, maskRank, inputType, + logicalConvertType, outType); + }; + + mlir::func::FuncOp newFunc = + getOrCreateFunction(builder, funcName, typeGenerator, bodyGenerator); + builder.create(loc, newFunc, + mlir::ValueRange{args[0], args[1], args[5]}); + call->dropAllReferences(); + call->erase(); +} + void SimplifyIntrinsicsPass::simplifyReductionBody( fir::CallOp call, const fir::KindMapping &kindMap, GenReductionBodyTy genBodyFunc, fir::FirOpBuilder &builder, @@ -924,6 +1370,10 @@ simplifyLogicalDim1Reduction(call, kindMap, genRuntimeAllBody); return; } + if (funcName.startswith(RTNAME_STRING(Minloc))) { + simplifyMinlocReduction(call, kindMap); + return; + } } } }); diff --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir --- a/flang/test/Transforms/simplifyintrinsics.fir +++ b/flang/test/Transforms/simplifyintrinsics.fir @@ -1636,3 +1636,666 @@ // CHECK-NOT fir.call @_FortranAAllDimLogical4x1_simplified({{.*}}) // CHECK: fir.call @_FortranAAllDim({{.*}}) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32) -> none // CHECK-NOT fir.call @_FortranAAllDimLogical4x1_simplified({{.*}}) + +// ----- +// Check Minloc simplifies correctly for 1D case with 1D mask, I32 input +func.func @_QPtestminloc_works1d(%arg0: !fir.ref> {fir.bindc_name = "a"}, %arg1: !fir.ref>> {fir.bindc_name = "b"}) -> !fir.array<1xi32> { + %0 = fir.alloca !fir.box>> + %c10 = arith.constant 10 : index + %c10_0 = arith.constant 10 : index + %c1 = arith.constant 1 : index + 
%1 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_works1d", uniq_name = "_QFtestminloc_works1dEtestminloc_works1d"} + %2 = fir.shape %c1 : (index) -> !fir.shape<1> + %3 = fir.array_load %1(%2) : (!fir.ref>, !fir.shape<1>) -> !fir.array<1xi32> + %4 = fir.shape %c10 : (index) -> !fir.shape<1> + %5 = fir.embox %arg0(%4) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + %6 = fir.shape %c10_0 : (index) -> !fir.shape<1> + %7 = fir.embox %arg1(%6) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> + %c4 = arith.constant 4 : index + %false = arith.constant false + %8 = fir.zero_bits !fir.heap> + %c0 = arith.constant 0 : index + %9 = fir.shape %c0 : (index) -> !fir.shape<1> + %10 = fir.embox %8(%9) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + fir.store %10 to %0 : !fir.ref>>> + %11 = fir.address_of(@_QQcl.ea5bcf7f706678e1796661f8916f3379) : !fir.ref> + %c5_i32 = arith.constant 5 : i32 + %12 = fir.convert %0 : (!fir.ref>>>) -> !fir.ref> + %13 = fir.convert %5 : (!fir.box>) -> !fir.box + %14 = fir.convert %c4 : (index) -> i32 + %15 = fir.convert %11 : (!fir.ref>) -> !fir.ref + %16 = fir.convert %7 : (!fir.box>>) -> !fir.box + %17 = fir.call @_FortranAMinlocInteger4(%12, %13, %14, %15, %c5_i32, %16, %false) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> none + %18 = fir.load %0 : !fir.ref>>> + %c0_1 = arith.constant 0 : index + %19:3 = fir.box_dims %18, %c0_1 : (!fir.box>>, index) -> (index, index, index) + %20 = fir.box_addr %18 : (!fir.box>>) -> !fir.heap> + %21 = fir.shape_shift %19#0, %19#1 : (index, index) -> !fir.shapeshift<1> + %22 = fir.array_load %20(%21) : (!fir.heap>, !fir.shapeshift<1>) -> !fir.array + %c1_2 = arith.constant 1 : index + %c0_3 = arith.constant 0 : index + %23 = arith.subi %c1, %c1_2 : index + %24 = fir.do_loop %arg2 = %c0_3 to %23 step %c1_2 unordered iter_args(%arg3 = %3) -> (!fir.array<1xi32>) { + %26 = fir.array_fetch %22, %arg2 : (!fir.array, index) -> i32 + %27 = fir.array_update %arg3, %26, %arg2 : (!fir.array<1xi32>, i32, 
index) -> !fir.array<1xi32> + fir.result %27 : !fir.array<1xi32> + } + fir.array_merge_store %3, %24 to %1 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref> + fir.freemem %20 : !fir.heap> + %25 = fir.load %1 : !fir.ref> + return %25 : !fir.array<1xi32> +} + +// CHECK-LABEL: func.func @_QPtestminloc_works1d( +// CHECK-SAME: %[[INARR:.*]]: !fir.ref> {fir.bindc_name = "a"}, +// CHECK-SAME: %[[MASK:.*]]: !fir.ref>> {fir.bindc_name = "b"}) -> !fir.array<1xi32> { +// CHECK: %[[OUTARR:.*]] = fir.alloca !fir.box>> +// CHECK: %[[SIZE10_0:.*]] = arith.constant 10 : index +// CHECK: %[[SIZE10_1:.*]] = arith.constant 10 : index +// CHECK: %[[INARR_SHAPE:.*]] = fir.shape %[[SIZE10_0]] : (index) -> !fir.shape<1> +// CHECK: %[[BOX_INARR:.*]] = fir.embox %[[INARR]](%[[INARR_SHAPE]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +// CHECK: %[[MASK_SHAPE:.*]] = fir.shape %[[SIZE10_1]] : (index) -> !fir.shape<1> +// CHECK: %[[BOX_MASK:.*]] = fir.embox %[[MASK]](%[[MASK_SHAPE]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> +// CHECK: %[[REF_BOX_OUTARR_NONE:.*]] = fir.convert %[[OUTARR]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[BOX_INARR_NONE:.*]] = fir.convert %[[BOX_INARR]] : (!fir.box>) -> !fir.box +// CHECK: %[[BOX_MASK_NONE:.*]] = fir.convert %[[BOX_MASK]] : (!fir.box>>) -> !fir.box +// CHECK: fir.call @_FortranAMinlocInteger4x1_Logical4x1_i32_contract_simplified(%[[REF_BOX_OUTARR_NONE]], %[[BOX_INARR_NONE]], %[[BOX_MASK_NONE]]) fastmath : (!fir.ref>, !fir.box, !fir.box) -> () + +// CHECK-LABEL: func.func private @_FortranAMinlocInteger4x1_Logical4x1_i32_contract_simplified( +// CHECK-SAME: %[[REF_BOX_OUTARR_NONE:.*]]: !fir.ref>, +// CHECK-SAME: %[[BOX_INARR_NONE:.*]]: !fir.box, +// CHECK-SAME: %[[BOX_MASK_NONE:.*]]: !fir.box) attributes {llvm.linkage = #llvm.linkage} { +// CHECK: %[[FLAG_ALLOC:.*]] = fir.alloca i32 +// CHECK: %[[INIT_OUT_IDX:.*]] = arith.constant 0 : i32 +// CHECK: %[[OUTARR_SIZE:.*]] = arith.constant 1 : index +// CHECK: %[[OUTARR:.*]] = fir.allocmem 
!fir.array<1xi32> +// CHECK: %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1> +// CHECK: %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> +// CHECK: %[[OUTARR_IDX0:.*]] = arith.constant 0 : index +// CHECK: %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box>>, index) -> !fir.ref +// CHECK: fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref +// CHECK: %[[CINDEX_0:.*]] = arith.constant 0 : index +// CHECK: %[[BOX_INARR:.*]] = fir.convert %[[BOX_INARR_NONE]] : (!fir.box) -> !fir.box> +// CHECK: %[[FLAG_SET:.*]] = arith.constant 1 : i32 +// CHECK: %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32 +// CHECK: fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref +// CHECK: %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box) -> !fir.box> +// CHECK: %[[MAX:.*]] = arith.constant 2147483647 : i32 +// CHECK: %[[CINDEX_1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM_INDEX0:.*]] = arith.constant 0 : index +// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[BOX_INARR]], %[[DIM_INDEX0]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[CINDEX_1]] : index +// CHECK: %[[DOLOOP:.*]] = fir.do_loop %[[ITER:.*]] = %[[CINDEX_0]] to %[[EXTENT]] step %[[CINDEX_1]] iter_args(%[[MIN:.*]] = %[[MAX]]) -> (i32) { +// CHECK: %[[MASK_ITEM:.*]] = fir.coordinate_of %[[BOX_MASK]], %[[ITER]] : (!fir.box>, index) -> !fir.ref +// CHECK: %[[MASK_ITEMVAL:.*]] = fir.load %[[MASK_ITEM]] : !fir.ref +// CHECK: %[[MASK_IF_ITEM:.*]] = fir.convert %[[MASK_ITEMVAL]] : (i32) -> i1 +// CHECK: %[[IF_MASK:.*]] = fir.if %[[MASK_IF_ITEM]] -> (i32) { +// CHECK: fir.store %[[FLAG_SET]] to %[[FLAG_ALLOC]] : !fir.ref +// CHECK: %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box>, index) -> !fir.ref +// CHECK: %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref +// CHECK: %[[NEW_MIN:.*]] = arith.cmpi slt, 
%[[INARR_ITEMVAL]], %[[MIN]] : i32 +// CHECK: %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (i32) { +// CHECK: %[[ONE:.*]] = arith.constant 1 : i32 +// CHECK: %[[OUTARR_IDX:.*]] = arith.constant 0 : index +// CHECK: %[[OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX]] : (!fir.box>>, index) -> !fir.ref +// CHECK: %[[ITER_I32:.*]] = fir.convert %[[ITER]] : (index) -> i32 +// CHECK: %[[FORTRAN_IDX:.*]] = arith.addi %[[ITER_I32]], %[[ONE]] : i32 +// CHECK: fir.store %[[FORTRAN_IDX]] to %[[OUTARR_ITEM]] : !fir.ref +// CHECK: fir.result %[[INARR_ITEMVAL]] : i32 +// CHECK: } else { +// CHECK: fir.result %[[MIN]] : i32 +// CHECK: } +// CHECK: fir.result %[[IF_NEW_MIN:.*]] : i32 +// CHECK: } else { +// CHECK: fir.result %[[MIN]] : i32 +// CHECK: } +// CHECK: fir.result %[[IF_MASK:.*]] : i32 +// CHECK: } +// CHECK: %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref +// CHECK: %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_SET]] : i32 +// CHECK: fir.if %[[FLAG_WAS_SET]] { +// CHECK: %[[TEST_MAX:.*]] = arith.constant 2147483647 : i32 +// CHECK: %[[INIT_NOT_CHANGED:.*]] = arith.cmpi eq, %[[TEST_MAX]], %[[DO_LOOP:.*]] : i32 +// CHECK: fir.if %[[INIT_NOT_CHANGED]] { +// CHECK: %[[FLAG_OUTARR_IDX:.*]] = arith.constant 0 : index +// CHECK: %[[FLAG_OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX]] : (!fir.box>>, index) -> !fir.ref +// CHECK: fir.store %[[FLAG_SET]] to %[[FLAG_OUTARR_ITEM]] : !fir.ref +// CHECK: } +// CHECK: } +// CHECK: %[[REF_BOX_OUTARR:.*]] = fir.convert %[[REF_BOX_OUTARR_NONE]] : (!fir.ref>) -> !fir.ref>>> +// CHECK: fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref>>> +// CHECK: return +// CHECK: } + +// ----- +// Check Minloc simplifies correctly for 2D case with no mask and I64 Int as result + +func.func @_QPtestminloc_works2d_nomask(%arg0: !fir.ref> {fir.bindc_name = "a"}) -> !fir.array<2xi32> { + %0 = fir.alloca !fir.box>> + %c10 = arith.constant 10 : index + %c10_0 = arith.constant 10 : 
index + %c2 = arith.constant 2 : index + %1 = fir.alloca !fir.array<2xi32> {bindc_name = "testminloc_works2d_nomask", uniq_name = "_QFtestminloc_works2d_nomaskEtestminloc_works2d_nomask"} + %2 = fir.shape %c2 : (index) -> !fir.shape<1> + %3 = fir.array_load %1(%2) : (!fir.ref>, !fir.shape<1>) -> !fir.array<2xi32> + %4 = fir.shape %c10, %c10_0 : (index, index) -> !fir.shape<2> + %5 = fir.embox %arg0(%4) : (!fir.ref>, !fir.shape<2>) -> !fir.box> + %c8_i32 = arith.constant 8 : i32 + %6 = fir.absent !fir.box + %false = arith.constant false + %7 = fir.zero_bits !fir.heap> + %c0 = arith.constant 0 : index + %8 = fir.shape %c0 : (index) -> !fir.shape<1> + %9 = fir.embox %7(%8) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + fir.store %9 to %0 : !fir.ref>>> + %10 = fir.address_of(@_QQcl.cba8b79c45ccae77d79d66a39ac99823) : !fir.ref> + %c4_i32 = arith.constant 4 : i32 + %11 = fir.convert %0 : (!fir.ref>>>) -> !fir.ref> + %12 = fir.convert %5 : (!fir.box>) -> !fir.box + %13 = fir.convert %10 : (!fir.ref>) -> !fir.ref + %14 = fir.convert %6 : (!fir.box) -> !fir.box + %15 = fir.call @_FortranAMinlocInteger4(%11, %12, %c8_i32, %13, %c4_i32, %14, %false) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> none + %16 = fir.load %0 : !fir.ref>>> + %c0_1 = arith.constant 0 : index + %17:3 = fir.box_dims %16, %c0_1 : (!fir.box>>, index) -> (index, index, index) + %18 = fir.box_addr %16 : (!fir.box>>) -> !fir.heap> + %19 = fir.shape_shift %17#0, %17#1 : (index, index) -> !fir.shapeshift<1> + %20 = fir.array_load %18(%19) : (!fir.heap>, !fir.shapeshift<1>) -> !fir.array + %c1 = arith.constant 1 : index + %c0_2 = arith.constant 0 : index + %21 = arith.subi %c2, %c1 : index + %22 = fir.do_loop %arg1 = %c0_2 to %21 step %c1 unordered iter_args(%arg2 = %3) -> (!fir.array<2xi32>) { + %24 = fir.array_fetch %20, %arg1 : (!fir.array, index) -> i64 + %25 = fir.convert %24 : (i64) -> i32 + %26 = fir.array_update %arg2, %25, %arg1 : (!fir.array<2xi32>, i32, index) -> 
!fir.array<2xi32> + fir.result %26 : !fir.array<2xi32> + } + fir.array_merge_store %3, %22 to %1 : !fir.array<2xi32>, !fir.array<2xi32>, !fir.ref> + fir.freemem %18 : !fir.heap> + %23 = fir.load %1 : !fir.ref> + return %23 : !fir.array<2xi32> +} + +// CHECK-LABEL: func.func @_QPtestminloc_works2d_nomask( +// CHECK-SAME: %[[INARR:.*]]: !fir.ref> {fir.bindc_name = "a"}) -> !fir.array<2xi32> { +// CHECK: %[[ABSENT_MASK:.*]] = fir.absent !fir.box +// CHECK: %[[ABSENT_MASK_NONE:.*]] = fir.convert %[[ABSENT_MASK]] : (!fir.box) -> !fir.box +// CHECK: fir.call @_FortranAMinlocInteger4x2_i64_contract_simplified(%{{.*}}, %{{.*}}, %[[ABSENT_MASK_NONE]]) fastmath : (!fir.ref>, !fir.box, !fir.box) -> () + +// CHECK-LABEL: func.func private @_FortranAMinlocInteger4x2_i64_contract_simplified( +// CHECK-SAME: %[[REF_BOX_OUTARR_NONE:.*]]: !fir.ref>, +// CHECK-SAME: %[[BOX_INARR_NONE:.*]]: !fir.box, +// CHECK-SAME: %[[BOX_MASK_NONE:.*]]: !fir.box) attributes {llvm.linkage = #llvm.linkage} { +// CHECK: %[[FLAG_ALLOC:.*]] = fir.alloca i64 +// CHECK: %[[INIT_OUT_IDX:.*]] = arith.constant 0 : i64 +// CHECK: %[[OUTARR_SIZE:.*]] = arith.constant 2 : index +// CHECK: %[[OUTARR:.*]] = fir.allocmem !fir.array<2xi64> +// CHECK: %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1> +// CHECK: %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> +// CHECK: %[[OUTARR_IDX0:.*]] = arith.constant 0 : index +// CHECK: %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box>>, index) -> !fir.ref +// CHECK: fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref +// CHECK: %[[OUTARR_IDX1:.*]] = arith.constant 1 : index +// CHECK: %[[OUTARR_ITEM1:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX1]] : (!fir.box>>, index) -> !fir.ref +// CHECK: fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM1]] : !fir.ref +// CHECK: %[[C_INDEX0:.*]] = arith.constant 0 : index +// CHECK: %[[BOX_INARR:.*]] = 
fir.convert %[[BOX_INARR_NONE]] : (!fir.box) -> !fir.box> +// CHECK: %[[FLAG_SET:.*]] = arith.constant 1 : i64 +// CHECK: %[[FLAG_EMPTY:.*]] = arith.constant 0 : i64 +// CHECK: fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref +// CHECK: %[[MAX:.*]] = arith.constant 2147483647 : i32 +// CHECK: %[[C_INDEX1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM_INDEX0:.*]] = arith.constant 0 : index +// CHECK: %[[DIMS0:.*]]:3 = fir.box_dims %[[BOX_INARR]], %[[DIM_INDEX0]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[EXTENT0:.*]] = arith.subi %[[DIMS0]]#1, %[[C_INDEX1]] : index +// CHECK: %[[DIM_INDEX1:.*]] = arith.constant 1 : index +// CHECK: %[[DIMS1:.*]]:3 = fir.box_dims %[[BOX_INARR]], %[[DIM_INDEX1]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[EXTENT1:.*]] = arith.subi %[[DIMS1]]#1, %[[C_INDEX1]] : index +// CHECK: %[[DOLOOP0:.*]] = fir.do_loop %[[ITER0:.*]] = %[[C_INDEX0]] to %[[EXTENT1]] step %[[C_INDEX1]] iter_args(%[[MIN0:.*]] = %[[MAX]]) -> (i32) { +// CHECK: %[[DOLOOP1:.*]] = fir.do_loop %[[ITER1:.*]] = %[[C_INDEX0]] to %[[EXTENT0]] step %[[C_INDEX1]] iter_args(%[[MIN1:.*]] = %[[MIN0]]) -> (i32) { +// CHECK: %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER1]], %[[ITER0]] : (!fir.box>, index, index) -> !fir.ref +// CHECK: %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref +// CHECK: %[[NEW_MIN:.*]] = arith.cmpi slt, %[[INARR_ITEMVAL]], %[[MIN1]] : i32 +// CHECK: %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (i32) { +// CHECK: %[[ONE:.*]] = arith.constant 1 : i64 +// CHECK: %[[OUTARR_IDX0:.*]] = arith.constant 0 : index +// CHECK: %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box>>, index) -> !fir.ref +// CHECK: %[[ITER1_I64:.*]] = fir.convert %[[ITER1]] : (index) -> i64 +// CHECK: %[[FORTRAN_IDX1:.*]] = arith.addi %[[ITER1_I64]], %[[ONE]] : i64 +// CHECK: fir.store %[[FORTRAN_IDX1]] to %[[OUTARR_ITEM0]] : !fir.ref +// CHECK: %[[OUTARR_IDX1:.*]] = arith.constant 1 : 
index +// CHECK: %[[OUTARR_ITEM1:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX1]] : (!fir.box>>, index) -> !fir.ref +// CHECK: %[[ITER0_I64:.*]] = fir.convert %[[ITER0]] : (index) -> i64 +// CHECK: %[[FORTRAN_IDX0:.*]] = arith.addi %[[ITER0_I64]], %[[ONE]] : i64 +// CHECK: fir.store %[[FORTRAN_IDX0]] to %[[OUTARR_ITEM1]] : !fir.ref +// CHECK: fir.result %[[INARR_ITEMVAL]] : i32 +// CHECK: } else { +// CHECK: fir.result %[[MIN1]] : i32 +// CHECK: } +// CHECK: fir.result %[[IF_NEW_MIN:.*]] : i32 +// CHECK: } +// CHECK: fir.result %[[DOLOOP1:.*]] : i32 +// CHECK: } +// CHECK: %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref +// CHECK: %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_SET]] : i64 +// CHECK: fir.if %[[FLAG_WAS_SET]] { +// CHECK: %[[TEST_MAX:.*]] = arith.constant 2147483647 : i32 +// CHECK: %[[INIT_NOT_CHANGED:.*]] = arith.cmpi eq, %[[TEST_MAX]], %[[DO_LOOP:.*]] : i32 +// CHECK: fir.if %[[INIT_NOT_CHANGED]] { +// CHECK: %[[FLAG_OUTARR_IDX0:.*]] = arith.constant 0 : index +// CHECK: %[[FLAG_OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX0]] : (!fir.box>>, index) -> !fir.ref +// CHECK: fir.store %[[FLAG_SET]] to %[[FLAG_OUTARR_ITEM0]] : !fir.ref +// CHECK: %[[FLAG_OUTARR_IDX1:.*]] = arith.constant 1 : index +// CHECK: %[[FLAG_OUTARR_ITEM1:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX1]] : (!fir.box>>, index) -> !fir.ref +// CHECK: fir.store %[[FLAG_SET]] to %[[FLAG_OUTARR_ITEM1]] : !fir.ref +// CHECK: } +// CHECK: } +// CHECK: %[[REF_BOX_OUTARR:.*]] = fir.convert %[[REF_BOX_OUTARR_NONE]] : (!fir.ref>) -> !fir.ref>>> +// CHECK: fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref>>> +// CHECK: return +// CHECK: } + +// ----- +// Check Minloc simplifies correctly for 1D case with scalar mask and f64 input + +func.func @_QPtestminloc_works1d_scalarmask_f64(%arg0: !fir.ref> {fir.bindc_name = "a"}, %arg1: !fir.ref> {fir.bindc_name = "b"}) -> !fir.array<1xi32> { + %0 = fir.alloca !fir.box>> + %c10 = arith.constant 10 : index + %c1 = arith.constant 1 : index + %1 = fir.alloca !fir.array<1xi32>
{bindc_name = "testminloc_works1d_scalarmask_f64", uniq_name = "_QFtestminloc_works1d_scalarmask_f64Etestminloc_works1d_scalarmask_f64"} + %2 = fir.shape %c1 : (index) -> !fir.shape<1> + %3 = fir.array_load %1(%2) : (!fir.ref>, !fir.shape<1>) -> !fir.array<1xi32> + %4 = fir.shape %c10 : (index) -> !fir.shape<1> + %5 = fir.embox %arg0(%4) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + %6 = fir.embox %arg1 : (!fir.ref>) -> !fir.box> + %c4 = arith.constant 4 : index + %false = arith.constant false + %7 = fir.zero_bits !fir.heap> + %c0 = arith.constant 0 : index + %8 = fir.shape %c0 : (index) -> !fir.shape<1> + %9 = fir.embox %7(%8) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + fir.store %9 to %0 : !fir.ref>>> + %10 = fir.address_of(@_QQcl.66951c28c5b8bab5cdb25c1ac762b978) : !fir.ref> + %c6_i32 = arith.constant 6 : i32 + %11 = fir.convert %0 : (!fir.ref>>>) -> !fir.ref> + %12 = fir.convert %5 : (!fir.box>) -> !fir.box + %13 = fir.convert %c4 : (index) -> i32 + %14 = fir.convert %10 : (!fir.ref>) -> !fir.ref + %15 = fir.convert %6 : (!fir.box>) -> !fir.box + %16 = fir.call @_FortranAMinlocReal8(%11, %12, %13, %14, %c6_i32, %15, %false) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> none + %17 = fir.load %0 : !fir.ref>>> + %c0_0 = arith.constant 0 : index + %18:3 = fir.box_dims %17, %c0_0 : (!fir.box>>, index) -> (index, index, index) + %19 = fir.box_addr %17 : (!fir.box>>) -> !fir.heap> + %20 = fir.shape_shift %18#0, %18#1 : (index, index) -> !fir.shapeshift<1> + %21 = fir.array_load %19(%20) : (!fir.heap>, !fir.shapeshift<1>) -> !fir.array + %c1_1 = arith.constant 1 : index + %c0_2 = arith.constant 0 : index + %22 = arith.subi %c1, %c1_1 : index + %23 = fir.do_loop %arg2 = %c0_2 to %22 step %c1_1 unordered iter_args(%arg3 = %3) -> (!fir.array<1xi32>) { + %25 = fir.array_fetch %21, %arg2 : (!fir.array, index) -> i32 + %26 = fir.array_update %arg3, %25, %arg2 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32> + fir.result %26 : !fir.array<1xi32> + 
} + fir.array_merge_store %3, %23 to %1 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref> + fir.freemem %19 : !fir.heap> + %24 = fir.load %1 : !fir.ref> + return %24 : !fir.array<1xi32> +} + +// CHECK-LABEL: func.func @_QPtestminloc_works1d_scalarmask_f64( +// CHECK-SAME: %[[INARR:.*]]: !fir.ref> {fir.bindc_name = "a"}, +// CHECK-SAME: %[[MASK:.*]]: !fir.ref> {fir.bindc_name = "b"}) -> !fir.array<1xi32> { +// CHECK: fir.call @_FortranAMinlocReal8x1_Logical4x0_i32_contract_simplified({{.*}}, {{.*}}, {{.*}}) fastmath : (!fir.ref>, !fir.box, !fir.box) -> () + +// CHECK-LABEL: func.func private @_FortranAMinlocReal8x1_Logical4x0_i32_contract_simplified( +// CHECK-SAME: %[[REF_BOX_OUTARR_NONE:.*]]: !fir.ref>, +// CHECK-SAME: %[[BOX_INARR_NONE:.*]]: !fir.box, +// CHECK-SAME: %[[BOX_MASK_NONE:.*]]: !fir.box) attributes {llvm.linkage = #llvm.linkage} { +// CHECK: %[[FLAG_ALLOC:.*]] = fir.alloca i32 +// CHECK: %[[INIT_OUT_IDX:.*]] = arith.constant 0 : i32 +// CHECK: %[[OUTARR_SIZE:.*]] = arith.constant 1 : index +// CHECK: %[[OUTARR:.*]] = fir.allocmem !fir.array<1xi32> +// CHECK: %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1> +// CHECK: %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> +// CHECK: %[[OUTARR_IDX0:.*]] = arith.constant 0 : index +// CHECK: %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box>>, index) -> !fir.ref +// CHECK: fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref +// CHECK: %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box) -> !fir.box> +// CHECK: %[[MASK_IDX0:.*]] = arith.constant 0 : index +// CHECK: %[[MASK_ITEM:.*]] = fir.coordinate_of %[[BOX_MASK]], %[[MASK_IDX0]] : (!fir.box>, index) -> !fir.ref +// CHECK: %[[MASK:.*]] = fir.load %[[MASK_ITEM]] : !fir.ref +// CHECK: %[[INIT_RES:.*]] = fir.if %[[MASK]] -> (f64) { +// CHECK: %[[C_INDEX0:.*]] = arith.constant 0 : index +// CHECK: %[[BOX_INARR:.*]] = 
fir.convert %[[BOX_INARR_NONE]] : (!fir.box) -> !fir.box> +// CHECK: %[[FLAG_SET:.*]] = arith.constant 1 : i32 +// CHECK: %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32 +// CHECK: fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref +// CHECK: %[[MAX:.*]] = arith.constant 1.7976931348623157E+308 : f64 +// CHECK: %[[C_INDEX1:.*]] = arith.constant 1 : index +// CHECK: %[[DIM_INDEX:.*]] = arith.constant 0 : index +// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[BOX_INARR]], %[[DIM_INDEX]] : (!fir.box>, index) -> (index, index, index) +// CHECK: %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[C_INDEX1]] : index +// CHECK: %[[DOLOOP:.*]] = fir.do_loop %[[ITER:.*]] = %[[C_INDEX0]] to %[[EXTENT]] step %[[C_INDEX1]] iter_args(%[[MIN:.*]] = %[[MAX]]) -> (f64) { +// CHECK: %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box>, index) -> !fir.ref +// CHECK: %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref +// CHECK: %[[NEW_MIN:.*]] = arith.cmpf olt, %[[INARR_ITEMVAL]], %[[MIN]] : f64 +// CHECK: %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (f64) { +// CHECK: %[[ONE:.*]] = arith.constant 1 : i32 +// CHECK: %[[OUTARR_IDX:.*]] = arith.constant 0 : index +// CHECK: %[[OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX]] : (!fir.box>>, index) -> !fir.ref +// CHECK: %[[ITER_I32:.*]] = fir.convert %[[ITER]] : (index) -> i32 +// CHECK: %[[FORTRAN_IDX:.*]] = arith.addi %[[ITER_I32]], %[[ONE]] : i32 +// CHECK: fir.store %[[FORTRAN_IDX]] to %[[OUTARR_ITEM]] : !fir.ref +// CHECK: fir.result %[[INARR_ITEMVAL]] : f64 +// CHECK: } else { +// CHECK: fir.result %[[MIN]] : f64 +// CHECK: } +// CHECK: fir.result %[[IF_NEW_MIN:.*]] : f64 +// CHECK: } +// CHECK: } +// CHECK: %[[FLAG_CHECK:.*]] = arith.constant 1 : i32 +// CHECK: %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref +// CHECK: %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_CHECK]] : i32 +// CHECK: fir.if %[[FLAG_WAS_SET]] { +// CHECK: %[[TEST_MAX:.*]] = arith.constant 
1.7976931348623157E+308 : f64 +// CHECK: %[[INIT_NOT_CHANGED:.*]] = arith.cmpf oeq, %[[TEST_MAX]], %[[INIT_RES:.*]] : f64 +// CHECK: fir.if %[[INIT_NOT_CHANGED]] { +// CHECK: %[[FLAG_OUTARR_IDX:.*]] = arith.constant 0 : index +// CHECK: %[[FLAG_OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX]] : (!fir.box>>, index) -> !fir.ref +// CHECK: fir.store %[[FLAG_CHECK]] to %[[FLAG_OUTARR_ITEM]] : !fir.ref +// CHECK: } +// CHECK: } +// CHECK: %[[REF_BOX_OUTARR:.*]] = fir.convert %[[REF_BOX_OUTARR_NONE]] : (!fir.ref>) -> !fir.ref>>> +// CHECK: fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref>>> +// CHECK: return +// CHECK: } + +// ----- +// Check Minloc is not simplified when BACK arg is set + +func.func @_QPtestminloc_doesntwork1d_back(%arg0: !fir.ref> {fir.bindc_name = "a"}) -> !fir.array<1xi32> { + %0 = fir.alloca !fir.box>> + %c10 = arith.constant 10 : index + %c1 = arith.constant 1 : index + %1 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_doesntwork1d_back", uniq_name = "_QFtestminloc_doesntwork1d_backEtestminloc_doesntwork1d_back"} + %2 = fir.shape %c1 : (index) -> !fir.shape<1> + %3 = fir.array_load %1(%2) : (!fir.ref>, !fir.shape<1>) -> !fir.array<1xi32> + %4 = fir.shape %c10 : (index) -> !fir.shape<1> + %5 = fir.embox %arg0(%4) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + %true = arith.constant true + %6 = fir.absent !fir.box + %c4 = arith.constant 4 : index + %7 = fir.zero_bits !fir.heap> + %c0 = arith.constant 0 : index + %8 = fir.shape %c0 : (index) -> !fir.shape<1> + %9 = fir.embox %7(%8) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + fir.store %9 to %0 : !fir.ref>>> + %10 = fir.address_of(@_QQcl.3791f01d699716ba5914ae524c6a8dee) : !fir.ref> + %c4_i32 = arith.constant 4 : i32 + %11 = fir.convert %0 : (!fir.ref>>>) -> !fir.ref> + %12 = fir.convert %5 : (!fir.box>) -> !fir.box + %13 = fir.convert %c4 : (index) -> i32 + %14 = fir.convert %10 : (!fir.ref>) -> !fir.ref + %15 = fir.convert %6 : (!fir.box) -> !fir.box + %16 = fir.call
@_FortranAMinlocInteger4(%11, %12, %13, %14, %c4_i32, %15, %true) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> none + %17 = fir.load %0 : !fir.ref>>> + %c0_0 = arith.constant 0 : index + %18:3 = fir.box_dims %17, %c0_0 : (!fir.box>>, index) -> (index, index, index) + %19 = fir.box_addr %17 : (!fir.box>>) -> !fir.heap> + %20 = fir.shape_shift %18#0, %18#1 : (index, index) -> !fir.shapeshift<1> + %21 = fir.array_load %19(%20) : (!fir.heap>, !fir.shapeshift<1>) -> !fir.array + %c1_1 = arith.constant 1 : index + %c0_2 = arith.constant 0 : index + %22 = arith.subi %c1, %c1_1 : index + %23 = fir.do_loop %arg1 = %c0_2 to %22 step %c1_1 unordered iter_args(%arg2 = %3) -> (!fir.array<1xi32>) { + %25 = fir.array_fetch %21, %arg1 : (!fir.array, index) -> i32 + %26 = fir.array_update %arg2, %25, %arg1 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32> + fir.result %26 : !fir.array<1xi32> + } + fir.array_merge_store %3, %23 to %1 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref> + fir.freemem %19 : !fir.heap> + %24 = fir.load %1 : !fir.ref> + return %24 : !fir.array<1xi32> +} + +// CHECK-LABEL: func.func @_QPtestminloc_doesntwork1d_back( +// CHECK-SAME: %[[ARR:.*]]: !fir.ref> {fir.bindc_name = "a"}) -> !fir.array<1xi32> { +// CHECK-NOT: fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath : (!fir.ref>, !fir.box, !fir.box) -> () +// CHECK: fir.call @_FortranAMinlocInteger4({{.*}}) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> none +// CHECK-NOT: fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath : (!fir.ref>, !fir.box, !fir.box) -> () + +// ----- +// Check Minloc is not simplified when DIM arg is set + +func.func @_QPtestminloc_doesntwork1d_dim(%arg0: !fir.ref> {fir.bindc_name = "a"}) -> !fir.array<1xi32> { + %0 = fir.alloca !fir.box> + %c10 = arith.constant 10 : index + %c1 = arith.constant 1 : index + %1 = fir.alloca !fir.array<1xi32> {bindc_name = 
"testminloc_doesntwork1d_dim", uniq_name = "_QFtestminloc_doesntwork1d_dimEtestminloc_doesntwork1d_dim"} + %2 = fir.shape %c1 : (index) -> !fir.shape<1> + %3 = fir.array_load %1(%2) : (!fir.ref>, !fir.shape<1>) -> !fir.array<1xi32> + %4 = fir.shape %c10 : (index) -> !fir.shape<1> + %5 = fir.embox %arg0(%4) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + %c1_i32 = arith.constant 1 : i32 + %6 = fir.absent !fir.box + %c4 = arith.constant 4 : index + %false = arith.constant false + %7 = fir.zero_bits !fir.heap + %8 = fir.embox %7 : (!fir.heap) -> !fir.box> + fir.store %8 to %0 : !fir.ref>> + %9 = fir.address_of(@_QQcl.cfcf4329f25d06a4b02a0c8f532ee9df) : !fir.ref> + %c4_i32 = arith.constant 4 : i32 + %10 = fir.convert %0 : (!fir.ref>>) -> !fir.ref> + %11 = fir.convert %5 : (!fir.box>) -> !fir.box + %12 = fir.convert %c4 : (index) -> i32 + %13 = fir.convert %9 : (!fir.ref>) -> !fir.ref + %14 = fir.convert %6 : (!fir.box) -> !fir.box + %15 = fir.call @_FortranAMinlocDim(%10, %11, %12, %c1_i32, %13, %c4_i32, %14, %false) fastmath : (!fir.ref>, !fir.box, i32, i32, !fir.ref, i32, !fir.box, i1) -> none + %16 = fir.load %0 : !fir.ref>> + %17 = fir.box_addr %16 : (!fir.box>) -> !fir.heap + %18 = fir.load %17 : !fir.heap + fir.freemem %17 : !fir.heap + %c1_0 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %19 = arith.subi %c1, %c1_0 : index + %20 = fir.do_loop %arg1 = %c0 to %19 step %c1_0 unordered iter_args(%arg2 = %3) -> (!fir.array<1xi32>) { + %22 = fir.array_update %arg2, %18, %arg1 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32> + fir.result %22 : !fir.array<1xi32> + } + fir.array_merge_store %3, %20 to %1 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref> + %21 = fir.load %1 : !fir.ref> + return %21 : !fir.array<1xi32> +} +// CHECK-LABEL: func.func @_QPtestminloc_doesntwork1d_dim( +// CHECK-SAME: %[[ARR:.*]]: !fir.ref> {fir.bindc_name = "a"}) -> !fir.array<1xi32> { +// CHECK-NOT: fir.call @_FortranAMinlocDimx1_i32_contract_simplified({{.*}}) fastmath : 
(!fir.ref>, !fir.box, !fir.box) -> () +// CHECK: fir.call @_FortranAMinlocDim({{.*}}) fastmath : (!fir.ref>, !fir.box, i32, i32, !fir.ref, i32, !fir.box, i1) -> none +// CHECK-NOT: fir.call @_FortranAMinlocDimx1_i32_contract_simplified({{.*}}) fastmath : (!fir.ref>, !fir.box, !fir.box) -> () + +// ----- +// Check Minloc is not simplified when dimension of inputArr is unknown + +func.func @_QPtestminloc_doesntwork1d_unknownsize(%arg0: !fir.box> {fir.bindc_name = "a"}) -> !fir.array<1xi32> { + %0 = fir.alloca !fir.box>> + %c1 = arith.constant 1 : index + %1 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_doesntwork1d_unknownsize", uniq_name = "_QFtestminloc_doesntwork1d_unknownsizeEtestminloc_doesntwork1d_unknownsize"} + %2 = fir.shape %c1 : (index) -> !fir.shape<1> + %3 = fir.array_load %1(%2) : (!fir.ref>, !fir.shape<1>) -> !fir.array<1xi32> + %4 = fir.absent !fir.box + %c4 = arith.constant 4 : index + %false = arith.constant false + %5 = fir.zero_bits !fir.heap> + %c0 = arith.constant 0 : index + %6 = fir.shape %c0 : (index) -> !fir.shape<1> + %7 = fir.embox %5(%6) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + fir.store %7 to %0 : !fir.ref>>> + %8 = fir.address_of(@_QQcl.2064f5e9298c2127417d52b69eac898e) : !fir.ref> + %c4_i32 = arith.constant 4 : i32 + %9 = fir.convert %0 : (!fir.ref>>>) -> !fir.ref> + %10 = fir.convert %arg0 : (!fir.box>) -> !fir.box + %11 = fir.convert %c4 : (index) -> i32 + %12 = fir.convert %8 : (!fir.ref>) -> !fir.ref + %13 = fir.convert %4 : (!fir.box) -> !fir.box + %14 = fir.call @_FortranAMinlocInteger4(%9, %10, %11, %12, %c4_i32, %13, %false) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> none + %15 = fir.load %0 : !fir.ref>>> + %c0_0 = arith.constant 0 : index + %16:3 = fir.box_dims %15, %c0_0 : (!fir.box>>, index) -> (index, index, index) + %17 = fir.box_addr %15 : (!fir.box>>) -> !fir.heap> + %18 = fir.shape_shift %16#0, %16#1 : (index, index) -> !fir.shapeshift<1> + %19 = fir.array_load %17(%18) : 
(!fir.heap>, !fir.shapeshift<1>) -> !fir.array + %c1_1 = arith.constant 1 : index + %c0_2 = arith.constant 0 : index + %20 = arith.subi %c1, %c1_1 : index + %21 = fir.do_loop %arg1 = %c0_2 to %20 step %c1_1 unordered iter_args(%arg2 = %3) -> (!fir.array<1xi32>) { + %23 = fir.array_fetch %19, %arg1 : (!fir.array, index) -> i32 + %24 = fir.array_update %arg2, %23, %arg1 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32> + fir.result %24 : !fir.array<1xi32> + } + fir.array_merge_store %3, %21 to %1 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref> + fir.freemem %17 : !fir.heap> + %22 = fir.load %1 : !fir.ref> + return %22 : !fir.array<1xi32> +} +// CHECK-LABEL: func.func @_QPtestminloc_doesntwork1d_unknownsize( +// CHECK-SAME: %[[ARR:.*]]: !fir.box> {fir.bindc_name = "a"}) -> !fir.array<1xi32> { +// CHECK-NOT: fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath : (!fir.ref>, !fir.box, !fir.box) -> () +// CHECK: fir.call @_FortranAMinlocInteger4({{.*}}) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> none +// CHECK-NOT: fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath : (!fir.ref>, !fir.box, !fir.box) -> () + +// ----- +// Check Minloc is not simplified when inputArr is characterType + +func.func @_QPtestminloc_doesntwork1d_chars(%arg0: !fir.boxchar<1> {fir.bindc_name = "a"}) -> !fir.array<1xi32> { + %0 = fir.alloca !fir.box>> + %1:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref>, index) + %2 = fir.convert %1#0 : (!fir.ref>) -> !fir.ref>> + %c10 = arith.constant 10 : index + %c1 = arith.constant 1 : index + %3 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_doesntwork1d_chars", uniq_name = "_QFtestminloc_doesntwork1d_charsEtestminloc_doesntwork1d_chars"} + %4 = fir.shape %c1 : (index) -> !fir.shape<1> + %5 = fir.array_load %3(%4) : (!fir.ref>, !fir.shape<1>) -> !fir.array<1xi32> + %6 = fir.shape %c10 : (index) -> !fir.shape<1> + %7 = fir.embox %2(%6) : (!fir.ref>>, 
!fir.shape<1>) -> !fir.box>> + %8 = fir.absent !fir.box + %c4 = arith.constant 4 : index + %false = arith.constant false + %9 = fir.zero_bits !fir.heap> + %c0 = arith.constant 0 : index + %10 = fir.shape %c0 : (index) -> !fir.shape<1> + %11 = fir.embox %9(%10) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + fir.store %11 to %0 : !fir.ref>>> + %12 = fir.address_of(@_QQcl.74460ff3ef22ea53671c22344e1556b9) : !fir.ref> + %c4_i32 = arith.constant 4 : i32 + %13 = fir.convert %0 : (!fir.ref>>>) -> !fir.ref> + %14 = fir.convert %7 : (!fir.box>>) -> !fir.box + %15 = fir.convert %c4 : (index) -> i32 + %16 = fir.convert %12 : (!fir.ref>) -> !fir.ref + %17 = fir.convert %8 : (!fir.box) -> !fir.box + %18 = fir.call @_FortranAMinlocCharacter(%13, %14, %15, %16, %c4_i32, %17, %false) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> none + %19 = fir.load %0 : !fir.ref>>> + %c0_0 = arith.constant 0 : index + %20:3 = fir.box_dims %19, %c0_0 : (!fir.box>>, index) -> (index, index, index) + %21 = fir.box_addr %19 : (!fir.box>>) -> !fir.heap> + %22 = fir.shape_shift %20#0, %20#1 : (index, index) -> !fir.shapeshift<1> + %23 = fir.array_load %21(%22) : (!fir.heap>, !fir.shapeshift<1>) -> !fir.array + %c1_1 = arith.constant 1 : index + %c0_2 = arith.constant 0 : index + %24 = arith.subi %c1, %c1_1 : index + %25 = fir.do_loop %arg1 = %c0_2 to %24 step %c1_1 unordered iter_args(%arg2 = %5) -> (!fir.array<1xi32>) { + %27 = fir.array_fetch %23, %arg1 : (!fir.array, index) -> i32 + %28 = fir.array_update %arg2, %27, %arg1 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32> + fir.result %28 : !fir.array<1xi32> + } + fir.array_merge_store %5, %25 to %3 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref> + fir.freemem %21 : !fir.heap> + %26 = fir.load %3 : !fir.ref> + return %26 : !fir.array<1xi32> +} + +// CHECK-LABEL: func.func @_QPtestminloc_doesntwork1d_chars( +// CHECK-SAME: %[[ARR:.*]]: !fir.boxchar<1> {fir.bindc_name = "a"}) -> !fir.array<1xi32> { +// CHECK-NOT: 
fir.call @_FortranAMinlocCharacterx1_i32_contract_simplified({{.*}}) fastmath : (!fir.ref>, !fir.box, !fir.box) -> () +// CHECK: fir.call @_FortranAMinlocCharacter({{.*}}) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> none +// CHECK-NOT: fir.call @_FortranAMinlocCharacterx1_i32_contract_simplified({{.*}}) fastmath : (!fir.ref>, !fir.box, !fir.box) -> () + +// ----- +// Check Minloc is not simplified when the rank of the mask cannot be determined + +func.func @_QPtestminloc_doesntwork1d_unknownmask(%arg0: !fir.ref> {fir.bindc_name = "a"}) -> !fir.array<1xi32> { + %0 = fir.alloca !fir.box>> + %c10 = arith.constant 10 : index + %1 = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEb"} + %2 = fir.alloca !fir.box>>> {bindc_name = "mask", uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEmask"} + %3 = fir.alloca !fir.heap>> {uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEmask.addr"} + %4 = fir.alloca index {uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEmask.lb0"} + %5 = fir.alloca index {uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEmask.ext0"} + %6 = fir.zero_bits !fir.heap>> + fir.store %6 to %3 : !fir.ref>>> + %c1 = arith.constant 1 : index + %7 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_doesntwork1d_unknownmask", uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEtestminloc_doesntwork1d_unknownmask"} + %8 = fir.load %1 : !fir.ref + %9 = fir.convert %8 : (i32) -> index + %c0 = arith.constant 0 : index + %10 = arith.cmpi sgt, %9, %c0 : index + %11 = arith.select %10, %9, %c0 : index + %12 = fir.allocmem !fir.array>, %11 {fir.must_be_heap = true, uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEmask.alloc"} + fir.store %12 to %3 : !fir.ref>>> + fir.store %11 to %5 : !fir.ref + %c1_0 = arith.constant 1 : index + fir.store %c1_0 to %4 : !fir.ref + %13 = fir.shape %c1 : (index) -> !fir.shape<1> + %14 = fir.array_load %7(%13) : (!fir.ref>, !fir.shape<1>) -> !fir.array<1xi32> + %15 = fir.shape %c10
: (index) -> !fir.shape<1> + %16 = fir.embox %arg0(%15) : (!fir.ref>, !fir.shape<1>) -> !fir.box> + %17 = fir.load %3 : !fir.ref>>> + %18 = fir.convert %17 : (!fir.heap>>) -> i64 + %c0_i64 = arith.constant 0 : i64 + %19 = arith.cmpi ne, %18, %c0_i64 : i64 + %20 = fir.load %4 : !fir.ref + %21 = fir.load %5 : !fir.ref + %22 = fir.load %3 : !fir.ref>>> + %23 = fir.shape_shift %20, %21 : (index, index) -> !fir.shapeshift<1> + %24 = fir.embox %22(%23) : (!fir.heap>>, !fir.shapeshift<1>) -> !fir.box>> + %25 = fir.absent !fir.box>> + %26 = arith.select %19, %24, %25 : !fir.box>> + %c4 = arith.constant 4 : index + %false = arith.constant false + %27 = fir.zero_bits !fir.heap> + %c0_1 = arith.constant 0 : index + %28 = fir.shape %c0_1 : (index) -> !fir.shape<1> + %29 = fir.embox %27(%28) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + fir.store %29 to %0 : !fir.ref>>> + %30 = fir.address_of(@_QQcl.74460ff3ef22ea53671c22344e1556b9) : !fir.ref> + %c7_i32 = arith.constant 7 : i32 + %31 = fir.convert %0 : (!fir.ref>>>) -> !fir.ref> + %32 = fir.convert %16 : (!fir.box>) -> !fir.box + %33 = fir.convert %c4 : (index) -> i32 + %34 = fir.convert %30 : (!fir.ref>) -> !fir.ref + %35 = fir.convert %26 : (!fir.box>>) -> !fir.box + %36 = fir.call @_FortranAMinlocInteger4(%31, %32, %33, %34, %c7_i32, %35, %false) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> none + %37 = fir.load %0 : !fir.ref>>> + %c0_2 = arith.constant 0 : index + %38:3 = fir.box_dims %37, %c0_2 : (!fir.box>>, index) -> (index, index, index) + %39 = fir.box_addr %37 : (!fir.box>>) -> !fir.heap> + %40 = fir.shape_shift %38#0, %38#1 : (index, index) -> !fir.shapeshift<1> + %41 = fir.array_load %39(%40) : (!fir.heap>, !fir.shapeshift<1>) -> !fir.array + %c1_3 = arith.constant 1 : index + %c0_4 = arith.constant 0 : index + %42 = arith.subi %c1, %c1_3 : index + %43 = fir.do_loop %arg1 = %c0_4 to %42 step %c1_3 unordered iter_args(%arg2 = %14) -> (!fir.array<1xi32>) { + %45 = fir.array_fetch %41, %arg1 : 
(!fir.array, index) -> i32 + %46 = fir.array_update %arg2, %45, %arg1 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32> + fir.result %46 : !fir.array<1xi32> + } + fir.array_merge_store %14, %43 to %7 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref> + fir.freemem %39 : !fir.heap> + %44 = fir.load %7 : !fir.ref> + return %44 : !fir.array<1xi32> +} + +// CHECK-LABEL: func.func @_QPtestminloc_doesntwork1d_unknownmask( +// CHECK-SAME: %[[ARR:.*]]: !fir.ref> {fir.bindc_name = "a"}) -> !fir.array<1xi32> { +// CHECK-NOT: fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath : (!fir.ref>, !fir.box, !fir.box) -> () +// CHECK: fir.call @_FortranAMinlocInteger4({{.*}}) fastmath : (!fir.ref>, !fir.box, i32, !fir.ref, i32, !fir.box, i1) -> none +// CHECK-NOT: fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath : (!fir.ref>, !fir.box, !fir.box) -> ()