diff --git a/flang/include/flang/Optimizer/CodeGen/CGOps.td b/flang/include/flang/Optimizer/CodeGen/CGOps.td --- a/flang/include/flang/Optimizer/CodeGen/CGOps.td +++ b/flang/include/flang/Optimizer/CodeGen/CGOps.td @@ -127,6 +127,14 @@ unsigned getRank(); // The rank of the result box unsigned getOutRank(); + + unsigned shapeOffset() { return 1; } + unsigned shiftOffset() { return shapeOffset() + shape().size(); } + unsigned sliceOffset() { return shiftOffset() + shift().size(); } + unsigned subcomponentOffset() { return sliceOffset() + slice().size(); } + unsigned substrOffset() { + return subcomponentOffset() + subcomponent().size(); + } }]; } diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "flang/Optimizer/CodeGen/CodeGen.h" +#include "CGOps.h" #include "PassDetail.h" #include "flang/ISO_Fortran_binding.h" #include "flang/Optimizer/Dialect/FIRAttr.h" @@ -187,6 +188,12 @@ return type; } + // Return the LLVM type of the base address (looked up at index 0) given the + // LLVM type of the related descriptor (lowered fir.box type). 
+ static mlir::Type getBaseAddrTypeFromBox(mlir::Type type) { + return getBoxEleTy(type, {0}); + } + template mlir::LLVM::GEPOp genGEP(mlir::Location loc, mlir::Type ty, mlir::ConversionPatternRewriter &rewriter, @@ -1448,6 +1455,22 @@ indexesAttr); } + mlir::Value insertLowerBound(mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, mlir::Value dest, + unsigned dim, mlir::Value lb) const { + return insertField(rewriter, loc, dest, {7, dim, 0}, lb); + } + mlir::Value insertExtent(mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, mlir::Value dest, unsigned dim, + mlir::Value extent) const { + return insertField(rewriter, loc, dest, {7, dim, 1}, extent); + } + mlir::Value insertStride(mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, mlir::Value dest, unsigned dim, + mlir::Value stride) const { + return insertField(rewriter, loc, dest, {7, dim, 2}, stride); + } + inline mlir::Value insertBaseAddress(mlir::ConversionPatternRewriter &rewriter, mlir::Location loc, mlir::Value dest, @@ -1543,6 +1566,25 @@ return {boxTy, descriptor, eleSize}; } + /// Compute the base address of a substring given the base address of a scalar + /// string and the zero-based string lower bound. + mlir::Value shiftSubstringBase(mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, mlir::Value base, + mlir::Value lowerBound) const { + llvm::SmallVector gepOperands; + auto baseType = + base.getType().cast().getElementType(); + if (baseType.isa()) { + auto idxTy = this->lowerTy().indexType(); + mlir::Value zero = genConstantIndex(loc, idxTy, rewriter, 0); + gepOperands.push_back(zero); + gepOperands.push_back(lowerBound); + } else { + gepOperands.push_back(lowerBound); + } + return this->genGEP(loc, base.getType(), rewriter, base, gepOperands); + } + /// If the embox is not in a globalOp body, allocate storage for the box; /// store the value inside and return the generated alloca. Return the input /// value otherwise. 
@@ -1559,6 +1601,22 @@ } }; +/// Compute the extent of a triplet slice (lb:ub:step). +/// The result is max((ub - lb + step) / step, 0), so it is never negative. +static mlir::Value +computeTripletExtent(mlir::ConversionPatternRewriter &rewriter, + mlir::Location loc, mlir::Value lb, mlir::Value ub, + mlir::Value step, mlir::Value zero, mlir::Type type) { + mlir::Value extent = rewriter.create(loc, type, ub, lb); + extent = rewriter.create(loc, type, extent, step); + extent = rewriter.create(loc, type, extent, step); + // If the resulting extent is negative (`ub-lb` and `step` have different + // signs), zero must be returned instead. + auto cmp = rewriter.create( + loc, mlir::LLVM::ICmpPredicate::sgt, extent, zero); + return rewriter.create(loc, cmp, extent, zero); +} + /// Create a generic box on a memory reference. This conversions lowers the /// abstract box to the appropriate, initialized descriptor. struct EmboxOpConversion : public EmboxCommonConversion { @@ -1596,6 +1654,220 @@ } }; +/// Create a new box given a box reference. +struct XReboxOpConversion : public EmboxCommonConversion { + using EmboxCommonConversion::EmboxCommonConversion; + + mlir::LogicalResult + matchAndRewrite(fir::cg::XReboxOp rebox, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + auto loc = rebox.getLoc(); + auto idxTy = lowerTy().indexType(); + auto loweredBox = adaptor.getOperands()[0]; + auto operands = adaptor.getOperands(); + + // Create new descriptor and fill its non-shape related data. 
+ llvm::SmallVector lenParams; + auto inputEleTy = getInputEleTy(rebox); + if (auto charTy = inputEleTy.dyn_cast()) { + auto len = loadElementSizeFromBox(loc, idxTy, loweredBox, rewriter); + if (charTy.getFKind() != 1) { + auto width = genConstantIndex(loc, idxTy, rewriter, charTy.getFKind()); + len = rewriter.create(loc, idxTy, len, width); + } + lenParams.emplace_back(len); + } else if (auto recTy = inputEleTy.dyn_cast()) { + if (recTy.getNumLenParams() != 0) + TODO(loc, "reboxing descriptor of derived type with length parameters"); + } + auto [boxTy, dest, eleSize] = + consDescriptorPrefix(rebox, rewriter, rebox.getOutRank(), lenParams); + + // Read input extents, strides, and base address + llvm::SmallVector inputExtents; + llvm::SmallVector inputStrides; + const auto inputRank = rebox.getRank(); + for (unsigned i = 0; i < inputRank; ++i) { + auto dim = genConstantIndex(loc, idxTy, rewriter, i); + auto dimInfo = + getDimsFromBox(loc, {idxTy, idxTy, idxTy}, loweredBox, dim, rewriter); + inputExtents.emplace_back(dimInfo[1]); + inputStrides.emplace_back(dimInfo[2]); + } + + auto baseTy = getBaseAddrTypeFromBox(loweredBox.getType()); + auto baseAddr = loadBaseAddrFromBox(loc, baseTy, loweredBox, rewriter); + + if (!rebox.slice().empty() || !rebox.subcomponent().empty()) + return sliceBox(rebox, dest, baseAddr, inputExtents, inputStrides, + operands, rewriter); + return reshapeBox(rebox, dest, baseAddr, inputExtents, inputStrides, + operands, rewriter); + } + +private: + /// Return the LLVM `i8*` type, used for byte-based address arithmetic. + inline mlir::Type getVoidPtrType(mlir::MLIRContext *context) const { + return mlir::LLVM::LLVMPointerType::get(mlir::IntegerType::get(context, 8)); + } + + /// Write resulting shape and base address in descriptor, and replace rebox + /// op. 
+ mlir::LogicalResult + finalizeRebox(fir::cg::XReboxOp rebox, mlir::Value dest, mlir::Value base, + mlir::ValueRange lbounds, mlir::ValueRange extents, + mlir::ValueRange strides, + mlir::ConversionPatternRewriter &rewriter) const { + auto loc = rebox.getLoc(); + // Lower bounds default to one when no explicit lbounds are provided. + auto one = genConstantIndex(loc, lowerTy().indexType(), rewriter, 1); + for (auto iter : llvm::enumerate(llvm::zip(extents, strides))) { + auto dim = iter.index(); + auto lb = lbounds.empty() ? one : lbounds[dim]; + dest = insertLowerBound(rewriter, loc, dest, dim, lb); + dest = insertExtent(rewriter, loc, dest, dim, std::get<0>(iter.value())); + dest = insertStride(rewriter, loc, dest, dim, std::get<1>(iter.value())); + } + dest = insertBaseAddress(rewriter, loc, dest, base); + auto result = placeInMemoryIfNotGlobalInit(rewriter, rebox.getLoc(), dest); + rewriter.replaceOp(rebox, result); + return success(); + } + + // Apply slice given the base address, extents and strides of the input box. + mlir::LogicalResult + sliceBox(fir::cg::XReboxOp rebox, mlir::Value dest, mlir::Value base, + mlir::ValueRange inputExtents, mlir::ValueRange inputStrides, + mlir::ValueRange operands, + mlir::ConversionPatternRewriter &rewriter) const { + auto loc = rebox.getLoc(); + auto voidPtrTy = getVoidPtrType(rebox.getContext()); + auto idxTy = lowerTy().indexType(); + mlir::Value zero = genConstantIndex(loc, idxTy, rewriter, 0); + // Apply subcomponent and substring shift on base address. + if (!rebox.subcomponent().empty() || !rebox.substr().empty()) { + // Cast to inputEleTy* so that a GEP can be used. 
+ auto inputEleTy = getInputEleTy(rebox); + auto llvmElePtrTy = + mlir::LLVM::LLVMPointerType::get(convertType(inputEleTy)); + base = rewriter.create(loc, llvmElePtrTy, base); + + if (!rebox.subcomponent().empty()) { + llvm::SmallVector gepOperands = {zero}; + for (unsigned i = 0; i < rebox.subcomponent().size(); ++i) + gepOperands.push_back(operands[rebox.subcomponentOffset() + i]); + base = genGEP(loc, llvmElePtrTy, rewriter, base, gepOperands); + } + if (!rebox.substr().empty()) + base = shiftSubstringBase(rewriter, loc, base, + operands[rebox.substrOffset()]); + } + + if (rebox.slice().empty()) + // The array section is of the form array[%component][substring], keep + // the input array extents and strides. + return finalizeRebox(rebox, dest, base, /*lbounds*/ llvm::None, + inputExtents, inputStrides, rewriter); + + // Strides from the fir.box are in bytes. + base = rewriter.create(loc, voidPtrTy, base); + + // The slice is of the form array(i:j:k)[%component]. Compute new extents + // and strides. + llvm::SmallVector slicedExtents; + llvm::SmallVector slicedStrides; + auto one = genConstantIndex(loc, idxTy, rewriter, 1); + const bool sliceHasOrigins = !rebox.shift().empty(); + auto sliceOps = rebox.sliceOffset(); + auto shiftOps = rebox.shiftOffset(); + auto strideOps = inputStrides.begin(); + const auto inputRank = inputStrides.size(); + for (unsigned i = 0; i < inputRank; + ++i, ++strideOps, ++shiftOps, sliceOps += 3) { + auto sliceLb = integerCast(loc, rewriter, idxTy, operands[sliceOps]); + auto inputStride = *strideOps; // already idxTy + // Apply origin shift: base += (lb-shift)*input_stride + auto sliceOrigin = sliceHasOrigins ? integerCast(loc, rewriter, idxTy, + operands[shiftOps]) + : one; + auto diff = + rewriter.create(loc, idxTy, sliceLb, sliceOrigin); + auto offset = + rewriter.create(loc, idxTy, diff, inputStride); + base = genGEP(loc, voidPtrTy, rewriter, base, offset); + // Apply upper bound and step if this is a triplet. 
Otherwise, the + // dimension is dropped and no extents/strides are computed. + mlir::Value upper = operands[sliceOps + 1]; + const bool isTripletSlice = + !mlir::isa_and_nonnull(upper.getDefiningOp()); + if (isTripletSlice) { + auto step = integerCast(loc, rewriter, idxTy, operands[sliceOps + 2]); + // extent = max((ub - lb + step) / step, 0) + auto sliceUb = integerCast(loc, rewriter, idxTy, upper); + auto extent = computeTripletExtent(rewriter, loc, sliceLb, sliceUb, + step, zero, idxTy); + slicedExtents.emplace_back(extent); + // stride = step*input_stride + auto stride = + rewriter.create(loc, idxTy, step, inputStride); + slicedStrides.emplace_back(stride); + } + } + return finalizeRebox(rebox, dest, base, /*lbounds*/ llvm::None, + slicedExtents, slicedStrides, rewriter); + } + + /// Apply a new shape to the data described by a box given the base address, + /// extents and strides of the box. + mlir::LogicalResult + reshapeBox(fir::cg::XReboxOp rebox, mlir::Value dest, mlir::Value base, + mlir::ValueRange inputExtents, mlir::ValueRange inputStrides, + mlir::ValueRange operands, + mlir::ConversionPatternRewriter &rewriter) const { + mlir::ValueRange reboxShifts{operands.begin() + rebox.shiftOffset(), + operands.begin() + rebox.shiftOffset() + + rebox.shift().size()}; + if (rebox.shape().empty()) { + // Only setting new lower bounds. + return finalizeRebox(rebox, dest, base, reboxShifts, inputExtents, + inputStrides, rewriter); + } + + auto loc = rebox.getLoc(); + // Strides from the fir.box are in bytes. + auto voidPtrTy = getVoidPtrType(rebox.getContext()); + base = rewriter.create(loc, voidPtrTy, base); + + llvm::SmallVector newStrides; + llvm::SmallVector newExtents; + auto idxTy = lowerTy().indexType(); + // First stride from input box is kept. The rest is assumed contiguous + // (it is not possible to reshape otherwise). If the input is scalar, + // which may be OK if all new extents are ones, the stride does not + // matter, use one. 
+ auto stride = inputStrides.empty() + ? genConstantIndex(loc, idxTy, rewriter, 1) + : inputStrides[0]; + for (unsigned i = 0; i < rebox.shape().size(); ++i) { + auto rawExtent = operands[rebox.shapeOffset() + i]; + auto extent = integerCast(loc, rewriter, idxTy, rawExtent); + newExtents.emplace_back(extent); + newStrides.emplace_back(stride); + // nextStride = extent * stride; + stride = rewriter.create(loc, idxTy, extent, stride); + } + return finalizeRebox(rebox, dest, base, reboxShifts, newExtents, newStrides, + rewriter); + } + + /// Return scalar element type of the input box. + static mlir::Type getInputEleTy(fir::cg::XReboxOp rebox) { + auto ty = fir::dyn_cast_ptrOrBoxEleTy(rebox.box().getType()); + if (auto seqTy = ty.dyn_cast()) + return seqTy.getEleTy(); + return ty; + } +}; // Code shared between insert_value and extract_value Ops. struct ValueOpCommon { @@ -2200,8 +2472,8 @@ ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion, SliceOpConversion, StoreOpConversion, StringLitOpConversion, SubcOpConversion, UnboxCharOpConversion, UnboxProcOpConversion, - UndefOpConversion, UnreachableOpConversion, ZeroOpConversion>( - typeConverter); + UndefOpConversion, UnreachableOpConversion, XReboxOpConversion, + ZeroOpConversion>(typeConverter); mlir::populateStdToLLVMConversionPatterns(typeConverter, pattern); mlir::arith::populateArithmeticToLLVMConversionPatterns(typeConverter, pattern); diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -1589,3 +1589,94 @@ // CHECK: %[[LOAD:.*]] = llvm.load %[[ARG0]] : !llvm.ptr // CHECK: llvm.store %[[LOAD]], %[[ALLOC]] : !llvm.ptr // CHECK: llvm.return + + +// ----- + +// Check `fircg.ext_rebox` conversion to LLVM IR dialect + +// Test applying slice on fir.box. Note that the slice is 1D whereas the array is 2D. 
+// subroutine foo(x) +// real :: x(3:, 4:) +// call bar(x(5, 6:80:3)) +// end subroutine + +func private @bar1(!fir.box>) +func @test_rebox_1(%arg0: !fir.box>) { + %c2 = arith.constant 2 : index + %c3 = arith.constant 3 : index + %c4 = arith.constant 4 : index + %c5 = arith.constant 5 : index + %c6 = arith.constant 6 : index + %c80 = arith.constant 80 : index + %0 = fir.undefined index + %3 = fircg.ext_rebox %arg0 origin %c3, %c4[%c5, %0, %0, %c6, %c80, %c3] : (!fir.box>, index, index, index, index, index, index, index, index) -> !fir.box> + fir.call @bar1(%3) : (!fir.box>) -> () + return +} +//CHECK-LABEL: llvm.func @bar1 +//CHECK-LABEL: llvm.func @test_rebox_1 +//CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>> +//CHECK: %[[ONE_1:.*]] = llvm.mlir.constant(1 : i32) : i32 +//CHECK: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE_1]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>> +//CHECK: %[[THREE:.*]] = llvm.mlir.constant(3 : index) : i64 +//CHECK: %[[FOUR:.*]] = llvm.mlir.constant(4 : index) : i64 +//CHECK: %[[FIVE:.*]] = llvm.mlir.constant(5 : index) : i64 +//CHECK: %[[SIX:.*]] = llvm.mlir.constant(6 : index) : i64 +//CHECK: %[[EIGHTY:.*]] = llvm.mlir.constant(80 : index) : i64 +//CHECK: %[[RBOX:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +//CHECK: %[[ELEM_SIZE:.*]] = llvm.mlir.constant(4 : i32) : i32 +//CHECK: %[[FLOAT_TYPE:.*]] = llvm.mlir.constant(25 : i32) : i32 +//CHECK: %[[ELEM_SIZE_I64:.*]] = llvm.sext %[[ELEM_SIZE]] : i32 to i64 +//CHECK: %[[RBOX_TMP1:.*]] = llvm.insertvalue %[[ELEM_SIZE_I64]], %[[RBOX]][1 : i32] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +//CHECK: %[[CFI_VERSION:.*]] = llvm.mlir.constant(20180515 : i32) : i32 +//CHECK: %[[RBOX_TMP2:.*]] = llvm.insertvalue %[[CFI_VERSION]], 
%[[RBOX_TMP1]][2 : i32] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +//CHECK: %[[RANK:.*]] = llvm.mlir.constant(1 : i32) : i32 +//CHECK: %[[RANK_I8:.*]] = llvm.trunc %[[RANK]] : i32 to i8 +//CHECK: %[[RBOX_TMP3:.*]] = llvm.insertvalue %[[RANK_I8]], %[[RBOX_TMP2]][3 : i32] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +//CHECK: %[[FLOAT_TYPE_I8:.*]] = llvm.trunc %[[FLOAT_TYPE]] : i32 to i8 +//CHECK: %[[RBOX_TMP4:.*]] = llvm.insertvalue %[[FLOAT_TYPE_I8]], %[[RBOX_TMP3]][4 : i32] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +//CHECK: %[[OTHER_ATTR:.*]] = llvm.mlir.constant(0 : i32) : i32 +//CHECK: %[[OTHER_ATTR_I8:.*]] = llvm.trunc %[[OTHER_ATTR]] : i32 to i8 +//CHECK: %[[RBOX_TMP5:.*]] = llvm.insertvalue %[[OTHER_ATTR_I8]], %[[RBOX_TMP4]][5 : i32] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +//CHECK: %[[ADDENDUM:.*]] = llvm.mlir.constant(0 : i32) : i32 +//CHECK: %[[ADDENDUM_I8:.*]] = llvm.trunc %[[ADDENDUM]] : i32 to i8 +//CHECK: %[[RBOX_TMP6:.*]] = llvm.insertvalue %[[ADDENDUM_I8]], %[[RBOX_TMP5]][6 : i32] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +//CHECK: %[[DIM1:.*]] = llvm.mlir.constant(0 : i64) : i64 +//CHECK: %[[GEP_ZERO_1:.*]] = llvm.mlir.constant(0 : i32) : i32 +//CHECK: %[[DIM_IDX_1:.*]] = llvm.mlir.constant(7 : i32) : i32 +//CHECK: %[[STRIDE1_IDX:.*]] = llvm.mlir.constant(2 : i32) : i32 +//CHECK: %[[DIM1_STRIDE_REF:.*]] = llvm.getelementptr %[[ARG0]][%[[GEP_ZERO_1]], %[[DIM_IDX_1]], %[[DIM1]], %[[STRIDE1_IDX]]] : (!llvm.ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>>, i32, i32, i64, i32) -> !llvm.ptr +//CHECK: %[[DIM1_STRIDE:.*]] = llvm.load %[[DIM1_STRIDE_REF]] : !llvm.ptr +//CHECK: %[[DIM2:.*]] = llvm.mlir.constant(1 : i64) : i64 +//CHECK: %[[GEP_ZERO_2:.*]] = llvm.mlir.constant(0 : i32) : i32 +//CHECK: %[[DIM_IDX_2:.*]] = llvm.mlir.constant(7 : i32) : i32 +//CHECK: %[[STRIDE2_IDX:.*]] = 
llvm.mlir.constant(2 : i32) : i32 +//CHECK: %[[DIM2_STRIDE_REF:.*]] = llvm.getelementptr %[[ARG0]][%[[GEP_ZERO_2]], %[[DIM_IDX_2]], %[[DIM2]], %[[STRIDE2_IDX]]] : (!llvm.ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>>, i32, i32, i64, i32) -> !llvm.ptr +//CHECK: %[[DIM2_STRIDE:.*]] = llvm.load %[[DIM2_STRIDE_REF]] : !llvm.ptr +//CHECK: %[[ZERO_1:.*]] = llvm.mlir.constant(0 : i32) : i32 +//CHECK: %[[ZERO_2:.*]] = llvm.mlir.constant(0 : i32) : i32 +//CHECK: %[[SOURCE_ARRAY_PTR:.*]] = llvm.getelementptr %[[ARG0]][%[[ZERO_1]], %[[ZERO_2]]] : (!llvm.ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>>, i32, i32) -> !llvm.ptr> +//CHECK: %[[SOURCE_ARRAY:.*]] = llvm.load %[[SOURCE_ARRAY_PTR]] : !llvm.ptr> +//CHECK: %[[ZERO_ELEMS:.*]] = llvm.mlir.constant(0 : i64) : i64 +//CHECK: %[[SOURCE_ARRAY_I8PTR:.*]] = llvm.bitcast %[[SOURCE_ARRAY]] : !llvm.ptr to !llvm.ptr +//CHECK: %[[DIM1_LB_DIFF:.*]] = llvm.sub %[[FIVE]], %[[THREE]] : i64 +//CHECK: %[[DIM1_LB_OFFSET:.*]] = llvm.mul %[[DIM1_LB_DIFF]], %[[DIM1_STRIDE]] : i64 +//CHECK: %[[RESULT_PTR_DIM1:.*]] = llvm.getelementptr %[[SOURCE_ARRAY_I8PTR]][%[[DIM1_LB_OFFSET]]] : (!llvm.ptr, i64) -> !llvm.ptr +//CHECK: %[[DIM2_LB_DIFF:.*]] = llvm.sub %[[SIX]], %[[FOUR]] : i64 +//CHECK: %[[DIM2_LB_OFFSET:.*]] = llvm.mul %[[DIM2_LB_DIFF]], %[[DIM2_STRIDE]] : i64 +//CHECK: %[[RESULT_PTR_I8:.*]] = llvm.getelementptr %[[RESULT_PTR_DIM1]][%[[DIM2_LB_OFFSET]]] : (!llvm.ptr, i64) -> !llvm.ptr +//CHECK: %[[RESULT_UB_LB_DIFF:.*]] = llvm.sub %[[EIGHTY]], %[[SIX]] : i64 +//CHECK: %[[RESULT_UB_LB_DIFF_PLUS_STRIDE:.*]] = llvm.add %[[RESULT_UB_LB_DIFF]], %[[THREE]] : i64 +//CHECK: %[[RESULT_NELEMS_TMP:.*]] = llvm.sdiv %[[RESULT_UB_LB_DIFF_PLUS_STRIDE]], %[[THREE]] : i64 +//CHECK: %[[RESULT_IF_NON_ZERO:.*]] = llvm.icmp "sgt" %[[RESULT_NELEMS_TMP]], %[[ZERO_ELEMS]] : i64 +//CHECK: %[[RESULT_NELEMS:.*]] = llvm.select %[[RESULT_IF_NON_ZERO]], %[[RESULT_NELEMS_TMP]], %[[ZERO_ELEMS]] : i1, i64 +//CHECK: %[[RESULT_STRIDE:.*]] = llvm.mul 
%[[THREE]], %[[DIM2_STRIDE]] : i64 +//CHECK: %[[RESULT_LB:.*]] = llvm.mlir.constant(1 : i64) : i64 +//CHECK: %[[RBOX_TMP7_1:.*]] = llvm.insertvalue %[[RESULT_LB]], %[[RBOX_TMP6]][7 : i32, 0 : i32, 0 : i32] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +//CHECK: %[[RBOX_TMP7_2:.*]] = llvm.insertvalue %[[RESULT_NELEMS]], %[[RBOX_TMP7_1]][7 : i32, 0 : i32, 1 : i32] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +//CHECK: %[[RBOX_TMP7_3:.*]] = llvm.insertvalue %[[RESULT_STRIDE]], %[[RBOX_TMP7_2]][7 : i32, 0 : i32, 2 : i32] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +//CHECK: %[[RESULT_PTR_F32:.*]] = llvm.bitcast %[[RESULT_PTR_I8]] : !llvm.ptr to !llvm.ptr +//CHECK: %[[RESULT_BOX:.*]] = llvm.insertvalue %[[RESULT_PTR_F32]], %[[RBOX_TMP7_3]][0 : i32] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> +//CHECK: llvm.store %[[RESULT_BOX]], %[[RESULT_BOX_REF]] : !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>> +//CHECK: llvm.call @bar1(%[[RESULT_BOX_REF]]) : (!llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>>) -> ()