diff --git a/flang/include/flang/Optimizer/CodeGen/CGOps.td b/flang/include/flang/Optimizer/CodeGen/CGOps.td
--- a/flang/include/flang/Optimizer/CodeGen/CGOps.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGOps.td
@@ -176,6 +176,16 @@
   let extraClassDeclaration = [{
     unsigned getRank();
+
+    // Shape is optional, but if it exists, it will be at offset 1.
+    unsigned shapeOffset() { return 1; }
+    unsigned shiftOffset() { return shapeOffset() + shape().size(); }
+    unsigned sliceOffset() { return shiftOffset() + shift().size(); }
+    unsigned subcomponentOffset() { return sliceOffset() + slice().size(); }
+    unsigned indicesOffset() {
+      return subcomponentOffset() + subcomponent().size();
+    }
+    unsigned lenParamsOffset() { return indicesOffset() + indices().size(); }
   }];
 }
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Optimizer/CodeGen/CodeGen.h"
+#include "CGOps.h"
 #include "PassDetail.h"
 #include "flang/ISO_Fortran_binding.h"
 #include "flang/Optimizer/Dialect/FIRAttr.h"
@@ -61,6 +62,11 @@
     return lowerTy().convertType(ty);
   }
 
+  mlir::Type getVoidPtrType() const {
+    return mlir::LLVM::LLVMPointerType::get(
+        mlir::IntegerType::get(&lowerTy().getContext(), 8));
+  }
+
   mlir::LLVM::ConstantOp
   genConstantOffset(mlir::Location loc,
                     mlir::ConversionPatternRewriter &rewriter,
@@ -113,6 +119,16 @@
     return rewriter.create<mlir::LLVM::LoadOp>(loc, ty, p);
   }
 
+  mlir::Value
+  loadStrideFromBox(mlir::Location loc, mlir::Value box, unsigned dim,
+                    mlir::ConversionPatternRewriter &rewriter) const {
+    auto idxTy = lowerTy().indexType();
+    auto c0 = genConstantOffset(loc, rewriter, 0);
+    auto cDims = genConstantOffset(loc, rewriter, kDimsPosInBox);
+    auto dimValue = genConstantIndex(loc, idxTy, rewriter, dim);
+    return loadFromOffset(loc, box, c0, cDims, dimValue, 2, idxTy, rewriter);
+  }
+
   /// Read base address from a fir.box. Returned address has type ty.
   mlir::Value
   loadBaseAddrFromBox(mlir::Location loc, mlir::Type ty, mlir::Value box,
@@ -153,6 +169,34 @@
         loc, mlir::LLVM::ICmpPredicate::ne, maskRes, c0);
   }
 
+  // Get the element type given an LLVM type that is of the form
+  // [llvm.ptr](llvm.array|llvm.struct)+ and the provided indexes.
+  static mlir::Type getBoxEleTy(mlir::Type type,
+                                llvm::ArrayRef<unsigned> indexes) {
+    if (auto t = type.dyn_cast<mlir::LLVM::LLVMPointerType>())
+      type = t.getElementType();
+    for (auto i : indexes) {
+      if (auto t = type.dyn_cast<mlir::LLVM::LLVMStructType>()) {
+        assert(!t.isOpaque() && i < t.getBody().size());
+        type = t.getBody()[i];
+      } else if (auto t = type.dyn_cast<mlir::LLVM::LLVMArrayType>()) {
+        type = t.getElementType();
+      } else if (auto t = type.dyn_cast<mlir::VectorType>()) {
+        type = t.getElementType();
+      } else {
+        fir::emitFatalError(mlir::UnknownLoc::get(type.getContext()),
+                            "request for invalid box element type");
+      }
+    }
+    return type;
+  }
+
+  // Return the LLVM type of the base address given the LLVM type
+  // of the related descriptor (lowered fir.box type).
+  static mlir::Type getBaseAddrTypeFromBox(mlir::Type type) {
+    return getBoxEleTy(type, {0});
+  }
+
   template <typename... ARGS>
   mlir::LLVM::GEPOp genGEP(mlir::Location loc, mlir::Type ty,
                            mlir::ConversionPatternRewriter &rewriter,
@@ -1327,6 +1371,165 @@
   }
 };
 
+/// XArrayCoor is the address arithmetic on a dynamically shaped, sliced,
+/// shifted, etc. array.
+/// (See the static restriction on coordinate_of.) array_coor determines the
+/// coordinate (location) of a specific element.
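+///
+/// In outline, for each dimension `i` the rewrite below computes a zero-based
+/// position
+///   pos_i = (index_i - lb_i) * step_i + (sliceLb_i - lb_i)
+/// (the slice terms only when a slice triple is present) and accumulates
+///   offset += pos_i * stride_i,
+/// where stride_i is read from the descriptor (in bytes) when the base is a
+/// fir.box, or derived from the extents (in elements) when it is contiguous.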
+struct XArrayCoorOpConversion
+    : public FIROpAndTypeConversion<fir::cg::XArrayCoorOp> {
+  using FIROpAndTypeConversion::FIROpAndTypeConversion;
+
+  mlir::LogicalResult
+  doRewrite(fir::cg::XArrayCoorOp coor, mlir::Type ty, OpAdaptor adaptor,
+            mlir::ConversionPatternRewriter &rewriter) const override {
+    auto loc = coor.getLoc();
+    unsigned rank = coor.getRank();
+    assert(coor.indices().size() == rank);
+    assert(coor.shape().empty() || coor.shape().size() == rank);
+    assert(coor.shift().empty() || coor.shift().size() == rank);
+    assert(coor.slice().empty() || coor.slice().size() == 3 * rank);
+    auto indexOps = coor.indices().begin();
+    auto shapeOps = coor.shape().begin();
+    auto shiftOps = coor.shift().begin();
+    auto sliceOps = coor.slice().begin();
+    mlir::Type idxTy = lowerTy().indexType();
+    mlir::Value one = genConstantIndex(loc, idxTy, rewriter, 1);
+    mlir::Value prevExt = one;
+    mlir::Value zero = genConstantIndex(loc, idxTy, rewriter, 0);
+    mlir::Value offset = zero;
+    const bool isShifted = !coor.shift().empty();
+    const bool isSliced = !coor.slice().empty();
+    const bool baseIsBoxed = coor.memref().getType().isa<fir::BoxType>();
+
+    // For each dimension of the array, generate the offset calculation.
+    for (unsigned i = 0; i < rank;
+         ++i, ++indexOps, ++shapeOps, ++shiftOps, sliceOps += 3) {
+      mlir::Value index = integerCast(loc, rewriter, idxTy, *indexOps);
+      mlir::Value lb =
+          isShifted ? integerCast(loc, rewriter, idxTy, *shiftOps) : one;
+      mlir::Value step = one;
+      bool normalSlice = isSliced;
+      // Compute the zero based index in dimension i of the element, applying
+      // potential triplets and lower bounds.
+      if (isSliced) {
+        mlir::Value ub = *(sliceOps + 1);
+        normalSlice = !mlir::isa_and_nonnull<fir::UndefOp>(ub.getDefiningOp());
+        if (normalSlice)
+          step = integerCast(loc, rewriter, idxTy, *(sliceOps + 2));
+      }
+      auto idx = rewriter.create<mlir::LLVM::SubOp>(loc, idxTy, index, lb);
+      mlir::Value diff =
+          rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, idx, step);
+      if (normalSlice) {
+        mlir::Value sliceLb = integerCast(loc, rewriter, idxTy, *sliceOps);
+        auto adj = rewriter.create<mlir::LLVM::SubOp>(loc, idxTy, sliceLb, lb);
+        diff = rewriter.create<mlir::LLVM::AddOp>(loc, idxTy, diff, adj);
+      }
+      // Update the offset given the stride and the zero based index `diff`
+      // that was just computed.
+      if (baseIsBoxed) {
+        // Use the stride in bytes from the descriptor.
+        mlir::Value stride =
+            loadStrideFromBox(loc, adaptor.getOperands()[0], i, rewriter);
+        auto sc = rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, diff, stride);
+        offset = rewriter.create<mlir::LLVM::AddOp>(loc, idxTy, sc, offset);
+      } else {
+        // Use the stride computed at the last iteration.
+        auto sc = rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, diff, prevExt);
+        offset = rewriter.create<mlir::LLVM::AddOp>(loc, idxTy, sc, offset);
+        // Compute the next stride assuming contiguity of the base array
+        // (in element number).
+        auto nextExt = integerCast(loc, rewriter, idxTy, *shapeOps);
+        prevExt =
+            rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, prevExt, nextExt);
+      }
+    }
+
+    // Add the computed offset to the base address.
+    if (baseIsBoxed) {
+      // Working with byte offsets. The base address is read from the fir.box
+      // and needs to be cast to i8* to do the pointer arithmetic.
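+      // A getelementptr on that i8* therefore advances the base by `offset`
+      // bytes; the result is then cast back to the base address type (and,
+      // for subcomponents, indexed further with a GEP) below.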
+      mlir::Type baseTy =
+          getBaseAddrTypeFromBox(adaptor.getOperands()[0].getType());
+      mlir::Value base =
+          loadBaseAddrFromBox(loc, baseTy, adaptor.getOperands()[0], rewriter);
+      mlir::Type voidPtrTy = getVoidPtrType();
+      base = rewriter.create<mlir::LLVM::BitcastOp>(loc, voidPtrTy, base);
+      llvm::SmallVector<mlir::Value> args{base, offset};
+      auto addr = rewriter.create<mlir::LLVM::GEPOp>(loc, voidPtrTy, args);
+      if (coor.subcomponent().empty()) {
+        rewriter.replaceOpWithNewOp<mlir::LLVM::BitcastOp>(coor, baseTy, addr);
+        return success();
+      }
+      auto casted = rewriter.create<mlir::LLVM::BitcastOp>(loc, baseTy, addr);
+      args.clear();
+      args.push_back(casted);
+      args.push_back(zero);
+      if (!coor.lenParams().empty()) {
+        // If type parameters are present, then we don't want to use a GEPOp
+        // as below, as the LLVM struct type cannot be statically defined.
+        return rewriter.notifyMatchFailure(
+            coor, "derived type with type parameters not implemented yet");
+      }
+      // TODO: array offset subcomponents must be converted to LLVM's
+      // row-major layout here.
+      for (auto i = coor.subcomponentOffset(); i != coor.indicesOffset(); ++i)
+        args.push_back(adaptor.getOperands()[i]);
+      rewriter.replaceOpWithNewOp<mlir::LLVM::GEPOp>(coor, baseTy, args);
+      return success();
+    }
+
+    // The array was not boxed, so it must be contiguous. `offset` is therefore
+    // an element offset and the base type is kept in the GEP unless the
+    // element type size is itself dynamic.
+    mlir::Value base;
+    if (coor.subcomponent().empty()) {
+      // No subcomponent.
+      if (!coor.lenParams().empty()) {
+        // Type parameters. Adjust the element size explicitly.
+        auto eleTy = fir::dyn_cast_ptrEleTy(coor.getType());
+        assert(eleTy && "result must be a reference-like type");
+        if (fir::characterWithDynamicLen(eleTy)) {
+          assert(coor.lenParams().size() == 1);
+          auto bitsInChar = lowerTy().getKindMap().getCharacterBitsize(
+              eleTy.cast<fir::CharacterType>().getFKind());
+          auto scaling = genConstantIndex(loc, idxTy, rewriter, bitsInChar / 8);
+          auto scaledBySize =
+              rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, offset, scaling);
+          auto length =
+              integerCast(loc, rewriter, idxTy,
+                          adaptor.getOperands()[coor.lenParamsOffset()]);
+          offset = rewriter.create<mlir::LLVM::MulOp>(loc, idxTy, scaledBySize,
+                                                      length);
+        } else {
+          return rewriter.notifyMatchFailure(
+              coor, "compute size of derived type with type parameters not "
+                    "implemented yet");
+        }
+      }
+      // Cast the base address to a pointer to T.
+      base = rewriter.create<mlir::LLVM::BitcastOp>(loc, ty,
+                                                    adaptor.getOperands()[0]);
+    } else {
+      // Operand #0 must have a pointer type. For subcomponent slicing, we
+      // want to cast away the array type and have a plain struct type.
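+      // For example (hypothetical types): !llvm.ptr<array<10 x struct<(f32, f32)>>>
+      // is recast to !llvm.ptr<struct<(f32, f32)>> so that the trailing GEP
+      // indices select the field inside the struct.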
+      mlir::Type ty0 = adaptor.getOperands()[0].getType();
+      auto ptrTy = ty0.dyn_cast<mlir::LLVM::LLVMPointerType>();
+      assert(ptrTy && "expected pointer type");
+      mlir::Type eleTy = ptrTy.getElementType();
+      while (auto arrTy = eleTy.dyn_cast<mlir::LLVM::LLVMArrayType>())
+        eleTy = arrTy.getElementType();
+      auto newTy = mlir::LLVM::LLVMPointerType::get(eleTy);
+      base = rewriter.create<mlir::LLVM::BitcastOp>(loc, newTy,
+                                                    adaptor.getOperands()[0]);
+    }
+    SmallVector<mlir::Value> args = {base, offset};
+    args.append(coor.subcomponent().begin(), coor.subcomponent().end());
+    rewriter.replaceOpWithNewOp<mlir::LLVM::GEPOp>(coor, ty, args);
+    return success();
+  }
+};
+
 //
 // Primitive operations on Complex types
 //
@@ -1695,7 +1898,8 @@
         ShapeOpConversion, ShapeShiftOpConversion, ShiftOpConversion,
         SliceOpConversion, StoreOpConversion, StringLitOpConversion,
         SubcOpConversion, UnboxCharOpConversion, UndefOpConversion,
-        UnreachableOpConversion, ZeroOpConversion>(typeConverter);
+        UnreachableOpConversion, XArrayCoorOpConversion, ZeroOpConversion>(
+        typeConverter);
   mlir::populateStdToLLVMConversionPatterns(typeConverter, pattern);
   mlir::arith::populateArithmeticToLLVMConversionPatterns(typeConverter,
                                                           pattern);
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -1336,3 +1336,51 @@
 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ARG0]][%[[C0]], %[[TYPE_POS]]] : (!llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})>>, i32, i32) -> !llvm.ptr
 // CHECK: %[[LOAD:.*]] = llvm.load %[[GEP]] : !llvm.ptr
 // CHECK: %{{.*}} = llvm.inttoptr %[[LOAD]] : i{{.*}} to !llvm.ptr
+
+// -----
+
+// Test `fircg.ext_array_coor` conversion.
+
+// Conversion with only shape and indices.
+
+func @ext_array_coor0(%arg0: !fir.ref<!fir.array<?xi32>>) {
+  %c0 = arith.constant 0 : i64
+  %1 = fircg.ext_array_coor %arg0(%c0) <%c0> : (!fir.ref<!fir.array<?xi32>>, i64, i64) -> !fir.ref<i32>
+  return
+}
+
+// CHECK-LABEL: llvm.func @ext_array_coor0(
+// CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr<i32>)
+// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[C0_1:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[IDX:.*]] = llvm.sub %[[C0]], %[[C1]] : i64
+// CHECK: %[[DIFF0:.*]] = llvm.mul %[[IDX]], %[[C1]] : i64
+// CHECK: %[[SC:.*]] = llvm.mul %[[DIFF0]], %[[C1]] : i64
+// CHECK: %[[OFFSET:.*]] = llvm.add %[[SC]], %[[C0_1]] : i64
+// CHECK: %[[PEXT:.*]] = llvm.mul %[[C1]], %[[C0]] : i64
+// CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[ARG0]] : !llvm.ptr<i32> to !llvm.ptr<i32>
+// CHECK: %{{.*}} = llvm.getelementptr %[[BITCAST]][%[[OFFSET]]] : (!llvm.ptr<i32>, i64) -> !llvm.ptr<i32>
+
+// Conversion with shift and slice.
+
+func @ext_array_coor1(%arg0: !fir.ref<!fir.array<?xi32>>) {
+  %c0 = arith.constant 0 : i64
+  %1 = fircg.ext_array_coor %arg0(%c0) origin %c0[%c0, %c0, %c0]<%c0> : (!fir.ref<!fir.array<?xi32>>, i64, i64, i64, i64, i64, i64) -> !fir.ref<i32>
+  return
+}
+
+// CHECK-LABEL: llvm.func @ext_array_coor1(
+// CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr<i32>)
+// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECK: %[[C0_1:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK: %[[IDX:.*]] = llvm.sub %[[C0]], %[[C0]] : i64
+// CHECK: %[[DIFF0:.*]] = llvm.mul %[[IDX]], %[[C0]] : i64
+// CHECK: %[[ADJ:.*]] = llvm.sub %[[C0]], %[[C0]] : i64
+// CHECK: %[[DIFF1:.*]] = llvm.add %[[DIFF0]], %[[ADJ]] : i64
+// CHECK: %[[STRIDE:.*]] = llvm.mul %[[DIFF1]], %[[C1]] : i64
+// CHECK: %[[OFFSET:.*]] = llvm.add %[[STRIDE]], %[[C0_1]] : i64
+// CHECK: %[[NEXT_STRIDE:.*]] = llvm.mul %[[C1]], %[[C0]] : i64
+// CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[ARG0]] : !llvm.ptr<i32> to !llvm.ptr<i32>
+// CHECK: %{{.*}} = llvm.getelementptr %[[BITCAST]][%[[OFFSET]]] : (!llvm.ptr<i32>, i64) -> !llvm.ptr<i32>
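+
+// Note: per dimension the pattern above is llvm.sub (index - origin),
+// llvm.mul (* slice step), llvm.sub/llvm.add (slice lower bound adjustment),
+// then llvm.mul/llvm.add to scale by the stride and accumulate the offset,
+// followed by the final bitcast + getelementptr.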