Diff 526713

flang/lib/Optimizer/Transforms/LoopVersioning.cpp

Show First 20 Lines • Show All 67 Lines • ▼ Show 20 Lines
} // namespace fir		} // namespace fir

#define DEBUG_TYPE "flang-loop-versioning"		#define DEBUG_TYPE "flang-loop-versioning"

namespace {		namespace {

class LoopVersioningPass		class LoopVersioningPass
: public fir::impl::LoopVersioningBase<LoopVersioningPass> {		: public fir::impl::LoopVersioningBase<LoopVersioningPass> {

public:		public:
void runOnOperation() override;		void runOnOperation() override;
};		};

} // namespace		} // namespace

/// @c replaceOuterUses - replace uses outside of @c op with result of @c		/// @c replaceOuterUses - replace uses outside of @c op with result of @c
/// outerOp		/// outerOp
Show All 15 Lines	void LoopVersioningPass::runOnOperation() {
mlir::func::FuncOp func = getOperation();		mlir::func::FuncOp func = getOperation();

/// @c ArgInfo		/// @c ArgInfo
/// A structure to hold an argument, the size of the argument and dimension		/// A structure to hold an argument, the size of the argument and dimension
/// information.		/// information.
struct ArgInfo {		struct ArgInfo {
mlir::Value *arg;		mlir::Value *arg;
size_t size;		size_t size;
		unsigned rank;
fir::BoxDimsOp dims[CFI_MAX_RANK];		fir::BoxDimsOp dims[CFI_MAX_RANK];
};		};

// First look for arguments with assumed shape = unknown extent in the lowest		// First look for arguments with assumed shape = unknown extent in the lowest
// dimension.		// dimension.
LLVM_DEBUG(llvm::dbgs() << "Func-name:" << func.getSymName() << "\n");		LLVM_DEBUG(llvm::dbgs() << "Func-name:" << func.getSymName() << "\n");
mlir::Block::BlockArgListType args = func.getArguments();		mlir::Block::BlockArgListType args = func.getArguments();
mlir::ModuleOp module = func->getParentOfType<mlir::ModuleOp>();		mlir::ModuleOp module = func->getParentOfType<mlir::ModuleOp>();
fir::KindMapping kindMap = fir::getKindMapping(module);		fir::KindMapping kindMap = fir::getKindMapping(module);
mlir::SmallVector<ArgInfo> argsOfInterest;		mlir::SmallVector<ArgInfo, 4> argsOfInterest;
for (auto &arg : args) {		for (auto &arg : args) {
if (auto seqTy = getAsSequenceType(&arg)) {		if (auto seqTy = getAsSequenceType(&arg)) {
unsigned rank = seqTy.getDimension();		unsigned rank = seqTy.getDimension();
// Currently limited to 1D or 2D arrays as that seems to give good		if (rank > 0 &&
// improvement without excessive increase in code-size, etc.
if (rank > 0 && rank < 3 &&
seqTy.getShape()[0] == fir::SequenceType::getUnknownExtent()) {		seqTy.getShape()[0] == fir::SequenceType::getUnknownExtent()) {
size_t typeSize = 0;		size_t typeSize = 0;
mlir::Type elementType = fir::unwrapSeqOrBoxedSeqType(arg.getType());		mlir::Type elementType = fir::unwrapSeqOrBoxedSeqType(arg.getType());
if (elementType.isa<mlir::FloatType>() ||		if (elementType.isa<mlir::FloatType>() ||
elementType.isa<mlir::IntegerType>())		elementType.isa<mlir::IntegerType>())
typeSize = elementType.getIntOrFloatBitWidth() / 8;		typeSize = elementType.getIntOrFloatBitWidth() / 8;
else if (auto cty = elementType.dyn_cast<fir::ComplexType>())		else if (auto cty = elementType.dyn_cast<fir::ComplexType>())
typeSize = 2 * cty.getEleType(kindMap).getIntOrFloatBitWidth() / 8;		typeSize = 2 * cty.getEleType(kindMap).getIntOrFloatBitWidth() / 8;
if (typeSize)		if (typeSize)
argsOfInterest.push_back({&arg, typeSize, {}});		argsOfInterest.push_back({&arg, typeSize, rank, {}});
else		else
LLVM_DEBUG(llvm::dbgs() << "Type not supported\n");		LLVM_DEBUG(llvm::dbgs() << "Type not supported\n");

} else {
LLVM_DEBUG(llvm::dbgs() << "Too many dimensions\n");
}		}
}		}
}		}

if (argsOfInterest.empty())		if (argsOfInterest.empty())
return;		return;

struct OpsWithArgs {		struct OpsWithArgs {
mlir::Operation *op;		mlir::Operation *op;
mlir::SmallVector<ArgInfo> argsAndDims;		mlir::SmallVector<ArgInfo, 4> argsAndDims;
};		};
// Now see if those arguments are used inside any loop.		// Now see if those arguments are used inside any loop.
mlir::SmallVector<OpsWithArgs, 4> loopsOfInterest;		mlir::SmallVector<OpsWithArgs, 4> loopsOfInterest;

func.walk([&](fir::DoLoopOp loop) {		func.walk([&](fir::DoLoopOp loop) {
mlir::Block &body = *loop.getBody();		mlir::Block &body = *loop.getBody();
mlir::SmallVector<ArgInfo> argsInLoop;		mlir::SmallVector<ArgInfo, 4> argsInLoop;
body.walk([&](fir::CoordinateOp op) {		body.walk([&](fir::CoordinateOp op) {
// The current operation could be inside another loop than		// The current operation could be inside another loop than
// the one we're currently processing. Skip it, we'll get		// the one we're currently processing. Skip it, we'll get
// to it later.		// to it later.
if (op->getParentOfType<fir::DoLoopOp>() != loop)		if (op->getParentOfType<fir::DoLoopOp>() != loop)
return;		return;
const mlir::Value &operand = op->getOperand(0);		const mlir::Value &operand = op->getOperand(0);
for (auto a : argsOfInterest) {		for (auto a : argsOfInterest) {
Show All 30 Lines	LLVM_DEBUG(llvm::dbgs() << "loopsOfInterest: " << loopsOfInterest.size()
<< "\n");		<< "\n");
for (auto op : loopsOfInterest) {		for (auto op : loopsOfInterest) {
LLVM_DEBUG(op.op->dump());		LLVM_DEBUG(op.op->dump());
builder.setInsertionPoint(op.op);		builder.setInsertionPoint(op.op);

mlir::Value allCompares = nullptr;		mlir::Value allCompares = nullptr;
// Ensure all of the arrays are unit-stride.		// Ensure all of the arrays are unit-stride.
for (auto &arg : op.argsAndDims) {		for (auto &arg : op.argsAndDims) {
		// Fetch all the dimensions of the array, except the last dimension.
fir::SequenceType seqTy = getAsSequenceType(arg.arg);		// Always fetch the first dimension, however, so set ndims = 1 if
unsigned rank = seqTy.getDimension();		// we have one dim
		unsigned ndims = arg.rank;
// We only care about lowest order dimension.		for (unsigned i = 0; i < ndims; i++) {
for (unsigned i = 0; i < rank; i++) {
mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);		mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);
arg.dims[i] = builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy,		arg.dims[i] = builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy,
*arg.arg, dimIdx);		*arg.arg, dimIdx);
}		}
		// We only care about lowest order dimension, here.
mlir::Value elemSize =		mlir::Value elemSize =
builder.createIntegerConstant(loc, idxTy, arg.size);		builder.createIntegerConstant(loc, idxTy, arg.size);
mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(		mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
loc, mlir::arith::CmpIPredicate::eq, arg.dims[0].getResult(2),		loc, mlir::arith::CmpIPredicate::eq, arg.dims[0].getResult(2),
elemSize);		elemSize);
if (!allCompares) {		if (!allCompares) {
allCompares = cmp;		allCompares = cmp;
} else {		} else {
Show All 20 Lines	for (auto &arg : op.argsAndDims) {
auto carg = builder.create<fir::ConvertOp>(loc, boxArrTy, *arg.arg);		auto carg = builder.create<fir::ConvertOp>(loc, boxArrTy, *arg.arg);
auto caddr = builder.create<fir::BoxAddrOp>(loc, refArrTy, carg);		auto caddr = builder.create<fir::BoxAddrOp>(loc, refArrTy, carg);
auto insPt = builder.saveInsertionPoint();		auto insPt = builder.saveInsertionPoint();
// Use caddr instead of arg.		// Use caddr instead of arg.
clonedLoop->walk([&](fir::CoordinateOp coop) {		clonedLoop->walk([&](fir::CoordinateOp coop) {
// Reduce the multi-dimensioned index to a single index.		// Reduce the multi-dimensioned index to a single index.
// This is required because fir arrays do not support multiple dimensions		// This is required because fir arrays do not support multiple dimensions
// with unknown dimensions at compile time.		// with unknown dimensions at compile time.
		// We then calculate the multidimensional array like this:
		// arr(x, y, z) becomes arr(z * stride(2) + y * stride(1) + x)
		// where stride is the distance between elements in the dimensions
		// 0, 1 and 2 or x, y and z.
if (coop->getOperand(0) == *arg.arg &&		if (coop->getOperand(0) == *arg.arg &&
coop->getOperands().size() >= 2) {		coop->getOperands().size() >= 2) {
builder.setInsertionPoint(coop);		builder.setInsertionPoint(coop);
mlir::Value totalIndex = builder.createIntegerConstant(loc, idxTy, 0);		mlir::Value totalIndex;
		for (unsigned i = arg.rank - 1; i > 0; i--) {
// Operand(1) = array; Operand(2) = index1; Operand(3) = index2		// Operand(1) = array; Operand(2) = index1; Operand(3) = index2
for (unsigned i = coop->getOperands().size() - 1; i > 1; i--) {
mlir::Value curIndex =		mlir::Value curIndex =
builder.createConvert(loc, idxTy, coop->getOperand(i));		builder.createConvert(loc, idxTy, coop->getOperand(i + 1));
// First arg is Operand2, so dims[i-2] is 0-based i-1!		// Multiply by the stride of this array. Later we'll divide by the
		// element size.
mlir::Value scale =		mlir::Value scale =
builder.createConvert(loc, idxTy, arg.dims[i - 2].getResult(1));		builder.createConvert(loc, idxTy, arg.dims[i].getResult(2));
totalIndex = builder.create<mlir::arith::AddIOp>(		curIndex =
loc, totalIndex,		builder.create<mlir::arith::MulIOp>(loc, scale, curIndex);
builder.create<mlir::arith::MulIOp>(loc, scale, curIndex));		totalIndex = (totalIndex) ? builder.create<mlir::arith::AddIOp>(
		loc, curIndex, totalIndex)
		: curIndex;
}		}
		mlir::Value elemSize =
		builder.createIntegerConstant(loc, idxTy, arg.size);
		// This is the lowest dimension - which doesn't need scaling
		mlir::Value finalIndex =
		builder.createConvert(loc, idxTy, coop->getOperand(1));
		if (totalIndex) {
totalIndex = builder.create<mlir::arith::AddIOp>(		totalIndex = builder.create<mlir::arith::AddIOp>(
loc, totalIndex,		loc,
builder.createConvert(loc, idxTy, coop->getOperand(1)));		builder.create<mlir::arith::DivSIOp>(loc, totalIndex, elemSize),
		finalIndex);
		} else {
		totalIndex = finalIndex;
		}
		kiranchandramohan (Unsubmitted, Not Done): A comment (with a Fortran-level example for 3D) on how the address is computed will be helpful. It can be something simple like a(i,j) = a(i*NJ + j).
auto newOp = builder.create<fir::CoordinateOp>(		auto newOp = builder.create<fir::CoordinateOp>(
loc, builder.getRefType(elementType), caddr,		loc, builder.getRefType(elementType), caddr,
mlir::ValueRange{totalIndex});		mlir::ValueRange{totalIndex});
LLVM_DEBUG(newOp->dump());		LLVM_DEBUG(newOp->dump());
coop->getResult(0).replaceAllUsesWith(newOp->getResult(0));		coop->getResult(0).replaceAllUsesWith(newOp->getResult(0));
coop->erase();		coop->erase();
changed = true;		changed = true;
}		}
Show All 39 Lines

flang/test/Transforms/loop-versioning.fir

Show First 20 Lines • Show All 150 Lines • ▼ Show 20 Lines
// CHECK: %[[ZERO:.*]] = arith.constant 0 : index		// CHECK: %[[ZERO:.*]] = arith.constant 0 : index
// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[Y]], %[[ZERO]]		// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[Y]], %[[ZERO]]
// CHECK: %[[FOUR:.*]] = arith.constant 4 : index		// CHECK: %[[FOUR:.*]] = arith.constant 4 : index
// CHECK: %[[COMP:.*]] = arith.cmpi eq, %[[DIMS]]#2, %[[FOUR]] : index		// CHECK: %[[COMP:.*]] = arith.cmpi eq, %[[DIMS]]#2, %[[FOUR]] : index
// CHECK: fir.if %[[COMP]] {		// CHECK: fir.if %[[COMP]] {
// CHECK: %[[CONV:.*]] = fir.convert %[[Y]] : {{.*}}		// CHECK: %[[CONV:.*]] = fir.convert %[[Y]] : {{.*}}
// CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[CONV]] : {{.*}}		// CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[CONV]] : {{.*}}
// CHECK: fir.do_loop %[[INDEX:.*]] = {{.*}}		// CHECK: fir.do_loop %[[INDEX:.*]] = {{.*}}
// CHECK: %[[IND_PLUS_1:.*]] = arith.addi %{{.*}}, %[[INDEX]]		// CHECK: %[[YADDR:.*]] = fir.coordinate_of %[[BOX_ADDR]], %[[INDEX]]
// CHECK: %[[YADDR:.*]] = fir.coordinate_of %[[BOX_ADDR]], %[[IND_PLUS_1]]
// CHECK: %[[YINT:.*]] = fir.load %[[YADDR]] : {{.*}}		// CHECK: %[[YINT:.*]] = fir.load %[[YADDR]] : {{.*}}
// CHECK: %[[YINDEX:.*]] = fir.convert %[[YINT]]		// CHECK: %[[YINDEX:.*]] = fir.convert %[[YINT]]
// CHECK: %[[XADDR:.*]] = fir.array_coor %[[X]] [%{{.*}}] %[[YINDEX]]		// CHECK: %[[XADDR:.*]] = fir.array_coor %[[X]] [%{{.*}}] %[[YINDEX]]
// CHECK: fir.call @Func(%[[XADDR]])		// CHECK: fir.call @Func(%[[XADDR]])
// CHECK-NEXT: }		// CHECK-NEXT: }
// CHECK-NEXT: } else {		// CHECK-NEXT: } else {
// CHECK: fir.do_loop %[[INDEX2:.*]] = {{.*}}		// CHECK: fir.do_loop %[[INDEX2:.*]] = {{.*}}
// CHECK: %[[YADDR2:.*]] = fir.coordinate_of %[[Y]], %[[INDEX2]]		// CHECK: %[[YADDR2:.*]] = fir.coordinate_of %[[Y]], %[[INDEX2]]
▲ Show 20 Lines • Show All 95 Lines • ▼ Show 20 Lines
// CHECK: fir.do_loop		// CHECK: fir.do_loop
// CHECL: %[[FOUR:.*]] = arith.constant 4 : index		// CHECL: %[[FOUR:.*]] = arith.constant 4 : index
// CHECK: %[[COMP:.*]] = arith.cmpi {{.*}}, %[[FOUR]]		// CHECK: %[[COMP:.*]] = arith.cmpi {{.*}}, %[[FOUR]]
// CHECK: fir.if %[[COMP]] -> {{.*}} {		// CHECK: fir.if %[[COMP]] -> {{.*}} {
// CHECK: %[[CONV:.*]] = fir.convert %[[B]] :		// CHECK: %[[CONV:.*]] = fir.convert %[[B]] :
// CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[CONV]]		// CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[CONV]]
// CHECK: %[[RES:.*]] = fir.do_loop {{.*}} {		// CHECK: %[[RES:.*]] = fir.do_loop {{.*}} {
// CHECK: %[[ADDR:.*]] = fir.coordinate_of %[[BOX_ADDR]], %{{.*}}		// CHECK: %[[ADDR:.*]] = fir.coordinate_of %[[BOX_ADDR]], %{{.*}}
// CHECK: %45 = fir.load %[[ADDR]] : !fir.ref<f32>		// CHECK: %{{.*}} = fir.load %[[ADDR]] : !fir.ref<f32>
// CHECK: }		// CHECK: }
// CHECK: fir.result %[[RES]] : {{.*}}		// CHECK: fir.result %[[RES]] : {{.*}}
// CHECK: } else {		// CHECK: } else {
// CHECK: %[[RES2:.*]] = fir.do_loop		// CHECK: %[[RES2:.*]] = fir.do_loop
// CHECK: %{{.*}} = fir.coordinate_of %[[B]], %{{.*}}		// CHECK: %{{.*}} = fir.coordinate_of %[[B]], %{{.*}}
// CHECK: }		// CHECK: }
// CHECK: fir.result %[[RES2]]		// CHECK: fir.result %[[RES2]]
// CHECK: }		// CHECK: }
▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines	// end subroutine sum2d
}		}

// Note this only checks the expected transformation, not the entire generated code:		// Note this only checks the expected transformation, not the entire generated code:
// CHECK-LABEL: func.func @sum2d(		// CHECK-LABEL: func.func @sum2d(
// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf64>> {{.*}})		// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf64>> {{.*}})
// Only inner loop should be versioned.		// Only inner loop should be versioned.
// CHECK: fir.do_loop		// CHECK: fir.do_loop
// CHECK: %[[ZERO:.*]] = arith.constant 0 : index		// CHECK: %[[ZERO:.*]] = arith.constant 0 : index
// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ARG0]], %[[ZERO]] : {{.*}}		// CHECK: %[[DIMS0:.*]]:3 = fir.box_dims %[[ARG0]], %[[ZERO]] : {{.*}}
		// CHECK: %[[ONE:.*]] = arith.constant 1 : index
		// CHECK: %[[DIMS1:.*]]:3 = fir.box_dims %[[ARG0]], %[[ONE]] : {{.*}}
// CHECK: %[[SIZE:.*]] = arith.constant 8 : index		// CHECK: %[[SIZE:.*]] = arith.constant 8 : index
// CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[DIMS]]#2, %[[SIZE]]		// CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[DIMS0]]#2, %[[SIZE]]
// CHECK: %[[IF_RES:.*]]:2 = fir.if %[[CMP]] -> {{.*}}		// CHECK: %[[IF_RES:.*]]:2 = fir.if %[[CMP]] -> {{.*}}
// CHECK: %[[NEWARR:.*]] = fir.convert %[[ARG0]]		// CHECK: %[[NEWARR:.*]] = fir.convert %[[ARG0]]
// CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[NEWARR]] : {{.*}} -> !fir.ref<!fir.array<?xf64>>		// CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[NEWARR]] : {{.*}} -> !fir.ref<!fir.array<?xf64>>
// CHECK: %[[LOOP_RES:.*]]:2 = fir.do_loop {{.*}}		// CHECK: %[[LOOP_RES:.*]]:2 = fir.do_loop {{.*}}
// Check the 2D -> 1D coordinate conversion, should have a multiply and a final add.		// Check the 2D -> 1D coordinate conversion, should have a multiply and a final add.
// Some other operations are checked to synch the different parts.		// Some other operations are checked to synch the different parts.
// CHECK: arith.muli %[[DIMS]]#1, {{.*}}		// CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}}
// CHECK: %[[OUTER_IDX:.*]] = arith.addi {{.*}}		// CHECK: %[[ITEMSIZE:.*]] = arith.constant 8 : index
// CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}}		// CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}}
// CHECK: %[[C2D:.*]] = arith.addi %[[OUTER_IDX]], %[[INNER_IDX]]		// CHECK: %[[OUTER_DIV:.*]] = arith.divsi %[[OUTER_IDX]], %[[ITEMSIZE]]
		// CHECK: %[[C2D:.*]] = arith.addi %[[OUTER_DIV]], %[[INNER_IDX]]
// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C2D]] : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>		// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C2D]] : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
// CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>		// CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>
// CHECK: fir.result %{{.*}}, %{{.*}}		// CHECK: fir.result %{{.*}}, %{{.*}}
// CHECK: }		// CHECK: }
// CHECK fir.result %[[LOOP_RES]]#0, %[[LOOP_RES]]#1		// CHECK fir.result %[[LOOP_RES]]#0, %[[LOOP_RES]]#1
// CHECK: } else {		// CHECK: } else {
// CHECK: %[[LOOP_RES2:.*]]:2 = fir.do_loop {{.*}}		// CHECK: %[[LOOP_RES2:.*]]:2 = fir.do_loop {{.*}}
// CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG0]], %{{.*}} : (!fir.box<!fir.array<?x?xf64>>, i64, i64) -> !fir.ref<f64>		// CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG0]], %{{.*}} : (!fir.box<!fir.array<?x?xf64>>, i64, i64) -> !fir.ref<f64>
// CHECK: %{{.*}}= fir.load %[[COORD2]] : !fir.ref<f64>		// CHECK: %{{.*}}= fir.load %[[COORD2]] : !fir.ref<f64>
// CHECK: fir.result %{{.*}}, %{{.*}}		// CHECK: fir.result %{{.*}}, %{{.*}}
// CHECK: }		// CHECK: }
// CHECK fir.result %[[LOOP_RES2]]#0, %[[LOOP_RES2]]#1		// CHECK fir.result %[[LOOP_RES2]]#0, %[[LOOP_RES2]]#1
// CHECK: }		// CHECK: }
// CHECK: fir.store %[[IF_RES]]#1 to %{{.*}}		// CHECK: fir.store %[[IF_RES]]#1 to %{{.*}}
// CHECK: return		// CHECK: return

		// -----

		// subroutine sum3d(a, nx, ny, nz)
		// real*8 :: a(:, :, :)
		// integer :: nx, ny, nz
		// real*8 :: sum
		// integer :: i, j, k
		// sum = 0
		// do k=1,nz
		// do j=1,ny
		// do i=0,nx
		// sum = sum + a(i, j, k)
		// end do
		// end do
		// end do
		// end subroutine sum3d


		// Test input for the rank-3 case: accumulates elements of the boxed
		// f64 array %arg0 ("a") into a local "sum", using three nested
		// fir.do_loop ops whose upper bounds are loaded from %arg1 ("nx"),
		// %arg2 ("ny") and %arg3 ("nz").
		func.func @sum3d(%arg0: !fir.box<!fir.array<?x?x?xf64>> {fir.bindc_name = "a"}, %arg1: !fir.ref<i32> {fir.bindc_name = "nx"}, %arg2: !fir.ref<i32> {fir.bindc_name = "ny"}, %arg3: !fir.ref<i32> {fir.bindc_name = "nz"}) {
		// Storage for the loop counters i, j, k and for the accumulator sum.
		%0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmoduleFsum3dEi"}
		%1 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QMmoduleFsum3dEj"}
		%2 = fir.alloca i32 {bindc_name = "k", uniq_name = "_QMmoduleFsum3dEk"}
		%3 = fir.alloca f64 {bindc_name = "sum", uniq_name = "_QMmoduleFsum3dEsum"}
		// sum = 0
		%cst = arith.constant 0.000000e+00 : f64
		fir.store %cst to %3 : !fir.ref<f64>
		// Outer loop: index runs from 1 to the value loaded from %arg3 (nz), step 1.
		// The i32 iter_arg carries the counter value that is stored to k each iteration.
		%c1_i32 = arith.constant 1 : i32
		%4 = fir.convert %c1_i32 : (i32) -> index
		%5 = fir.load %arg3 : !fir.ref<i32>
		%6 = fir.convert %5 : (i32) -> index
		%c1 = arith.constant 1 : index
		%7 = fir.convert %4 : (index) -> i32
		%8:2 = fir.do_loop %arg4 = %4 to %6 step %c1 iter_args(%arg5 = %7) -> (index, i32) {
		fir.store %arg5 to %2 : !fir.ref<i32>
		// Middle loop: index runs from 1 to the value loaded from %arg2 (ny), step 1;
		// its i32 iter_arg is stored to j.
		%c1_i32_0 = arith.constant 1 : i32
		%9 = fir.convert %c1_i32_0 : (i32) -> index
		%10 = fir.load %arg2 : !fir.ref<i32>
		%11 = fir.convert %10 : (i32) -> index
		%c1_1 = arith.constant 1 : index
		%12 = fir.convert %9 : (index) -> i32
		%13:2 = fir.do_loop %arg6 = %9 to %11 step %c1_1 iter_args(%arg7 = %12) -> (index, i32) {
		fir.store %arg7 to %1 : !fir.ref<i32>
		// Inner loop: index runs from 0 to the value loaded from %arg1 (nx), step 1;
		// its i32 iter_arg is stored to i.
		%c0_i32 = arith.constant 0 : i32
		%18 = fir.convert %c0_i32 : (i32) -> index
		%19 = fir.load %arg1 : !fir.ref<i32>
		%20 = fir.convert %19 : (i32) -> index
		%c1_2 = arith.constant 1 : index
		%21 = fir.convert %18 : (index) -> i32
		%22:2 = fir.do_loop %arg8 = %18 to %20 step %c1_2 iter_args(%arg9 = %21) -> (index, i32) {
		fir.store %arg9 to %0 : !fir.ref<i32>
		// Load the running sum, then build the three coordinates: each of
		// i, j, k is widened to i64 and decremented by 1.
		%27 = fir.load %3 : !fir.ref<f64>
		%28 = fir.load %0 : !fir.ref<i32>
		%29 = fir.convert %28 : (i32) -> i64
		%c1_i64 = arith.constant 1 : i64
		%30 = arith.subi %29, %c1_i64 : i64
		%31 = fir.load %1 : !fir.ref<i32>
		%32 = fir.convert %31 : (i32) -> i64
		%c1_i64_3 = arith.constant 1 : i64
		%33 = arith.subi %32, %c1_i64_3 : i64
		%34 = fir.load %2 : !fir.ref<i32>
		%35 = fir.convert %34 : (i32) -> i64
		%c1_i64_4 = arith.constant 1 : i64
		%36 = arith.subi %35, %c1_i64_4 : i64
		// Three-index access to the boxed array, with the i-derived coordinate first.
		%37 = fir.coordinate_of %arg0, %30, %33, %36 : (!fir.box<!fir.array<?x?x?xf64>>, i64, i64, i64) -> !fir.ref<f64>
		// sum = sum + (loaded element)
		%38 = fir.load %37 : !fir.ref<f64>
		%39 = arith.addf %27, %38 fastmath<contract> : f64
		fir.store %39 to %3 : !fir.ref<f64>
		// Yield the next loop index and the incremented i32 value of i.
		%40 = arith.addi %arg8, %c1_2 : index
		%41 = fir.convert %c1_2 : (index) -> i32
		%42 = fir.load %0 : !fir.ref<i32>
		%43 = arith.addi %42, %41 : i32
		fir.result %40, %43 : index, i32
		}
		// Write the inner loop's final i32 result to i, then yield the next
		// index and incremented j for the middle loop.
		fir.store %22#1 to %0 : !fir.ref<i32>
		%23 = arith.addi %arg6, %c1_1 : index
		%24 = fir.convert %c1_1 : (index) -> i32
		%25 = fir.load %1 : !fir.ref<i32>
		%26 = arith.addi %25, %24 : i32
		fir.result %23, %26 : index, i32
		}
		// Write the middle loop's final i32 result to j, then yield the next
		// index and incremented k for the outer loop.
		fir.store %13#1 to %1 : !fir.ref<i32>
		%14 = arith.addi %arg4, %c1 : index
		%15 = fir.convert %c1 : (index) -> i32
		%16 = fir.load %2 : !fir.ref<i32>
		%17 = arith.addi %16, %15 : i32
		fir.result %14, %17 : index, i32
		}
		// Write the outer loop's final i32 result to k.
		fir.store %8#1 to %2 : !fir.ref<i32>
		return
		}

		// Note this only checks the expected transformation, not the entire generated code:
		// CHECK-LABEL: func.func @sum3d(
		// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?x?x?xf64>> {{.*}})
		// Only inner loop should be versioned.
		// CHECK: fir.do_loop
		// CHECK: %[[ZERO:.*]] = arith.constant 0 : index
		// CHECK: %[[DIMS0:.*]]:3 = fir.box_dims %[[ARG0]], %[[ZERO]] : {{.*}}
		// CHECK: %[[ONE:.*]] = arith.constant 1 : index
		// CHECK: %[[DIMS1:.*]]:3 = fir.box_dims %[[ARG0]], %[[ONE]] : {{.*}}
		// CHECK: %[[TWO:.*]] = arith.constant 2 : index
		// CHECK: %[[DIMS2:.*]]:3 = fir.box_dims %[[ARG0]], %[[TWO]] : {{.*}}
		// CHECK: %[[SIZE:.*]] = arith.constant 8 : index
		// CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[DIMS0]]#2, %[[SIZE]]
		// CHECK: %[[IF_RES:.*]]:2 = fir.if %[[CMP]] -> {{.*}}
		// CHECK: %[[NEWARR:.*]] = fir.convert %[[ARG0]]
		// CHECK: %[[BOXADDR:.*]] = fir.box_addr %[[NEWARR]] : {{.*}} -> !fir.ref<!fir.array<?xf64>>
		// CHECK: %[[LOOP_RES:.*]]:2 = fir.do_loop {{.*}}
		// Check the 3D -> 1D coordinate conversion, should have a multiply and a final add.
		// Some other operations are checked to synch the different parts.
		// CHECK: %[[OUTER_IDX:.*]] = arith.muli %[[DIMS2]]#2, {{.*}}
		// CHECK: %[[MIDDLE_IDX:.*]] = arith.muli %[[DIMS1]]#2, {{.*}}
		// CHECK: %[[MIDDLE_SUM:.*]] = arith.addi %[[MIDDLE_IDX]], %[[OUTER_IDX]]
		// CHECK: %[[ITEMSIZE:.*]] = arith.constant 8 : index
		// CHECK: %[[INNER_IDX:.*]] = fir.convert {{.*}}
		// CHECK: %[[MIDDLE_DIV:.*]] = arith.divsi %[[MIDDLE_SUM]], %[[ITEMSIZE]]
		// CHECK: %[[C3D:.*]] = arith.addi %[[MIDDLE_DIV]], %[[INNER_IDX]]
		// CHECK: %[[COORD:.*]] = fir.coordinate_of %[[BOXADDR]], %[[C3D]] : (!fir.ref<!fir.array<?xf64>>, index) -> !fir.ref<f64>
		// CHECK: %{{.*}} = fir.load %[[COORD]] : !fir.ref<f64>
		// CHECK: fir.result %{{.*}}, %{{.*}}
		// CHECK: }
		// CHECK fir.result %[[LOOP_RES]]#0, %[[LOOP_RES]]#1
		// CHECK: } else {
		// CHECK: %[[LOOP_RES2:.*]]:2 = fir.do_loop {{.*}}
		// CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG0]], %{{.*}} : (!fir.box<!fir.array<?x?x?xf64>>, i64, i64, i64) -> !fir.ref<f64>
		// CHECK: %{{.*}}= fir.load %[[COORD2]] : !fir.ref<f64>
		// CHECK: fir.result %{{.*}}, %{{.*}}
		// CHECK: }
		// CHECK fir.result %[[LOOP_RES2]]#0, %[[LOOP_RES2]]#1
		// CHECK: }
		// CHECK: fir.store %[[IF_RES]]#1 to %{{.*}}
		// CHECK: return

} // End module		} // End module

This is an archive of the discontinued LLVM Phabricator instance.

[FLANG] Support all arrays for LoopVersioning
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 526713

flang/lib/Optimizer/Transforms/LoopVersioning.cpp

flang/test/Transforms/loop-versioning.fir

This is an archive of the discontinued LLVM Phabricator instance.

[FLANG] Support all arrays for LoopVersioningClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 526713

flang/lib/Optimizer/Transforms/LoopVersioning.cpp

flang/test/Transforms/loop-versioning.fir

[FLANG] Support all arrays for LoopVersioning
ClosedPublic