Diff 492769

flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp

Show First 20 Lines • Show All 67 Lines • ▼ Show 20 Lines	public:
mlir::func::FuncOp getOrCreateFunction(fir::FirOpBuilder &builder,		mlir::func::FuncOp getOrCreateFunction(fir::FirOpBuilder &builder,
const mlir::StringRef &basename,		const mlir::StringRef &basename,
FunctionTypeGeneratorTy typeGenerator,		FunctionTypeGeneratorTy typeGenerator,
FunctionBodyGeneratorTy bodyGenerator);		FunctionBodyGeneratorTy bodyGenerator);
void runOnOperation() override;		void runOnOperation() override;
void getDependentDialects(mlir::DialectRegistry &registry) const override;		void getDependentDialects(mlir::DialectRegistry &registry) const override;

private:		private:
/// Helper function to replace a reduction type of call with its		/// Helper functions to replace a reduction type of call with its
/// simplified form. The actual function is generated using a callback		/// simplified form. The actual function is generated using a callback
/// function.		/// function.
/// \p call is the call to be replaced		/// \p call is the call to be replaced
/// \p kindMap is used to create FIROpBuilder		/// \p kindMap is used to create FIROpBuilder
/// \p genBodyFunc is the callback that builds the replacement function		/// \p genBodyFunc is the callback that builds the replacement function
void simplifyReduction(fir::CallOp call, const fir::KindMapping &kindMap,		void simplifyIntOrFloatReduction(fir::CallOp call,
		const fir::KindMapping &kindMap,
		GenReductionBodyTy genBodyFunc);
		void simplifyLogicalReduction(fir::CallOp call,
		const fir::KindMapping &kindMap,
GenReductionBodyTy genBodyFunc);		GenReductionBodyTy genBodyFunc);
		void simplifyReductionBody(fir::CallOp call, const fir::KindMapping &kindMap,
		GenReductionBodyTy genBodyFunc,
		fir::FirOpBuilder &builder,
		const mlir::StringRef &basename);
};		};

} // namespace		} // namespace

/// Create FirOpBuilder with the provided \p op insertion point		/// Create FirOpBuilder with the provided \p op insertion point
/// and \p kindMap additionally inheriting FastMathFlags from \p op.		/// and \p kindMap additionally inheriting FastMathFlags from \p op.
static fir::FirOpBuilder		static fir::FirOpBuilder
getSimplificationBuilder(mlir::Operation *op, const fir::KindMapping &kindMap) {		getSimplificationBuilder(mlir::Operation *op, const fir::KindMapping &kindMap) {
Show All 34 Lines
using BodyOpGeneratorTy = llvm::function_ref<mlir::Value(		using BodyOpGeneratorTy = llvm::function_ref<mlir::Value(
fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,		fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
mlir::Value)>;		mlir::Value)>;
using InitValGeneratorTy = llvm::function_ref<mlir::Value(		using InitValGeneratorTy = llvm::function_ref<mlir::Value(
fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;		fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;

/// Generate the reduction loop into \p funcOp.		/// Generate the reduction loop into \p funcOp.
///		///
		/// \p elementType is the type of the elements in the input array,
		/// which may be different to the return type.
/// \p initVal is a function, called to get the initial value for		/// \p initVal is a function, called to get the initial value for
		LeporacanthicusUnsubmitted Done Reply Inline Actions Need to document the new argument. Leporacanthicus: Need to document the new argument.
/// the reduction value		/// the reduction value
/// \p genBody is called to fill in the actual reduction operation		/// \p genBody is called to fill in the actual reduction operation
/// for example add for SUM, MAX for MAXVAL, etc.		/// for example add for SUM, MAX for MAXVAL, etc.
/// \p rank is the rank of the input argument.		/// \p rank is the rank of the input argument.
static void genReductionLoop(fir::FirOpBuilder &builder,		static void genReductionLoop(fir::FirOpBuilder &builder, mlir::Type elementType,
		LeporacanthicusUnsubmitted Not Done Reply Inline Actions Nit: Did you clang-format this? I'm suspicious the elementType is sticking out further than most other things... Leporacanthicus: Nit: Did you clang-format this? I'm suspicious the elementType is sticking out further than…
		SBallantyneAuthorUnsubmitted Done Reply Inline Actions This is after clang-format, and if I move elementType to another line on its own clang-format will put it back to the previous line. SBallantyne: This is after clang-format, and if I move elementType to another line on its own clang-format…
mlir::func::FuncOp &funcOp,		mlir::func::FuncOp &funcOp,
InitValGeneratorTy initVal,		InitValGeneratorTy initVal,
BodyOpGeneratorTy genBody, unsigned rank) {		BodyOpGeneratorTy genBody, unsigned rank) {
auto loc = mlir::UnknownLoc::get(builder.getContext());		auto loc = mlir::UnknownLoc::get(builder.getContext());
mlir::Type elementType = funcOp.getResultTypes()[0];
builder.setInsertionPointToEnd(funcOp.addEntryBlock());		builder.setInsertionPointToEnd(funcOp.addEntryBlock());

mlir::IndexType idxTy = builder.getIndexType();		mlir::IndexType idxTy = builder.getIndexType();

mlir::Block::BlockArgListType args = funcOp.front().getArguments();		mlir::Block::BlockArgListType args = funcOp.front().getArguments();
mlir::Value arg = args[0];		mlir::Value arg = args[0];

mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);		mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);

fir::SequenceType::Shape flatShape(rank,		fir::SequenceType::Shape flatShape(rank,
fir::SequenceType::getUnknownExtent());		fir::SequenceType::getUnknownExtent());
mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);		mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
mlir::Type boxArrTy = fir::BoxType::get(arrTy);		mlir::Type boxArrTy = fir::BoxType::get(arrTy);
mlir::Value array = builder.create<fir::ConvertOp>(loc, boxArrTy, arg);		mlir::Value array = builder.create<fir::ConvertOp>(loc, boxArrTy, arg);
mlir::Value init = initVal(builder, loc, elementType);		mlir::Type resultType = funcOp.getResultTypes()[0];
		mlir::Value init = initVal(builder, loc, resultType);

llvm::SmallVector<mlir::Value, 15> bounds;		llvm::SmallVector<mlir::Value, 15> bounds;

assert(rank > 0 && "rank cannot be zero");		assert(rank > 0 && "rank cannot be zero");
mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);		mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);

// Compute all the upper bounds before the loop nest.		// Compute all the upper bounds before the loop nest.
// It is not strictly necessary for performance, since the loop nest		// It is not strictly necessary for performance, since the loop nest
Show All 34 Lines	static void genReductionLoop(fir::FirOpBuilder &builder, mlir::Type elementType,
std::reverse(indices.begin(), indices.end());		std::reverse(indices.begin(), indices.end());

// We are in the innermost loop: generate the reduction body.		// We are in the innermost loop: generate the reduction body.
mlir::Type eleRefTy = builder.getRefType(elementType);		mlir::Type eleRefTy = builder.getRefType(elementType);
mlir::Value addr =		mlir::Value addr =
builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);		builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);		mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);

mlir::Value reductionVal = genBody(builder, loc, elementType, elem, init);		mlir::Value reductionVal = genBody(builder, loc, elementType, elem, init);
		LeporacanthicusUnsubmitted Done Reply Inline Actions Spurious whitespace change. Leporacanthicus: Spurious whitespace change.

// Unwind the loop nest and insert ResultOp on each level		// Unwind the loop nest and insert ResultOp on each level
// to return the updated value of the reduction to the enclosing		// to return the updated value of the reduction to the enclosing
// loops.		// loops.
for (unsigned i = 0; i < rank; ++i) {		for (unsigned i = 0; i < rank; ++i) {
auto result = builder.create<fir::ResultOp>(loc, reductionVal);		auto result = builder.create<fir::ResultOp>(loc, reductionVal);
// Proceed to the outer loop.		// Proceed to the outer loop.
auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());		auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines	if (elementType.isa<mlir::FloatType>())
return builder.create<mlir::arith::AddFOp>(loc, elem1, elem2);		return builder.create<mlir::arith::AddFOp>(loc, elem1, elem2);
if (elementType.isa<mlir::IntegerType>())		if (elementType.isa<mlir::IntegerType>())
return builder.create<mlir::arith::AddIOp>(loc, elem1, elem2);		return builder.create<mlir::arith::AddIOp>(loc, elem1, elem2);

llvm_unreachable("unsupported type");		llvm_unreachable("unsupported type");
return {};		return {};
};		};

genReductionLoop(builder, funcOp, zero, genBodyOp, rank);		mlir::Type elementType = funcOp.getResultTypes()[0];

		genReductionLoop(builder, elementType, funcOp, zero, genBodyOp, rank);
		LeporacanthicusUnsubmitted Not Done Reply Inline Actions Since this is the same as resultType, you don't need to fetch it again. Leporacanthicus: Since this is the same as resultType, you don't need to fetch it again.
		SBallantyneAuthorUnsubmitted Done Reply Inline Actions This is in a different function and doesn't have the resultType variable available to it. SBallantyne: This is in a different function and doesn't have the resultType variable available to it.
}		}

static void genRuntimeMaxvalBody(fir::FirOpBuilder &builder,		static void genRuntimeMaxvalBody(fir::FirOpBuilder &builder,
mlir::func::FuncOp &funcOp, unsigned rank) {		mlir::func::FuncOp &funcOp, unsigned rank) {
auto init = [](fir::FirOpBuilder builder, mlir::Location loc,		auto init = [](fir::FirOpBuilder builder, mlir::Location loc,
mlir::Type elementType) {		mlir::Type elementType) {
if (auto ty = elementType.dyn_cast<mlir::FloatType>()) {		if (auto ty = elementType.dyn_cast<mlir::FloatType>()) {
const llvm::fltSemantics &sem = ty.getFloatSemantics();		const llvm::fltSemantics &sem = ty.getFloatSemantics();
Show All 11 Lines	auto genBodyOp = [](fir::FirOpBuilder builder, mlir::Location loc,
if (elementType.isa<mlir::FloatType>())		if (elementType.isa<mlir::FloatType>())
return builder.create<mlir::arith::MaxFOp>(loc, elem1, elem2);		return builder.create<mlir::arith::MaxFOp>(loc, elem1, elem2);
if (elementType.isa<mlir::IntegerType>())		if (elementType.isa<mlir::IntegerType>())
return builder.create<mlir::arith::MaxSIOp>(loc, elem1, elem2);		return builder.create<mlir::arith::MaxSIOp>(loc, elem1, elem2);

llvm_unreachable("unsupported type");		llvm_unreachable("unsupported type");
return {};		return {};
};		};
genReductionLoop(builder, funcOp, init, genBodyOp, rank);
		mlir::Type elementType = funcOp.getResultTypes()[0];

		genReductionLoop(builder, elementType, funcOp, init, genBodyOp, rank);
		}

		static void genRuntimeCountBody(fir::FirOpBuilder &builder,
		mlir::func::FuncOp &funcOp, unsigned rank) {
		auto zero = [](fir::FirOpBuilder builder, mlir::Location loc,
		mlir::Type elementType) {
		unsigned bits = elementType.getIntOrFloatBitWidth();
		int64_t zeroInt = llvm::APInt::getZero(bits).getSExtValue();
		return builder.createIntegerConstant(loc, elementType, zeroInt);
		};

		auto genBodyOp = [](fir::FirOpBuilder builder, mlir::Location loc,
		mlir::Type elementType, mlir::Value elem1,
		mlir::Value elem2) -> mlir::Value {
		auto zero32 = builder.createIntegerConstant(loc, builder.getI32Type(), 0);
		auto zero64 = builder.createIntegerConstant(loc, builder.getI64Type(), 0);
		vzakhariUnsubmitted Done Reply Inline Actions It looks like you assume that logical TRUE is 1 - I am not sure if this is correct. I would rather reproduce the logic from `flang/runtime/tools.h`, and assume that everything that is not 0 is TRUE: // Utility for dealing with elemental LOGICAL arguments inline bool IsLogicalElementTrue( const Descriptor &logical, const SubscriptValue at[]) { // A LOGICAL value is false if and only if all of its bytes are zero. const char p{logical.Element<char>(at)}; for (std::size_t j{logical.ElementBytes()}; j-- > 0; ++p) { if (p) { return true; } } return false; } vzakhari: It looks like you assume that logical TRUE is 1 - I am not sure if this is correct. I would…
		auto one64 = builder.createIntegerConstant(loc, builder.getI64Type(), 1);

		auto compare = builder.create<mlir::arith::CmpIOp>(
		loc, mlir::arith::CmpIPredicate::eq, elem1, zero32);
		auto select =
		builder.create<mlir::arith::SelectOp>(loc, compare, zero64, one64);
		vzakhariUnsubmitted Not Done Reply Inline Actions I think `one64` and `zero64` need to be swapped. vzakhari: I think `one64` and `zero64` need to be swapped.
		SBallantyneAuthorUnsubmitted Done Reply Inline Actions The function does work as it is currently, though it is strange that the ordering here is swapped compared to the mlir generated: %7 = arith.select %6, %c0_i64_1, %c1_i64 : i64 Changing it around does swap the values in the mlir, leading to the wrong result being calculated builder.create<mlir::arith::SelectOp>(loc, compare, zero64, one64); -> %7 = arith.select %6, %c1_i64, %c0_i64_1 : i64 SBallantyne: The function does work as it is currently, though it is strange that the ordering here is…
		SBallantyneAuthorUnsubmitted Done Reply Inline Actions Never mind, I somehow didn't notice that one64 was declared using 0 and zero64 was declared using 1 SBallantyne: Never mind, I somehow didn't notice that one64 was declared using 0 and zero64 was declared…
		return builder.create<mlir::arith::AddIOp>(loc, select, elem2);
		};

		mlir::Type elementType = builder.getI32Type();

		genReductionLoop(builder, elementType, funcOp, zero, genBodyOp, rank);
}		}

/// Generate function type for the simplified version of RTNAME(DotProduct)		/// Generate function type for the simplified version of RTNAME(DotProduct)
/// operating on the given \p elementType.		/// operating on the given \p elementType.
static mlir::FunctionType genRuntimeDotType(fir::FirOpBuilder &builder,		static mlir::FunctionType genRuntimeDotType(fir::FirOpBuilder &builder,
const mlir::Type &elementType) {		const mlir::Type &elementType) {
mlir::Type boxType = fir::BoxType::get(builder.getNoneType());		mlir::Type boxType = fir::BoxType::get(builder.getNoneType());
return mlir::FunctionType::get(builder.getContext(), {boxType, boxType},		return mlir::FunctionType::get(builder.getContext(), {boxType, boxType},
▲ Show 20 Lines • Show All 216 Lines • ▼ Show 20 Lines	do {
// box type to another box type.		// box type to another box type.
auto boxType = val.getType().cast<fir::BoxType>();		auto boxType = val.getType().cast<fir::BoxType>();
auto elementType = fir::unwrapSeqOrBoxedSeqType(boxType);		auto elementType = fir::unwrapSeqOrBoxedSeqType(boxType);
if (!elementType.isa<mlir::NoneType>())		if (!elementType.isa<mlir::NoneType>())
return elementType;		return elementType;
} while (true);		} while (true);
}		}

void SimplifyIntrinsicsPass::simplifyReduction(fir::CallOp call,		void SimplifyIntrinsicsPass::simplifyIntOrFloatReduction(
const fir::KindMapping &kindMap,		fir::CallOp call, const fir::KindMapping &kindMap,
GenReductionBodyTy genBodyFunc) {		GenReductionBodyTy genBodyFunc) {
mlir::SymbolRefAttr callee = call.getCalleeAttr();
mlir::Operation::operand_range args = call.getArgs();
// args[1] and args[2] are source filename and line number, ignored.		// args[1] and args[2] are source filename and line number, ignored.
		mlir::Operation::operand_range args = call.getArgs();

const mlir::Value &dim = args[3];		const mlir::Value &dim = args[3];
const mlir::Value &mask = args[4];		const mlir::Value &mask = args[4];
// dim is zero when it is absent, which is an implementation		// dim is zero when it is absent, which is an implementation
// detail in the runtime library.		// detail in the runtime library.

bool dimAndMaskAbsent = isZero(dim) && isOperandAbsent(mask);		bool dimAndMaskAbsent = isZero(dim) && isOperandAbsent(mask);
unsigned rank = getDimCount(args[0]);		unsigned rank = getDimCount(args[0]);
if (dimAndMaskAbsent && rank > 0) {
mlir::Location loc = call.getLoc();
fir::FirOpBuilder builder{getSimplificationBuilder(call, kindMap)};
std::string fmfString{getFastMathFlagsString(builder)};

// Support only floating point and integer results now.		if (!(dimAndMaskAbsent && rank > 0))
		return;

mlir::Type resultType = call.getResult(0).getType();		mlir::Type resultType = call.getResult(0).getType();

if (!resultType.isa<mlir::FloatType>() &&		if (!resultType.isa<mlir::FloatType>() &&
!resultType.isa<mlir::IntegerType>())		!resultType.isa<mlir::IntegerType>())
return;		return;

auto argType = getArgElementType(args[0]);		auto argType = getArgElementType(args[0]);
if (!argType)		if (!argType)
return;		return;
assert(*argType == resultType &&		assert(*argType == resultType &&
"Argument/result types mismatch in reduction");		"Argument/result types mismatch in reduction");

		mlir::SymbolRefAttr callee = call.getCalleeAttr();

		fir::FirOpBuilder builder{getSimplificationBuilder(call, kindMap)};
		std::string fmfString{getFastMathFlagsString(builder)};
		std::string funcName =
		(mlir::Twine{callee.getLeafReference().getValue(), "x"} +
		mlir::Twine{rank} +
		// We must mangle the generated function name with FastMathFlags
		// value.
		(fmfString.empty() ? mlir::Twine{} : mlir::Twine{"_", fmfString}))
		.str();

		simplifyReductionBody(call, kindMap, genBodyFunc, builder, funcName);
		}

		void SimplifyIntrinsicsPass::simplifyLogicalReduction(
		fir::CallOp call, const fir::KindMapping &kindMap,
		GenReductionBodyTy genBodyFunc) {

		mlir::Operation::operand_range args = call.getArgs();
		const mlir::Value &dim = args[3];

		if (!isZero(dim))
		return;

		unsigned rank = getDimCount(args[0]);
		mlir::SymbolRefAttr callee = call.getCalleeAttr();

		fir::FirOpBuilder builder{getSimplificationBuilder(call, kindMap)};
		std::string funcName =
		(mlir::Twine{callee.getLeafReference().getValue(), "x"} +
		mlir::Twine{rank})
		vzakhariUnsubmitted Not Done Reply Inline Actions nit: the final `mlir::Twine{}` is redundant. vzakhari: nit: the final `mlir::Twine{}` is redundant.
		.str();

		simplifyReductionBody(call, kindMap, genBodyFunc, builder, funcName);
		}

		void SimplifyIntrinsicsPass::simplifyReductionBody(
		fir::CallOp call, const fir::KindMapping &kindMap,
		vzakhariUnsubmitted Not Done Reply Inline Actions Since `_FortranACount` does not have the element type in its name, we have to add it explicitly, otherwise, all calls will map to the same simplified version regardless of the element type. Please add a test with different logical kinds. vzakhari: Since `_FortranACount` does not have the element type in its name, we have to add it explicitly…
		SBallantyneAuthorUnsubmitted Not Done Reply Inline Actions That is intended, the compiler will deal with different sized logicals by converting them to logical<4> before passing to count, for runtime or simplified. I can still add the test if needed, but this code isn't responsible for handling this, and if that area breaks regular count will also break. SBallantyne: That is intended, the compiler will deal with different sized logicals by converting them to…
		vzakhariUnsubmitted Not Done Reply Inline Actions Thank you for the clarification! I did not know that we always create a copy for the `Count` call. I think this is a performance issue, since we do not really need this copy. I would suggest fixing the mangling in this commit, and getting rid of the copy in another commit. vzakhari: Thank you for the clarification! I did not know that we always create a copy for the `Count`…
		SBallantyneAuthorUnsubmitted Done Reply Inline Actions I agree that it is wasteful to convert everything to logical<4> but I think that including the kind of logical in the call name and getting rid of the conversion would be better kept in the same commit, rather than split over two commits - it doesn't cause any issues with this current version and wouldn't provide any benefit yet either. SBallantyne: I agree that it is wasteful to convert everything to logical<4> but I think that including the…
		vzakhariUnsubmitted Not Done Reply Inline Actions Ok, works for me. In addition, it does not make much sense to mangle the logical versions with FMF, since they do not have any FP operations. I think we'd better fix this in the current commit. vzakhari: Ok, works for me. In addition, it does not make much sense to mangle the logical versions with…
		GenReductionBodyTy genBodyFunc, fir::FirOpBuilder &builder,
		const mlir::StringRef &funcName) {

		mlir::Operation::operand_range args = call.getArgs();

		mlir::Type resultType = call.getResult(0).getType();
		unsigned rank = getDimCount(args[0]);

		mlir::Location loc = call.getLoc();

auto typeGenerator = [&resultType](fir::FirOpBuilder &builder) {		auto typeGenerator = [&resultType](fir::FirOpBuilder &builder) {
return genNoneBoxType(builder, resultType);		return genNoneBoxType(builder, resultType);
};		};
auto bodyGenerator = [&rank, &genBodyFunc](fir::FirOpBuilder &builder,		auto bodyGenerator = [&rank, &genBodyFunc](fir::FirOpBuilder &builder,
mlir::func::FuncOp &funcOp) {		mlir::func::FuncOp &funcOp) {
genBodyFunc(builder, funcOp, rank);		genBodyFunc(builder, funcOp, rank);
};		};
// Mangle the function name with the rank value as "x<rank>".		// Mangle the function name with the rank value as "x<rank>".
std::string funcName =
(mlir::Twine{callee.getLeafReference().getValue(), "x"} +
mlir::Twine{rank} +
// We must mangle the generated function name with FastMathFlags
// value.
(fmfString.empty() ? mlir::Twine{} : mlir::Twine{"_", fmfString}))
.str();
mlir::func::FuncOp newFunc =		mlir::func::FuncOp newFunc =
getOrCreateFunction(builder, funcName, typeGenerator, bodyGenerator);		getOrCreateFunction(builder, funcName, typeGenerator, bodyGenerator);
auto newCall =		auto newCall =
builder.create<fir::CallOp>(loc, newFunc, mlir::ValueRange{args[0]});		builder.create<fir::CallOp>(loc, newFunc, mlir::ValueRange{args[0]});
call->replaceAllUsesWith(newCall.getResults());		call->replaceAllUsesWith(newCall.getResults());
call->dropAllReferences();		call->dropAllReferences();
call->erase();		call->erase();
}		}
}

void SimplifyIntrinsicsPass::runOnOperation() {		void SimplifyIntrinsicsPass::runOnOperation() {
LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");		LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
mlir::ModuleOp module = getOperation();		mlir::ModuleOp module = getOperation();
fir::KindMapping kindMap = fir::getKindMapping(module);		fir::KindMapping kindMap = fir::getKindMapping(module);
module.walk([&](mlir::Operation *op) {		module.walk([&](mlir::Operation *op) {
if (auto call = mlir::dyn_cast<fir::CallOp>(op)) {		if (auto call = mlir::dyn_cast<fir::CallOp>(op)) {
if (mlir::SymbolRefAttr callee = call.getCalleeAttr()) {		if (mlir::SymbolRefAttr callee = call.getCalleeAttr()) {
mlir::StringRef funcName = callee.getLeafReference().getValue();		mlir::StringRef funcName = callee.getLeafReference().getValue();
// Replace call to runtime function for SUM when it has single		// Replace call to runtime function for SUM when it has single
// argument (no dim or mask argument) for 1D arrays with either		// argument (no dim or mask argument) for 1D arrays with either
// Integer4 or Real8 types. Other forms are ignored.		// Integer4 or Real8 types. Other forms are ignored.
// The new function is added to the module.		// The new function is added to the module.
//		//
// Prototype for runtime call (from sum.cpp):		// Prototype for runtime call (from sum.cpp):
// RTNAME(Sum<T>)(const Descriptor &x, const char *source, int line,		// RTNAME(Sum<T>)(const Descriptor &x, const char *source, int line,
// int dim, const Descriptor *mask)		// int dim, const Descriptor *mask)
//		//
if (funcName.startswith(RTNAME_STRING(Sum))) {		if (funcName.startswith(RTNAME_STRING(Sum))) {
simplifyReduction(call, kindMap, genRuntimeSumBody);		simplifyIntOrFloatReduction(call, kindMap, genRuntimeSumBody);
return;		return;
}		}
if (funcName.startswith(RTNAME_STRING(DotProduct))) {		if (funcName.startswith(RTNAME_STRING(DotProduct))) {
LLVM_DEBUG(llvm::dbgs() << "Handling " << funcName << "\n");		LLVM_DEBUG(llvm::dbgs() << "Handling " << funcName << "\n");
LLVM_DEBUG(llvm::dbgs() << "Call operation:\n"; op->dump();		LLVM_DEBUG(llvm::dbgs() << "Call operation:\n"; op->dump();
llvm::dbgs() << "\n");		llvm::dbgs() << "\n");
mlir::Operation::operand_range args = call.getArgs();		mlir::Operation::operand_range args = call.getArgs();
const mlir::Value &v1 = args[0];		const mlir::Value &v1 = args[0];
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines	if (auto call = mlir::dyn_cast<fir::CallOp>(op)) {
call->dropAllReferences();		call->dropAllReferences();
call->erase();		call->erase();

LLVM_DEBUG(llvm::dbgs() << "Replaced with:\n"; newCall.dump();		LLVM_DEBUG(llvm::dbgs() << "Replaced with:\n"; newCall.dump();
llvm::dbgs() << "\n");		llvm::dbgs() << "\n");
return;		return;
}		}
if (funcName.startswith(RTNAME_STRING(Maxval))) {		if (funcName.startswith(RTNAME_STRING(Maxval))) {
simplifyReduction(call, kindMap, genRuntimeMaxvalBody);		simplifyIntOrFloatReduction(call, kindMap, genRuntimeMaxvalBody);
		return;
		}
		if (funcName.startswith(RTNAME_STRING(Count))) {
		simplifyLogicalReduction(call, kindMap, genRuntimeCountBody);
return;		return;
}		}
}		}
}		}
});		});
		LeporacanthicusUnsubmitted Done Reply Inline Actions You'll want a return here. Mostly to match the rest of the code... Leporacanthicus: You'll want a return here. Mostly to match the rest of the code...
LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");		LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
}		}

void SimplifyIntrinsicsPass::getDependentDialects(		void SimplifyIntrinsicsPass::getDependentDialects(
mlir::DialectRegistry &registry) const {		mlir::DialectRegistry &registry) const {
// LLVM::LinkageAttr creation requires that LLVM dialect is loaded.		// LLVM::LinkageAttr creation requires that LLVM dialect is loaded.
registry.insert<mlir::LLVM::LLVMDialect>();		registry.insert<mlir::LLVM::LLVMDialect>();
}		}
std::unique_ptr<mlir::Pass> fir::createSimplifyIntrinsicsPass() {		std::unique_ptr<mlir::Pass> fir::createSimplifyIntrinsicsPass() {
return std::make_unique<SimplifyIntrinsicsPass>();		return std::make_unique<SimplifyIntrinsicsPass>();
}		}

flang/test/Transforms/simplifyintrinsics.fir

	Show First 20 Lines • Show All 1,092 Lines • ▼ Show 20 Lines
	// CHECK-LABEL: @sum_1d_real_contract_reassoc			// CHECK-LABEL: @sum_1d_real_contract_reassoc
	// CHECK: fir.call @_FortranASumReal8x1_reassoc_contract_simplified(%5) fastmath<reassoc,contract>			// CHECK: fir.call @_FortranASumReal8x1_reassoc_contract_simplified(%5) fastmath<reassoc,contract>
	// CHECK-LABEL: @sum_1d_real_fast			// CHECK-LABEL: @sum_1d_real_fast
	// CHECK: fir.call @_FortranASumReal8x1_fast_simplified(%5) fastmath<fast>			// CHECK: fir.call @_FortranASumReal8x1_fast_simplified(%5) fastmath<fast>
	// CHECK-LABEL: func.func private @_FortranASumReal8x1_reassoc_contract_simplified			// CHECK-LABEL: func.func private @_FortranASumReal8x1_reassoc_contract_simplified
	// CHECK: arith.addf %{{.*}}, %{{.*}} fastmath<reassoc,contract> : f64			// CHECK: arith.addf %{{.*}}, %{{.*}} fastmath<reassoc,contract> : f64
	// CHECK-LABEL: func.func private @_FortranASumReal8x1_fast_simplified			// CHECK-LABEL: func.func private @_FortranASumReal8x1_fast_simplified
	// CHECK: arith.addf %{{.*}}, %{{.*}} fastmath<fast> : f64			// CHECK: arith.addf %{{.*}}, %{{.*}} fastmath<fast> : f64

				// -----
				// Ensure count is simplified in valid case

				// Test input for the whole-array Count case: builds a box over a local
				// 10-element !fir.logical<4> array, erases it to !fir.box<none>, and
				// passes it to the runtime entry @_FortranACount. %arg0 is not
				// referenced in the body.
				func.func @_QMtestPcount_generate_mask(%arg0: !fir.ref<f32> {fir.bindc_name = "a"}) -> i32 {
				%0 = fir.alloca i32 {bindc_name = "count_generate_mask", uniq_name = "_QMtestFcount_generate_maskEcount_generate_mask"}
				%c10 = arith.constant 10 : index
				%1 = fir.alloca !fir.array<10x!fir.logical<4>> {bindc_name = "mask", uniq_name = "_QMtestFcount_generate_maskEmask"}
				%2 = fir.shape %c10 : (index) -> !fir.shape<1>
				%3 = fir.embox %1(%2) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
				%c0 = arith.constant 0 : index
				%4 = fir.address_of(@_QQcl.2E2F746573746661696C2E66393000) : !fir.ref<!fir.char<1,15>>
				%c10_i32 = arith.constant 10 : i32
				%5 = fir.convert %3 : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
				%6 = fir.convert %4 : (!fir.ref<!fir.char<1,15>>) -> !fir.ref<i8>
				%7 = fir.convert %c0 : (index) -> i32
				// Operands: type-erased mask box, character data address, the i32
				// constant 10, and the index constant 0 converted to i32.
				// NOTE(review): the last three are presumably source file, source
				// line and DIM (0 meaning "no dim") - confirm against the runtime
				// signature of Count.
				%8 = fir.call @_FortranACount(%5, %6, %c10_i32, %7) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32, i32) -> i64
				// The i64 result is narrowed to i32, stored in the result variable,
				// reloaded and returned.
				%9 = fir.convert %8 : (i64) -> i32
				fir.store %9 to %0 : !fir.ref<i32>
				%10 = fir.load %0 : !fir.ref<i32>
				return %10 : i32
				}
				func.func private @_FortranACount(!fir.box<none>, !fir.ref<i8>, i32, i32) -> i64 attributes {fir.runtime}
				// Constant character global whose address is passed to the
				// @_FortranACount call in this test.
				// NOTE(review): the literal below is 11 bytes ("./test.f90" plus NUL)
				// while the declared length is 15, and the hex in the symbol name
				// decodes to "./testfail.f90\0" (15 bytes) - confirm which is intended.
				fir.global linkonce @_QQcl.2E2F746573746661696C2E66393000 constant : !fir.char<1,15> {
				%0 = fir.string_lit "./test.f90\00"(15) : !fir.char<1,15>
				fir.has_value %0 : !fir.char<1,15>
				}

				// CHECK-LABEL: func.func @_QMtestPcount_generate_mask(
				// CHECK-SAME: %[[A:.*]]: !fir.ref<f32> {fir.bindc_name = "a"}) -> i32 {
				// CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
				// CHECK: %[[A_BOX_LOGICAL:.*]] = fir.embox %{{.*}}(%[[SHAPE]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
				// CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_LOGICAL]] : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
				// CHECK-NOT: fir.call @_FortranACount({{.*}})
				// CHECK: %[[RES:.*]] = fir.call @_FortranACountx1_simplified(%[[A_BOX_NONE]]) fastmath<contract> : (!fir.box<none>) -> i64
				// CHECK-NOT: fir.call @_FortranACount({{.*}})
				// CHECK: return %{{.*}} : i32
				// CHECK: }
				// CHECK: func.func private @_FortranACount(!fir.box<none>, !fir.ref<i8>, i32, i32) -> i64 attributes {fir.runtime}

				// CHECK-LABEL: func.func private @_FortranACountx1_simplified(
				// CHECK-SAME: %[[ARR:.*]]: !fir.box<none>) -> i64 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
				// CHECK: %[[C_INDEX0:.*]] = arith.constant 0 : index
				// CHECK: %[[ARR_BOX_I32:.*]] = fir.convert %[[ARR]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
				// CHECK: %[[IZERO:.*]] = arith.constant 0 : i64
				// CHECK: %[[C_INDEX1:.*]] = arith.constant 1 : index
				// CHECK: %[[DIMIDX_0:.*]] = arith.constant 0 : index
				// CHECK: %[[DIMS:.*]]:3 = fir.box_dims %[[ARR_BOX_I32]], %[[DIMIDX_0]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
				// CHECK: %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[C_INDEX1]] : index
				// CHECK: %[[RES:.*]] = fir.do_loop %[[ITER:.*]] = %[[C_INDEX0]] to %[[EXTENT]] step %[[C_INDEX1]] iter_args(%[[COUNT:.*]] = %[[IZERO]]) -> (i64) {
				// CHECK: %[[ITEM:.*]] = fir.coordinate_of %[[ARR_BOX_I32]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
				// CHECK: %[[ITEM_VAL:.*]] = fir.load %[[ITEM]] : !fir.ref<i32>
				// CHECK: %[[I32_0:.*]] = arith.constant 0 : i32
				// CHECK: %[[I64_0:.*]] = arith.constant 0 : i64
				// CHECK: %[[I64_1:.*]] = arith.constant 1 : i64
				// CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[ITEM_VAL]], %[[I32_0]] : i32
				// CHECK: %[[SELECT:.*]] = arith.select %[[CMP]], %[[I64_0]], %[[I64_1]] : i64
				// CHECK: %[[NEW_COUNT:.*]] = arith.addi %[[SELECT]], %[[COUNT]] : i64
				// CHECK: fir.result %[[NEW_COUNT]] : i64
				// CHECK: }
				// CHECK: return %[[RES:.*]] : i64
				// CHECK: }

				// -----
				// Ensure count isn't simplified when given dim argument

				// Test input for the CountDim case: boxes the 10x10 !fir.logical<4>
				// dummy argument, calls the runtime entry @_FortranACountDim with a
				// reference to a result descriptor, then copies the returned elements
				// into the local array "res" and returns it.
				func.func @_QMtestPcount_generate_mask(%arg0: !fir.ref<!fir.array<10x10x!fir.logical<4>>> {fir.bindc_name = "mask"}) -> !fir.array<10xi32> {
				%0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
				%c10 = arith.constant 10 : index
				%c10_0 = arith.constant 10 : index
				%c10_1 = arith.constant 10 : index
				%1 = fir.alloca !fir.array<10xi32> {bindc_name = "res", uniq_name = "_QMtestFcount_generate_maskEres"}
				%2 = fir.shape %c10_1 : (index) -> !fir.shape<1>
				%3 = fir.array_load %1(%2) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.array<10xi32>
				%c2_i32 = arith.constant 2 : i32
				%4 = fir.shape %c10, %c10_0 : (index, index) -> !fir.shape<2>
				%5 = fir.embox %arg0(%4) : (!fir.ref<!fir.array<10x10x!fir.logical<4>>>, !fir.shape<2>) -> !fir.box<!fir.array<10x10x!fir.logical<4>>>
				%c4 = arith.constant 4 : index
				// The result descriptor starts out as a box over a zero address with
				// extent 0 and is stored in %0 before the call.
				%6 = fir.zero_bits !fir.heap<!fir.array<?xi32>>
				%c0 = arith.constant 0 : index
				%7 = fir.shape %c0 : (index) -> !fir.shape<1>
				%8 = fir.embox %6(%7) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
				fir.store %8 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
				%9 = fir.address_of(@_QQcl.2E2F746573746661696C2E66393000) : !fir.ref<!fir.char<1,15>>
				%c11_i32 = arith.constant 11 : i32
				%10 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
				%11 = fir.convert %5 : (!fir.box<!fir.array<10x10x!fir.logical<4>>>) -> !fir.box<none>
				%12 = fir.convert %c4 : (index) -> i32
				%13 = fir.convert %9 : (!fir.ref<!fir.char<1,15>>) -> !fir.ref<i8>
				// Operands: result descriptor reference, type-erased mask box, the
				// i32 constant 2, the constant 4 as i32, character data address, and
				// the i32 constant 11.
				// NOTE(review): presumably dim=2, kind=4, source file and source
				// line - confirm against the runtime signature of CountDim.
				%14 = fir.call @_FortranACountDim(%10, %11, %c2_i32, %12, %13, %c11_i32) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, i32, !fir.ref<i8>, i32) -> none
				// Reload the descriptor after the call and read its dims and base
				// address to load the result as an array value.
				%15 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
				%c0_2 = arith.constant 0 : index
				%16:3 = fir.box_dims %15, %c0_2 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
				%17 = fir.box_addr %15 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
				%18 = fir.shape_shift %16#0, %16#1 : (index, index) -> !fir.shapeshift<1>
				%19 = fir.array_load %17(%18) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.array<?xi32>
				%c1 = arith.constant 1 : index
				%c0_3 = arith.constant 0 : index
				%20 = arith.subi %c10_1, %c1 : index
				// Element-wise copy of the runtime result into "res" over indices
				// 0 through %20 (10 - 1), step 1.
				%21 = fir.do_loop %arg1 = %c0_3 to %20 step %c1 unordered iter_args(%arg2 = %3) -> (!fir.array<10xi32>) {
				%23 = fir.array_fetch %19, %arg1 : (!fir.array<?xi32>, index) -> i32
				%24 = fir.array_update %arg2, %23, %arg1 : (!fir.array<10xi32>, i32, index) -> !fir.array<10xi32>
				fir.result %24 : !fir.array<10xi32>
				}
				fir.array_merge_store %3, %21 to %1 : !fir.array<10xi32>, !fir.array<10xi32>, !fir.ref<!fir.array<10xi32>>
				// Free the heap address taken from the result descriptor.
				fir.freemem %17 : !fir.heap<!fir.array<?xi32>>
				%22 = fir.load %1 : !fir.ref<!fir.array<10xi32>>
				return %22 : !fir.array<10xi32>
				}
				func.func private @_FortranACountDim(!fir.ref<!fir.box<none>>, !fir.box<none>, i32, i32, !fir.ref<i8>, i32) -> none attributes {fir.runtime}

				// CHECK-LABEL: func.func @_QMtestPcount_generate_mask(
				// CHECK-SAME: %[[A:.*]]: !fir.ref<!fir.array<10x10x!fir.logical<4>>> {fir.bindc_name = "mask"}) -> !fir.array<10xi32> {
				// CHECK-NOT: fir.call @_FortranACountDim_simplified({{.*}})
				// CHECK: %[[RES:.*]] = fir.call @_FortranACountDim({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, i32, !fir.ref<i8>, i32) -> none
				// CHECK-NOT: fir.call @_FortranACountDim_simplified({{.*}})

This is an archive of the discontinued LLVM Phabricator instance.

[flang] Add Count to simplified intrinsics
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 492769

flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp

flang/test/Transforms/simplifyintrinsics.fir

This is an archive of the discontinued LLVM Phabricator instance.

[flang] Add Count to simplified intrinsics (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 492769

flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp

flang/test/Transforms/simplifyintrinsics.fir

[flang] Add Count to simplified intrinsics
ClosedPublic