Index: flang/include/flang/Optimizer/CodeGen/CGPasses.td =================================================================== --- flang/include/flang/Optimizer/CodeGen/CGPasses.td +++ flang/include/flang/Optimizer/CodeGen/CGPasses.td @@ -23,10 +23,14 @@ will also convert ops in the standard and FIRCG dialects. }]; let constructor = "::fir::createFIRToLLVMPass()"; - let dependentDialects = ["mlir::LLVM::LLVMDialect"]; + let dependentDialects = [ + "mlir::LLVM::LLVMDialect", "mlir::DLTIDialect" + ]; let options = [ Option<"forcedTargetTriple", "target", "std::string", /*default=*/"", "Override module's target triple.">, + Option<"forcedDataLayout", "datalayout", "std::string", /*default=*/"", + "Override module's data layout.">, Option<"applyTBAA", "apply-tbaa", "bool", /*default=*/"false", "Attach TBAA tags to memory accessing operations."> ]; Index: flang/lib/Optimizer/CodeGen/CMakeLists.txt =================================================================== --- flang/lib/Optimizer/CodeGen/CMakeLists.txt +++ flang/lib/Optimizer/CodeGen/CMakeLists.txt @@ -28,6 +28,7 @@ MLIRBuiltinToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation MLIRTargetLLVMIRExport + MLIRTargetLLVMIRImport LINK_COMPONENTS AsmParser Index: flang/lib/Optimizer/CodeGen/CodeGen.cpp =================================================================== --- flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -21,6 +21,7 @@ #include "flang/Optimizer/Support/TypeCode.h" #include "flang/Optimizer/Support/Utils.h" #include "flang/Semantics/runtime-type-info.h" +#include "mlir/Analysis/DataLayoutAnalysis.h" #include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h" #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" #include "mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h" @@ -34,16 +35,20 @@ #include "mlir/Conversion/OpenACCToLLVM/ConvertOpenACCToLLVM.h" #include "mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h" #include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" +#include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/OpenACC/OpenACC.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Matchers.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Target/LLVMIR/Import.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/IR/DataLayout.h" #include namespace fir { @@ -119,8 +124,10 @@ class FIROpConversion : public mlir::ConvertOpToLLVMPattern { public: explicit FIROpConversion(fir::LLVMTypeConverter &lowering, + const mlir::DataLayout &dataLayout, const fir::FIRToLLVMPassOptions &options) - : mlir::ConvertOpToLLVMPattern(lowering), options(options) {} + : mlir::ConvertOpToLLVMPattern(lowering), options(options), + dataLayout(dataLayout) {} protected: mlir::Type convertType(mlir::Type ty) const { @@ -348,14 +355,28 @@ } // Generate an alloca of size 1 and type \p toTy. - mlir::LLVM::AllocaOp + mlir::Value genAllocaWithType(mlir::Location loc, mlir::Type toTy, unsigned alignment, mlir::ConversionPatternRewriter &rewriter) const { auto thisPt = rewriter.saveInsertionPoint(); mlir::LLVM::LLVMFuncOp func = getFuncForAllocaInsert(rewriter); rewriter.setInsertionPointToStart(&func.front()); auto size = genI32Constant(loc, rewriter, 1); - auto al = rewriter.create(loc, toTy, size, alignment); + + mlir::Attribute memSpace = dataLayout.getAllocaMemorySpace(); + mlir::Type allocaTy = toTy; + if (memSpace != mlir::Attribute()) { + unsigned addrSpace = memSpace.cast().getUInt(); + allocaTy = mlir::LLVM::LLVMPointerType::get( + toTy.cast().getElementType(), addrSpace); + } + + mlir::Value al = + rewriter.create(loc, allocaTy, size, alignment); + + if (allocaTy != toTy) + al = rewriter.create(loc, toTy, al); + rewriter.restoreInsertionPoint(thisPt); return al; } @@ -371,6 +392,7 @@ } const fir::FIRToLLVMPassOptions &options; + const mlir::DataLayout &dataLayout; }; /// FIR conversion pattern template @@ -506,14 +528,27 @@ size = rewriter.create( loc, ity, size, integerCast(loc, rewriter, ity, operands[i])); } - if (ty == resultTy) { + + mlir::Attribute memSpace = dataLayout.getAllocaMemorySpace(); + mlir::Type allocaTy = ty; + if (memSpace != mlir::Attribute()) { + unsigned addrSpace = memSpace.cast().getUInt(); + allocaTy = mlir::LLVM::LLVMPointerType::get( + ty.cast().getElementType(), addrSpace); + } + + if (allocaTy == resultTy) { // Do not emit the bitcast if ty and resultTy are the same. - rewriter.replaceOpWithNewOp(alloc, ty, size, + rewriter.replaceOpWithNewOp(alloc, allocaTy, size, alloc->getAttrs()); } else { - auto al = rewriter.create(loc, ty, size, + auto al = rewriter.create(loc, allocaTy, size, alloc->getAttrs()); - rewriter.replaceOpWithNewOp(alloc, resultTy, al); + if (allocaTy != ty) + rewriter.replaceOpWithNewOp(alloc, + resultTy, al); + else + rewriter.replaceOpWithNewOp(alloc, resultTy, al); } return mlir::success(); } @@ -3030,7 +3065,7 @@ auto storeOp = rewriter.create(loc, boxValue, newBoxStorage); attachTBAATag(storeOp, boxTy, boxTy, nullptr); - rewriter.replaceOp(load, newBoxStorage.getResult()); + rewriter.replaceOp(load, newBoxStorage); } else { mlir::Type loadTy = convertType(load.getType()); auto loadOp = rewriter.create( @@ -3638,8 +3673,9 @@ template struct MustBeDeadConversion : public FIROpConversion { explicit MustBeDeadConversion(fir::LLVMTypeConverter &lowering, + const mlir::DataLayout &dataLayout, const fir::FIRToLLVMPassOptions &options) - : FIROpConversion(lowering, options) {} + : FIROpConversion(lowering, dataLayout, options) {} using OpAdaptor = typename FromOp::Adaptor; mlir::LogicalResult @@ -3747,6 +3783,17 @@ if (!forcedTargetTriple.empty()) fir::setTargetTriple(mod, forcedTargetTriple); + if (!forcedDataLayout.empty()) { + llvm::DataLayout dl(forcedDataLayout); + mlir::MLIRContext *context = mod.getContext(); + mod->setAttr( + mlir::LLVM::LLVMDialect::getDataLayoutAttrName(), + mlir::StringAttr::get(context, dl.getStringRepresentation())); + mlir::DataLayoutSpecInterface dlSpec = + mlir::translateDataLayout(dl, context); + mod->setAttr(mlir::DLTIDialect::kDataLayoutAttrName, dlSpec); + } + // Run dynamic pass pipeline for converting Math dialect // operations into other dialects (llvm, func, etc.). // Some conversions of Math operations cannot be done @@ -3773,8 +3820,11 @@ return signalPassFailure(); auto *context = getModule().getContext(); + fir::LLVMTypeConverter typeConverter{getModule(), options.applyTBAA || applyTBAA}; + const auto &layouts = getAnalysis(); + const mlir::DataLayout dataLayout = layouts.getAtOrAbove(mod); mlir::RewritePatternSet pattern(context); pattern.insert< AbsentOpConversion, AddcOpConversion, AddrOfOpConversion, @@ -3798,8 +3848,8 @@ SubcOpConversion, TypeDescOpConversion, UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion, UnreachableOpConversion, UnrealizedConversionCastOpConversion, XArrayCoorOpConversion, - XEmboxOpConversion, XReboxOpConversion, ZeroOpConversion>(typeConverter, - options); + XEmboxOpConversion, XReboxOpConversion, ZeroOpConversion>( + typeConverter, dataLayout, options); mlir::populateFuncToLLVMConversionPatterns(typeConverter, pattern); mlir::populateOpenACCToLLVMConversionPatterns(typeConverter, pattern); mlir::populateOpenMPToLLVMConversionPatterns(typeConverter, pattern); Index: flang/lib/Optimizer/CodeGen/Target.cpp =================================================================== --- flang/lib/Optimizer/CodeGen/Target.cpp +++ flang/lib/Optimizer/CodeGen/Target.cpp @@ -17,7 +17,6 @@ #include "flang/Optimizer/Support/FatalError.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/TypeRange.h" - #define DEBUG_TYPE "flang-codegen-target" using namespace fir; Index: flang/test/Fir/convert-to-llvm.fir =================================================================== --- flang/test/Fir/convert-to-llvm.fir +++ flang/test/Fir/convert-to-llvm.fir @@ -1,10 +1,11 @@ -// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" %s | FileCheck %s -// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu" %s | FileCheck %s -// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s -// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gn" %s | FileCheck %s +// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" %s | FileCheck -check-prefixes=CHECK,GENERIC %s +// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu" %s | FileCheck -check-prefixes=CHECK,GENERIC %s +// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck -check-prefixes=CHECK,GENERIC %s +// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gn" %s | FileCheck -check-prefixes=CHECK,GENERIC %s +// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa, datalayout=e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s //============================================================================= -// SUMMARY: Tests for FIR --> LLVM MLIR conversion independent of the target +// SUMMARY: Tests for FIR --> LLVM MLIR //============================================================================= // Test simple global LLVM conversion @@ -928,12 +929,11 @@ // CHECK-LABEL: llvm.func @test_load_box( // CHECK-SAME: %[[arg0:.*]]: !llvm.ptr>) { // CHECK-NEXT: %[[c1:.*]] = llvm.mlir.constant(1 : i32) : i32 -// CHECK-NEXT: %[[box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE]])> +// GENERIC-NEXT: %[[box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE]])> +// AMDGPU-NEXT: %[[alloca_box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE]])>{{.*}} -> !llvm.ptr, 5> +// AMDGPU-NEXT: %[[box_copy:.*]] = llvm.addrspacecast %[[alloca_box_copy]] : !llvm.ptr, 5> to !llvm.ptr> // CHECK-NEXT: %[[box_val:.*]] = llvm.load %[[arg0]] : !llvm.ptr> // CHECK-NEXT: llvm.store %[[box_val]], %[[box_copy]] : !llvm.ptr> -// CHECK-NEXT: llvm.call @takes_box(%[[box_copy]]) : (!llvm.ptr>) -> () -// CHECK-NEXT: llvm.return -// CHECK-NEXT: } // ----- @@ -1074,7 +1074,9 @@ // CHECK-LABEL: llvm.func @alloca_one() -> !llvm.ptr // CHECK: [[N:%.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: [[A:%.*]] = llvm.alloca [[N]] x i32 +// GENERIC: [[A:%.*]] = llvm.alloca [[N]] x i32 +// AMDGPU: [[AA:%.*]] = llvm.alloca [[N]] x i32{{.*}} -> !llvm.ptr +// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr to !llvm.ptr // CHECK: llvm.return [[A]] : !llvm.ptr // ----- @@ -1091,7 +1093,9 @@ // CHECK: [[N:%.*]] = llvm.mlir.constant(100 : index) : i64 // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: [[TOTAL:%.*]] = llvm.mul [[ONE]], [[N]] : i64 -// CHECK: [[A:%.*]] = llvm.alloca [[TOTAL]] x i32 +// GENERIC: [[A:%.*]] = llvm.alloca [[TOTAL]] x i32 +// AMDGPU: [[AA:%.*]] = llvm.alloca [[TOTAL]] x i32{{.*}} -> !llvm.ptr +// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr to !llvm.ptr // CHECK: llvm.return [[A]] : !llvm.ptr // ----- @@ -1105,7 +1109,9 @@ // CHECK-LABEL: llvm.func @alloca_ptr_to_array() -> !llvm.ptr> // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: [[A:%.*]] = llvm.alloca [[ONE]] x !llvm.ptr +// GENERIC: [[A:%.*]] = llvm.alloca [[ONE]] x !llvm.ptr +// AMDGPU: [[AA:%.*]] = llvm.alloca [[ONE]] x !llvm.ptr{{.*}} -> !llvm.ptr, 5> +// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr, 5> to !llvm.ptr> // CHECK: llvm.return [[A]] : !llvm.ptr> // ----- @@ -1119,11 +1125,13 @@ // CHECK-LABEL: llvm.func @alloca_char_array // CHECK-SAME: ([[L:%.*]]: i32, [[E:%.*]]: i64) -> !llvm.ptr -// CHECK-DAG: [[UNUSEDONE:%.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECKC-DAG: [[UNUSEDONE:%.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK-DAG: [[LCAST:%.*]] = llvm.sext [[L]] : i32 to i64 // CHECK: [[PROD1:%.*]] = llvm.mul [[LCAST]], [[E]] : i64 // CHECK: [[PROD2:%.*]] = llvm.mul [[PROD1]], [[E]] : i64 -// CHECK: [[A:%.*]] = llvm.alloca [[PROD2]] x i8 {in_type = !fir.array> +// GENERIC: [[A:%.*]] = llvm.alloca [[PROD2]] x i8 {in_type = !fir.array> +// AMDGPU: [[AA:%.*]] = llvm.alloca [[PROD2]] x i8 {in_type = !fir.array>{{.*}} -> !llvm.ptr +// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr to !llvm.ptr // CHECK: return [[A]] : !llvm.ptr // ----- @@ -1140,7 +1148,9 @@ // CHECK-DAG: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: [[PROD1:%.*]] = llvm.mul [[ONE]], [[E]] : i64 // CHECK: [[PROD2:%.*]] = llvm.mul [[PROD1]], [[E]] : i64 -// CHECK: [[A:%.*]] = llvm.alloca [[PROD2]] x !llvm.array<8 x i8> {in_type = !fir.array> +// GENERIC: [[A:%.*]] = llvm.alloca [[PROD2]] x !llvm.array<8 x i8> {in_type = !fir.array> +// AMDGPU: [[AA:%.*]] = llvm.alloca [[PROD2]] x !llvm.array<8 x i8> {in_type = !fir.array>{{.*}} -> !llvm.ptr, 5> +// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr, 5> to !llvm.ptr> // CHECK: return [[A]] : !llvm.ptr> // ----- @@ -1164,8 +1174,10 @@ // CHECK-SAME: ([[ARG0:%.*]]: i32, [[ARG1:%.*]]: i16) // CHECK-SAME: -> !llvm.ptr> // CHECK: [[SIZE:%.*]] = llvm.call @_QTtP.mem.size([[ARG0]], [[ARG1]]) : (i32, i16) -> i64 -// CHECK: [[ALLOC:%.*]] = llvm.alloca [[SIZE]] x i8 -// CHECK: [[A:%.*]] = llvm.bitcast [[ALLOC]] : !llvm.ptr to !llvm.ptr> +// GENERIC: [[ALLOC:%.*]] = llvm.alloca [[SIZE]] x i8 +// GENERIC: [[A:%.*]] = llvm.bitcast [[ALLOC]] : !llvm.ptr to !llvm.ptr> +// AMDGPU: [[ALLOC:%.*]] = llvm.alloca [[SIZE]] x i8{{.*}} -> !llvm.ptr +// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[ALLOC]] : !llvm.ptr to !llvm.ptr> // CHECK: llvm.return [[A]] : !llvm.ptr> // ----- @@ -1184,7 +1196,9 @@ // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: [[MUL1:%.*]] = llvm.mul [[ONE]], [[OP1]] : i64 // CHECK: [[TOTAL:%.*]] = llvm.mul [[MUL1]], [[OP2]] : i64 -// CHECK: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<32 x array<16 x array<8 x f32> +// GENERIC: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<32 x array<16 x array<8 x f32> +// AMDGPU: [[AA:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<32 x array<16 x array<8 x f32>{{.*}} -> !llvm.ptr>>, 5> +// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr>>, 5> to !llvm.ptr>>> // CHECK: llvm.return [[A]] : !llvm.ptr // ----- @@ -1203,7 +1217,9 @@ // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: [[MUL1:%.*]] = llvm.mul [[ONE]], [[OP1]] : i64 // CHECK: [[TOTAL:%.*]] = llvm.mul [[MUL1]], [[OP2]] : i64 -// CHECK: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<9 x array<8 x f32> +// GENERIC: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<9 x array<8 x f32> +// AMDGPU: [[AA:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<9 x array<8 x f32>{{.*}} -> !llvm.ptr>, 5> +// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr>, 5> to !llvm.ptr>> // CHECK: llvm.return [[A]] : !llvm.ptr // ----- @@ -1223,7 +1239,9 @@ // CHECK: [[PROD1:%.*]] = llvm.mul [[ONE]], [[FIXED]] : i64 // CHECK: [[PROD2:%.*]] = llvm.mul [[PROD1]], [[A]] : i64 // CHECK: [[PROD3:%.*]] = llvm.mul [[PROD2]], [[B]] : i64 -// CHECK: [[RES:%.*]] = llvm.alloca [[PROD3]] x !llvm.array<4 x i32> {in_type = !fir.array<4x?x3x?x5xi32> +// GENERIC: [[RES:%.*]] = llvm.alloca [[PROD3]] x !llvm.array<4 x i32> {in_type = !fir.array<4x?x3x?x5xi32> +// AMDGPU: [[AA:%.*]] = llvm.alloca [[PROD3]] x !llvm.array<4 x i32> {in_type = !fir.array<4x?x3x?x5xi32>{{.*}} -> !llvm.ptr, 5> +// AMDGPU: [[RES:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr, 5> to !llvm.ptr> // CHECK: llvm.return [[RES]] : !llvm.ptr> // ----- @@ -1562,7 +1580,9 @@ // CHECK-LABEL: func @embox0( // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr> // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32 -// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> {alignment = 8 : i64} : (i32) -> !llvm.ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})>> +// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> {alignment = 8 : i64} : (i32) -> !llvm.ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})>> +// AMDGPU: %[[AA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> {alignment = 8 : i64} : (i32) -> !llvm.ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})>, 5> +// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr>, i64, i32, i8, i8, i8, i8)>, 5> to !llvm.ptr>, i64, i32, i8, i8, i8, i8)>> // CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] // CHECK: %[[I64_ELEM_SIZE:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 @@ -1702,12 +1722,15 @@ // CHECK-LABEL: llvm.func @embox1 // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(42 : i32) : i32 // CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8 -// CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)> +// CHECK: %[[INSERT_1:.*]] = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)> +// CHECK: %[[TYPE_CODE_2:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[TYPE_CODE_I8_2:.*]] = llvm.trunc %[[TYPE_CODE_2]] : i32 to i8 +// CHECK: %[[INSERT_2:.*]] = llvm.insertvalue %[[TYPE_CODE_I8_2]], %[[INSERT_1]][5] : !llvm.struct<(ptr>, i64, i32, i8, i8, i8, i8, ptr, array<1 x i64>)> // CHECK: %[[F18ADDENDUM:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[F18ADDENDUM_I8:.*]] = llvm.trunc %[[F18ADDENDUM]] : i32 to i8 -// CHECK: %{{.*}} = llvm.insertvalue %[[F18ADDENDUM_I8]], %17[6] : !llvm.struct<(ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)> +// CHECK: %{{.*}} = llvm.insertvalue %[[F18ADDENDUM_I8]], %[[INSERT_2]][6] : !llvm.struct<(ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)> // CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr -// CHECK: %[[TDESC_CAST:.*]] = llvm.bitcast %21 : !llvm.ptr to !llvm.ptr +// CHECK: %[[TDESC_CAST:.*]] = llvm.bitcast %[[TDESC]] : !llvm.ptr to !llvm.ptr // CHECK: %{{.*}} = llvm.insertvalue %[[TDESC_CAST]], %{{.*}}[7] : !llvm.struct<(ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)> // ----- @@ -1764,7 +1787,10 @@ // CHECK-LABEL: llvm.func @no_reassoc( // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr) { // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[ALLOC:.*]] = llvm.alloca %[[C1]] x i32 {in_type = i32, operand_segment_sizes = array} : (i64) -> !llvm.ptr +// GENERIC: %[[ALLOC:.*]] = llvm.alloca %[[C1]] x i32 {in_type = i32, operand_segment_sizes = array} : (i64) -> !llvm.ptr +// AMDGPU: %[[AA:.*]] = llvm.alloca %[[C1]] x i32 {in_type = i32, operand_segment_sizes = array} : (i64) -> !llvm.ptr +// AMDGPU: %[[ALLOC:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr to !llvm.ptr + // CHECK: %[[LOAD:.*]] = llvm.load %[[ARG0]] : !llvm.ptr // CHECK: llvm.store %[[LOAD]], %[[ALLOC]] : !llvm.ptr // CHECK: llvm.return @@ -1784,7 +1810,9 @@ // CHECK-LABEL: llvm.func @xembox0( // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr // CHECK: %[[ALLOCA_SIZE:.*]] = llvm.mlir.constant(1 : i32) : i32 -// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>> +// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>> +// AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>, 5> +// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5> to !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>> // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64 // CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] @@ -1873,7 +1901,9 @@ // CHECK-LABEL: llvm.func @_QPsb( // CHECK-SAME: %[[N:.*]]: i64, %[[SH1:.*]]: i64, %[[SH2:.*]]: i64) { // CHECK: %[[ALLOCA_SIZE:.*]] = llvm.mlir.constant(1 : i32) : i32 -// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>> +// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>> +// AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>, 5> +// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, 5> to !llvm.ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>> // CHECK: %[[C4:.*]] = llvm.mlir.constant(4 : index) : i64 // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64 @@ -1884,7 +1914,9 @@ // CHECK: %[[C1_0:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[ARR_SIZE_TMP1:.*]] = llvm.mul %[[C1_0]], %[[N1]] : i64 // CHECK: %[[ARR_SIZE:.*]] = llvm.mul %[[ARR_SIZE_TMP1]], %[[N2]] : i64 -// CHECK: %[[ARR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr", in_type = !fir.array, operand_segment_sizes = array, uniq_name = "_QFsbEarr"} : (i64) -> !llvm.ptr +// GENERIC: %[[ARR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr", in_type = !fir.array, operand_segment_sizes = array, uniq_name = "_QFsbEarr"} : (i64) -> !llvm.ptr +// AMDGPU: %[[AR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr", in_type = !fir.array, operand_segment_sizes = array, uniq_name = "_QFsbEarr"} : (i64) -> !llvm.ptr +// AMDGPU: %[[ARR:.*]] = llvm.addrspacecast %[[AR]] : !llvm.ptr to !llvm.ptr // CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] // CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 @@ -1933,7 +1965,7 @@ // CHECK: %[[STRIDE_MUL:.*]] = llvm.mul %[[PREV_DIM]], %[[C1]] : i64 // CHECK: %[[BOX12:.*]] = llvm.insertvalue %[[STRIDE_MUL]], %[[BOX11]][7, 1, 2] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> // CHECK: %[[BASE_PTR:.*]] = llvm.getelementptr %[[ARR]][%[[PTR_OFFSET0]]] : (!llvm.ptr, i64) -> !llvm.ptr -// CHECK: %[[ADDR_BITCAST:.*]] = llvm.bitcast %[[BASE_PTR]] : !llvm.ptr to !llvm.ptr +// CHECK: %[[ADDR_BITCAST:.*]] = llvm.bitcast %[[BASE_PTR]] : !llvm.ptr to !llvm.ptr // CHECK: %[[BOX13:.*]] = llvm.insertvalue %[[ADDR_BITCAST]], %[[BOX12]][0] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> // CHECK: llvm.store %[[BOX13]], %[[ALLOCA]] : !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>> @@ -1955,15 +1987,21 @@ // CHECK-LABEL: llvm.func @_QPtest_dt_slice // CHECK: %[[ALLOCA_SIZE:.*]] = llvm.mlir.constant(1 : i32) : i32 -// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>> +// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>> +// AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>, 5> +// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5> to !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>> // CHECK: %[[C20:.*]] = llvm.mlir.constant(20 : index) : i64 // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[C10:.*]] = llvm.mlir.constant(10 : i64) : i64 // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64 // CHECK: %[[ALLOCA_SIZE_V:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFtest_dt_sliceEv"} : (i64) -> !llvm.ptr +// GENERIC: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFtest_dt_sliceEv"} : (i64) -> !llvm.ptr +// AMDGPU: %[[AB:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFtest_dt_sliceEv"} : (i64) -> !llvm.ptr +// AMDGPU: %[[V:.*]] = llvm.addrspacecast %[[AB]] : !llvm.ptr to !llvm.ptr // CHECK: %[[ALLOCA_SIZE_X:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[X:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x", in_type = !fir.array<20x!fir.type<_QFtest_dt_sliceTt{i:i32,j:i32}>>, operand_segment_sizes = array, uniq_name = "_QFtest_dt_sliceEx"} : (i64) -> !llvm.ptr>> +// GENERIC: %[[X:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x", in_type = !fir.array<20x!fir.type<_QFtest_dt_sliceTt{i:i32,j:i32}>>, operand_segment_sizes = array, uniq_name = "_QFtest_dt_sliceEx"} : (i64) -> !llvm.ptr>> +// AMDGPU: %[[AC:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x", in_type = !fir.array<20x!fir.type<_QFtest_dt_sliceTt{i:i32,j:i32}>>, operand_segment_sizes = array, uniq_name = "_QFtest_dt_sliceEx"} : (i64) -> !llvm.ptr>, 5> +// AMDGPU: %[[X:.*]] = llvm.addrspacecast %[[AC]] : !llvm.ptr>, 5> to !llvm.ptr>> // CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] // CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 @@ -2002,7 +2040,7 @@ // CHECK: %[[ADDR_BITCAST:.*]] = llvm.bitcast %[[BASE_PTR]] : !llvm.ptr to !llvm.ptr // CHECK: %[[BOX10:.*]] = llvm.insertvalue %[[ADDR_BITCAST]], %[[BOX9]][0] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> // CHECK: llvm.store %[[BOX10]], %[[ALLOCA]] : !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>> -// CHECK: llvm.call @_QPtest_dt_callee(%1) : (!llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>>) -> () +// CHECK: llvm.call @_QPtest_dt_callee(%[[ALLOCA]]) : (!llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>>) -> () // ----- @@ -2234,7 +2272,9 @@ //CHECK-LABEL: llvm.func @test_rebox_1 //CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>> //CHECK: %[[ONE_1:.*]] = llvm.mlir.constant(1 : i32) : i32 -//CHECK: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE_1]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>> +//GENERIC: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE_1]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>> +//AMDGPU: %[[AA:.*]] = llvm.alloca %[[ONE_1]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5> +//AMDGPU: %[[RESULT_BOX_REF:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5> to !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>> //CHECK: %[[THREE:.*]] = llvm.mlir.constant(3 : index) : i64 //CHECK: %[[FOUR:.*]] = llvm.mlir.constant(4 : index) : i64 //CHECK: %[[FIVE:.*]] = llvm.mlir.constant(5 : index) : i64 @@ -2307,7 +2347,9 @@ //CHECK-LABEL: llvm.func @foo //CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr)>>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr, array<1 x i64>)>> //CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32 -//CHECK: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>> +//GENERIC: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>> +//AMDGPU: %[[AA:.*]] = llvm.alloca %[[ONE]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5> +//AMDGPU: %[[RESULT_BOX_REF:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5> to !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>> //CHECK: %[[RESULT_LB:.*]] = llvm.mlir.constant(3 : i64) : i64 //CHECK: %[[RESULT_UB:.*]] = llvm.mlir.constant(60 : i64) : i64 //CHECK: %[[RESULT_STRIDE:.*]] = llvm.mlir.constant(9 : i64) : i64