Index: flang/lib/Optimizer/CodeGen/CodeGen.cpp
===================================================================
--- flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -105,10 +105,14 @@
 class FIROpConversion : public mlir::ConvertOpToLLVMPattern<FromOp> {
 public:
   explicit FIROpConversion(fir::LLVMTypeConverter &lowering,
+                           mlir::ModuleOp &module,
                            const fir::FIRToLLVMPassOptions &options,
                            const BindingTables &bindingTables)
       : mlir::ConvertOpToLLVMPattern<FromOp>(lowering), options(options),
-        bindingTables(bindingTables) {}
+        bindingTables(bindingTables),
+        specifics(fir::CodeGenSpecifics::get(module.getContext(),
+                                             fir::getTargetTriple(module),
+                                             fir::getKindMapping(module))) {}
 
 protected:
   mlir::Type convertType(mlir::Type ty) const {
@@ -336,14 +340,25 @@
   }
 
   // Generate an alloca of size 1 and type \p toTy.
-  mlir::LLVM::AllocaOp
+  mlir::Value
   genAllocaWithType(mlir::Location loc, mlir::Type toTy, unsigned alignment,
                     mlir::ConversionPatternRewriter &rewriter) const {
     auto thisPt = rewriter.saveInsertionPoint();
     mlir::LLVM::LLVMFuncOp func = getFuncForAllocaInsert(rewriter);
     rewriter.setInsertionPointToStart(&func.front());
    auto size = genI32Constant(loc, rewriter, 1);
-    auto al = rewriter.create<mlir::LLVM::AllocaOp>(loc, toTy, size, alignment);
+    auto addrSpace = specifics->getAllocaAddressSpace();
+    auto allocaTy = toTy;
+    if (addrSpace != specifics->getGenericAddressSpace())
+      allocaTy = mlir::LLVM::LLVMPointerType::get(
+          toTy.cast<mlir::LLVM::LLVMPointerType>().getElementType(), addrSpace);
+
+    mlir::Value al =
+        rewriter.create<mlir::LLVM::AllocaOp>(loc, allocaTy, size, alignment);
+
+    if (allocaTy != toTy)
+      al = rewriter.create<mlir::LLVM::AddrSpaceCastOp>(loc, toTy, al);
+
     rewriter.restoreInsertionPoint(thisPt);
     return al;
   }
@@ -359,6 +374,7 @@
 
   const fir::FIRToLLVMPassOptions &options;
   const BindingTables &bindingTables;
+  const std::unique_ptr<fir::CodeGenSpecifics> specifics;
 };
 
 /// FIR conversion pattern template
@@ -494,14 +510,26 @@
       size = rewriter.create<mlir::LLVM::MulOp>(
           loc, ity, size, integerCast(loc, rewriter, ity, operands[i]));
     }
-    if (ty == resultTy) {
+
+    auto addrSpace = specifics->getAllocaAddressSpace();
+    auto allocaTy = ty;
+    if (addrSpace != specifics->getGenericAddressSpace()) {
+      allocaTy = mlir::LLVM::LLVMPointerType::get(
+          ty.cast<mlir::LLVM::LLVMPointerType>().getElementType(), addrSpace);
+    }
+
+    if (allocaTy == resultTy) {
       // Do not emit the bitcast if ty and resultTy are the same.
-      rewriter.replaceOpWithNewOp<mlir::LLVM::AllocaOp>(alloc, ty, size,
+      rewriter.replaceOpWithNewOp<mlir::LLVM::AllocaOp>(alloc, allocaTy, size,
                                                         alloc->getAttrs());
     } else {
-      auto al = rewriter.create<mlir::LLVM::AllocaOp>(loc, ty, size,
+      auto al = rewriter.create<mlir::LLVM::AllocaOp>(loc, allocaTy, size,
                                                       alloc->getAttrs());
-      rewriter.replaceOpWithNewOp<mlir::LLVM::BitcastOp>(alloc, resultTy, al);
+      if (allocaTy != ty)
+        rewriter.replaceOpWithNewOp<mlir::LLVM::AddrSpaceCastOp>(alloc,
+                                                                 resultTy, al);
+      else
+        rewriter.replaceOpWithNewOp<mlir::LLVM::BitcastOp>(alloc, resultTy, al);
     }
     return mlir::success();
   }
@@ -3105,7 +3133,7 @@
       auto storeOp =
           rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, newBoxStorage);
       attachTBAATag(storeOp, boxTy, boxTy, nullptr);
-      rewriter.replaceOp(load, newBoxStorage.getResult());
+      rewriter.replaceOp(load, newBoxStorage);
     } else {
       mlir::Type loadTy = convertType(load.getType());
       auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(
@@ -3668,9 +3696,10 @@
 template <typename FromOp>
 struct MustBeDeadConversion : public FIROpConversion<FromOp> {
   explicit MustBeDeadConversion(fir::LLVMTypeConverter &lowering,
+                                mlir::ModuleOp &module,
                                 const fir::FIRToLLVMPassOptions &options,
                                 const BindingTables &bindingTables)
-      : FIROpConversion<FromOp>(lowering, options, bindingTables) {}
+      : FIROpConversion<FromOp>(lowering, module, options, bindingTables) {}
   using OpAdaptor = typename FromOp::Adaptor;
 
   mlir::LogicalResult
@@ -3820,7 +3849,7 @@
         SubcOpConversion, TypeDescOpConversion, UnboxCharOpConversion,
         UnboxProcOpConversion, UndefOpConversion, UnreachableOpConversion,
         XArrayCoorOpConversion, XEmboxOpConversion, XReboxOpConversion,
-        ZeroOpConversion>(typeConverter, options, bindingTables);
+        ZeroOpConversion>(typeConverter, getModule(), options, bindingTables);
     mlir::populateFuncToLLVMConversionPatterns(typeConverter, pattern);
     mlir::populateOpenMPToLLVMConversionPatterns(typeConverter, pattern);
     mlir::arith::populateArithToLLVMConversionPatterns(typeConverter, pattern);
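
The net effect of the two rewritten conversion paths above is easiest to see
on the lowered IR. The sketch below is illustrative only and is not part of
the patch; it mirrors the AMDGPU CHECK lines added to the test file further
down, in the typed-pointer LLVM dialect syntax this branch still uses:

    // Lowering of `%a = fir.alloca i32` when getAllocaAddressSpace() == 5
    // and getGenericAddressSpace() == 0: the alloca is emitted in the
    // target's alloca address space, then immediately cast back to the
    // generic space so that all downstream users are unaffected.
    %c1 = llvm.mlir.constant(1 : i64) : i64
    %aa = llvm.alloca %c1 x i32 : (i64) -> !llvm.ptr<i32, 5>
    %a  = llvm.addrspacecast %aa : !llvm.ptr<i32, 5> to !llvm.ptr<i32>

When the two address spaces coincide, as in the GenericTarget default added
below, no cast is emitted and the output is unchanged from before the patch.
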
Index: flang/lib/Optimizer/CodeGen/Target.h
===================================================================
--- flang/lib/Optimizer/CodeGen/Target.h
+++ flang/lib/Optimizer/CodeGen/Target.h
@@ -143,6 +143,12 @@
   // Returns width in bits of C/C++ 'int' type size.
   virtual unsigned char getCIntTypeWidth() const = 0;
 
+  // Returns the generic address space.
+  virtual unsigned getGenericAddressSpace() const = 0;
+
+  // Returns the address space used by alloca.
+  virtual unsigned getAllocaAddressSpace() const = 0;
+
 protected:
   mlir::MLIRContext &context;
   llvm::Triple triple;
Index: flang/lib/Optimizer/CodeGen/Target.cpp
===================================================================
--- flang/lib/Optimizer/CodeGen/Target.cpp
+++ flang/lib/Optimizer/CodeGen/Target.cpp
@@ -120,6 +120,14 @@
   // for AVR and MSP430 (see TargetInfo initializations
   // in clang/lib/Basic/Targets).
   unsigned char getCIntTypeWidth() const override { return 32; }
+
+  // The generic address space is 0.
+  unsigned getGenericAddressSpace() const override { return 0; }
+
+  // By default, allocas are emitted in the generic address space.
+  unsigned getAllocaAddressSpace() const override {
+    return getGenericAddressSpace();
+  }
 };
 } // namespace
@@ -489,6 +497,9 @@
       TODO(loc, "handle complex return types");
     return marshal;
   }
+
+  // On AMDGPU, allocas live in address space 5 (private memory).
+  unsigned getAllocaAddressSpace() const override { return 5; }
 };
 } // namespace
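
With the two hooks in place, per-target customization is a single override.
As a purely hypothetical illustration (this target and its address-space
value are invented for this note and are not part of the patch), a new
GPU-like target would only need something like:

    // Sketch only: assumes the GenericTarget CRTP base used by the real
    // targets in Target.cpp; the address-space value 3 is made up here,
    // real values come from the target's data layout.
    struct TargetSomeGPU : public GenericTarget<TargetSomeGPU> {
      using GenericTarget::GenericTarget;

      // Allocas for this hypothetical target live in address space 3.
      unsigned getAllocaAddressSpace() const override { return 3; }
    };

Everything else, emitting the alloca in that space and casting the result
back to the generic space, falls out of the shared code in CodeGen.cpp.
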
Index: flang/test/Fir/convert-to-llvm.fir
===================================================================
--- flang/test/Fir/convert-to-llvm.fir
+++ flang/test/Fir/convert-to-llvm.fir
@@ -1,10 +1,11 @@
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" %s | FileCheck %s
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu" %s | FileCheck %s
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck %s
-// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gn" %s | FileCheck %s
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" %s | FileCheck -check-prefixes=CHECK,GENERIC %s
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu" %s | FileCheck -check-prefixes=CHECK,GENERIC %s
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=i386-unknown-linux-gnu" %s | FileCheck -check-prefixes=CHECK,GENERIC %s
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=powerpc64le-unknown-linux-gn" %s | FileCheck -check-prefixes=CHECK,GENERIC %s
+// RUN: fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa" %s | FileCheck -check-prefixes=CHECK,AMDGPU %s
 
 //=============================================================================
-// SUMMARY: Tests for FIR --> LLVM MLIR conversion independent of the target
+// SUMMARY: Tests for FIR --> LLVM MLIR
 //=============================================================================
 
 // Test simple global LLVM conversion
 
@@ -879,12 +880,11 @@
 // CHECK-LABEL: llvm.func @test_load_box(
 // CHECK-SAME: %[[arg0:.*]]: !llvm.ptr<struct<([[DESC_TYPE]])>>) {
 // CHECK-NEXT: %[[c1:.*]] = llvm.mlir.constant(1 : i32) : i32
-// CHECK-NEXT: %[[box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE]])>
+// GENERIC-NEXT: %[[box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE]])>
+// AMDGPU-NEXT: %[[alloca_box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE]])>
+// AMDGPU-NEXT: %[[box_copy:.*]] = llvm.addrspacecast %[[alloca_box_copy]] : !llvm.ptr<struct<([[DESC_TYPE]])>, 5> to !llvm.ptr<struct<([[DESC_TYPE]])>>
 // CHECK-NEXT: %[[box_val:.*]] = llvm.load %[[arg0]] : !llvm.ptr<struct<([[DESC_TYPE]])>>
 // CHECK-NEXT: llvm.store %[[box_val]], %[[box_copy]] : !llvm.ptr<struct<([[DESC_TYPE]])>>
-// CHECK-NEXT: llvm.call @takes_box(%[[box_copy]]) : (!llvm.ptr<struct<([[DESC_TYPE]])>>) -> ()
-// CHECK-NEXT: llvm.return
-// CHECK-NEXT: }
 
// -----
 
@@ -1025,7 +1025,9 @@
 
 // CHECK-LABEL: llvm.func @alloca_one() -> !llvm.ptr<i32>
 // CHECK: [[N:%.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: [[A:%.*]] = llvm.alloca [[N]] x i32
+// GENERIC: [[A:%.*]] = llvm.alloca [[N]] x i32
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[N]] x i32
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<i32, 5> to !llvm.ptr<i32>
 // CHECK: llvm.return [[A]] : !llvm.ptr<i32>
 
// -----
 
@@ -1042,7 +1044,9 @@
 // CHECK: [[N:%.*]] = llvm.mlir.constant(100 : index) : i64
 // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK: [[TOTAL:%.*]] = llvm.mul [[ONE]], [[N]] : i64
-// CHECK: [[A:%.*]] = llvm.alloca [[TOTAL]] x i32
+// GENERIC: [[A:%.*]] = llvm.alloca [[TOTAL]] x i32
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[TOTAL]] x i32
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<i32, 5> to !llvm.ptr<i32>
 // CHECK: llvm.return [[A]] : !llvm.ptr<i32>
 
// -----
 
@@ -1056,7 +1060,9 @@
 
 // CHECK-LABEL: llvm.func @alloca_ptr_to_array() -> !llvm.ptr<ptr<i32>>
 // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: [[A:%.*]] = llvm.alloca [[ONE]] x !llvm.ptr<i32>
+// GENERIC: [[A:%.*]] = llvm.alloca [[ONE]] x !llvm.ptr<i32>
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[ONE]] x !llvm.ptr<i32>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<ptr<i32>, 5> to !llvm.ptr<ptr<i32>>
 // CHECK: llvm.return [[A]] : !llvm.ptr<ptr<i32>>
 
// -----
 
@@ -1070,11 +1076,13 @@
 
 // CHECK-LABEL: llvm.func @alloca_char_array
 // CHECK-SAME: ([[L:%.*]]: i32, [[E:%.*]]: i64) -> !llvm.ptr<i8>
-// CHECK-DAG: [[UNUSEDONE:%.*]] = llvm.mlir.constant(1 : i64) : i64
+// CHECKC-DAG: [[UNUSEDONE:%.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK-DAG: [[LCAST:%.*]] = llvm.sext [[L]] : i32 to i64
 // CHECK: [[PROD1:%.*]] = llvm.mul [[LCAST]], [[E]] : i64
 // CHECK: [[PROD2:%.*]] = llvm.mul [[PROD1]], [[E]] : i64
-// CHECK: [[A:%.*]] = llvm.alloca [[PROD2]] x i8 {in_type = !fir.array<?x?x!fir.char<1,?>>
+// GENERIC: [[A:%.*]] = llvm.alloca [[PROD2]] x i8 {in_type = !fir.array<?x?x!fir.char<1,?>>
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[PROD2]] x i8 {in_type = !fir.array<?x?x!fir.char<1,?>>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<i8, 5> to !llvm.ptr<i8>
 // CHECK: return [[A]] : !llvm.ptr<i8>
 
// -----
 
@@ -1091,7 +1099,9 @@
 // CHECK-DAG: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK: [[PROD1:%.*]] = llvm.mul [[ONE]], [[E]] : i64
 // CHECK: [[PROD2:%.*]] = llvm.mul [[PROD1]], [[E]] : i64
-// CHECK: [[A:%.*]] = llvm.alloca [[PROD2]] x !llvm.array<8 x i8> {in_type = !fir.array<?x?x!fir.char<1,8>>
+// GENERIC: [[A:%.*]] = llvm.alloca [[PROD2]] x !llvm.array<8 x i8> {in_type = !fir.array<?x?x!fir.char<1,8>>
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[PROD2]] x !llvm.array<8 x i8> {in_type = !fir.array<?x?x!fir.char<1,8>>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<array<8 x i8>, 5> to !llvm.ptr<array<8 x i8>>
 // CHECK: return [[A]] : !llvm.ptr<array<8 x i8>>
 
// -----
 
@@ -1115,8 +1125,10 @@
 // CHECK-SAME: ([[ARG0:%.*]]: i32, [[ARG1:%.*]]: i16)
 // CHECK-SAME: -> !llvm.ptr<struct<"_QTt", (i32, f32)>>
 // CHECK: [[SIZE:%.*]] = llvm.call @_QTtP.mem.size([[ARG0]], [[ARG1]]) : (i32, i16) -> i64
-// CHECK: [[ALLOC:%.*]] = llvm.alloca [[SIZE]] x i8
-// CHECK: [[A:%.*]] = llvm.bitcast [[ALLOC]] : !llvm.ptr<i8> to !llvm.ptr<struct<"_QTt", (i32, f32)>>
+// GENERIC: [[ALLOC:%.*]] = llvm.alloca [[SIZE]] x i8
+// GENERIC: [[A:%.*]] = llvm.bitcast [[ALLOC]] : !llvm.ptr<i8> to !llvm.ptr<struct<"_QTt", (i32, f32)>>
+// AMDGPU: [[ALLOC:%.*]] = llvm.alloca [[SIZE]] x i8
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[ALLOC]] : !llvm.ptr<i8, 5> to !llvm.ptr<struct<"_QTt", (i32, f32)>>
 // CHECK: llvm.return [[A]] : !llvm.ptr<struct<"_QTt", (i32, f32)>>
 
// -----
 
@@ -1135,7 +1147,9 @@
 // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK: [[MUL1:%.*]] = llvm.mul [[ONE]], [[OP1]] : i64
 // CHECK: [[TOTAL:%.*]] = llvm.mul [[MUL1]], [[OP2]] : i64
-// CHECK: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<32 x array<16 x array<8 x f32>
+// GENERIC: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<32 x array<16 x array<8 x f32>
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<32 x array<16 x array<8 x f32>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<array<32 x array<16 x array<8 x f32>>>, 5> to !llvm.ptr<array<32 x array<16 x array<8 x f32>>>>
 // CHECK: llvm.return [[A]] : !llvm.ptr
 
// -----
 
@@ -1154,7 +1168,9 @@
 // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK: [[MUL1:%.*]] = llvm.mul [[ONE]], [[OP1]] : i64
 // CHECK: [[TOTAL:%.*]] = llvm.mul [[MUL1]], [[OP2]] : i64
-// CHECK: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<9 x array<8 x f32>
+// GENERIC: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<9 x array<8 x f32>
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<9 x array<8 x f32>
+// AMDGPU: [[A:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<array<9 x array<8 x f32>>, 5> to !llvm.ptr<array<9 x array<8 x f32>>>
 // CHECK: llvm.return [[A]] : !llvm.ptr
 
// -----
 
@@ -1174,7 +1190,9 @@
 // CHECK: [[PROD1:%.*]] = llvm.mul [[ONE]], [[FIXED]] : i64
 // CHECK: [[PROD2:%.*]] = llvm.mul [[PROD1]], [[A]] : i64
 // CHECK: [[PROD3:%.*]] = llvm.mul [[PROD2]], [[B]] : i64
-// CHECK: [[RES:%.*]] = llvm.alloca [[PROD3]] x !llvm.array<4 x i32> {in_type = !fir.array<4x?x3x?x5xi32>
+// GENERIC: [[RES:%.*]] = llvm.alloca [[PROD3]] x !llvm.array<4 x i32> {in_type = !fir.array<4x?x3x?x5xi32>
+// AMDGPU: [[AA:%.*]] = llvm.alloca [[PROD3]] x !llvm.array<4 x i32> {in_type = !fir.array<4x?x3x?x5xi32>
+// AMDGPU: [[RES:%.*]] = llvm.addrspacecast [[AA]] : !llvm.ptr<array<4 x i32>, 5> to !llvm.ptr<array<4 x i32>>
 // CHECK: llvm.return [[RES]] : !llvm.ptr<array<4 x i32>>
 
// -----
 
@@ -1513,7 +1531,9 @@
 // CHECK-LABEL: func @embox0(
 // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr<array<10 x i32>>
 // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i32) : i32
-// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr<array<10 x i32>>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<array<10 x i32>>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})>>
+// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr<array<10 x i32>>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<array<10 x i32>>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})>>
+// AMDGPU: %[[AA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr<array<10 x i32>>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<array<10 x i32>>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})>, 5>
+// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<struct<(ptr<array<10 x i32>>, i64, i32, i8, i8, i8, i8)>, 5> to !llvm.ptr<struct<(ptr<array<10 x i32>>, i64, i32, i8, i8, i8, i8)>>
 // CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr<i32>
 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
 // CHECK: %[[I64_ELEM_SIZE:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr<i32> to i64
 
// -----
 
@@ -1653,12 +1673,15 @@
 // CHECK-LABEL: llvm.func @embox1
 // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(42 : i32) : i32
 // CHECK: %[[TYPE_CODE_I8:.*]] = llvm.trunc %[[TYPE_CODE]] : i32 to i8
-// CHECK: %{{.*}} = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr<struct<"_QMtest_dinitTtseq", (i32)>>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr<i{{.*}}>, array<1 x i{{.*}}>)>
+// CHECK: %[[INSERT_1:.*]] = llvm.insertvalue %[[TYPE_CODE_I8]], %{{.*}}[4] : !llvm.struct<(ptr<struct<"_QMtest_dinitTtseq", (i32)>>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr<i{{.*}}>, array<1 x i{{.*}}>)>
+// CHECK: %[[TYPE_CODE_2:.*]] = llvm.mlir.constant(0 : i32) : i32
+// CHECK: %[[TYPE_CODE_I8_2:.*]] = llvm.trunc %[[TYPE_CODE_2]] : i32 to i8
+// CHECK: %[[INSERT_2:.*]] = llvm.insertvalue %[[TYPE_CODE_I8_2]], %[[INSERT_1]][5] : !llvm.struct<(ptr<struct<"_QMtest_dinitTtseq", (i32)>>, i64, i32, i8, i8, i8, i8, ptr<i8>, array<1 x i64>)>
 // CHECK: %[[F18ADDENDUM:.*]] = llvm.mlir.constant(1 : i32) : i32
 // CHECK: %[[F18ADDENDUM_I8:.*]] = llvm.trunc %[[F18ADDENDUM]] : i32 to i8
-// CHECK: %{{.*}} = llvm.insertvalue %[[F18ADDENDUM_I8]], %17[6] : !llvm.struct<(ptr<struct<"_QMtest_dinitTtseq", (i32)>>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr<i{{.*}}>, array<1 x i{{.*}}>)>
+// CHECK: %{{.*}} = llvm.insertvalue %[[F18ADDENDUM_I8]], %[[INSERT_2]][6] : !llvm.struct<(ptr<struct<"_QMtest_dinitTtseq", (i32)>>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr<i{{.*}}>, array<1 x i{{.*}}>)>
 // CHECK: %[[TDESC:.*]] = llvm.mlir.addressof @_QMtest_dinitE.dt.tseq : !llvm.ptr<i8>
-// CHECK: %[[TDESC_CAST:.*]] = llvm.bitcast %21 : !llvm.ptr<i8> to !llvm.ptr<i8>
+// CHECK: %[[TDESC_CAST:.*]] = llvm.bitcast %[[TDESC]] : !llvm.ptr<i8> to !llvm.ptr<i8>
 // CHECK: %{{.*}} = llvm.insertvalue %[[TDESC_CAST]], %{{.*}}[7] : !llvm.struct<(ptr<struct<"_QMtest_dinitTtseq", (i32)>>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr<i{{.*}}>, array<1 x i{{.*}}>)>
 
// -----
 
@@ -1715,7 +1738,10 @@
 
 // CHECK-LABEL: llvm.func @no_reassoc(
 // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr<i32>) {
 // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: %[[ALLOC:.*]] = llvm.alloca %[[C1]] x i32 {in_type = i32, operand_segment_sizes = array<i32: 0, 0>} : (i64) -> !llvm.ptr<i32>
+// GENERIC: %[[ALLOC:.*]] = llvm.alloca %[[C1]] x i32 {in_type = i32, operand_segment_sizes = array<i32: 0, 0>} : (i64) -> !llvm.ptr<i32>
+// AMDGPU: %[[AA:.*]] = llvm.alloca %[[C1]] x i32 {in_type = i32, operand_segment_sizes = array<i32: 0, 0>} : (i64) -> !llvm.ptr<i32, 5>
+// AMDGPU: %[[ALLOC:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<i32, 5> to !llvm.ptr<i32>
+
 // CHECK: %[[LOAD:.*]] = llvm.load %[[ARG0]] : !llvm.ptr<i32>
 // CHECK: llvm.store %[[LOAD]], %[[ALLOC]] : !llvm.ptr<i32>
 // CHECK: llvm.return
 
// -----
 
@@ -1735,7 +1761,9 @@
 
 // CHECK-LABEL: llvm.func @xembox0(
 // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr<i32>
 // CHECK: %[[ALLOCA_SIZE:.*]] = llvm.mlir.constant(1 : i32) : i32
-// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>>
+// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>>
+// AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>, 5>
+// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<struct<(ptr<i32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5> to !llvm.ptr<struct<(ptr<i32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>>
 // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
 // CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr<i32>
 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
@@ -1824,7 +1852,9 @@
 // CHECK-LABEL: llvm.func @_QPsb(
 // CHECK-SAME: %[[N:.*]]: i64, %[[SH1:.*]]: i64, %[[SH2:.*]]: i64) {
 // CHECK: %[[ALLOCA_SIZE:.*]] = llvm.mlir.constant(1 : i32) : i32
-// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr<f64>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<f64>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>>
+// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr<f64>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<f64>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>>
+// AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr<f64>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<f64>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>, 5>
+// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<struct<(ptr<f64>, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, 5> to !llvm.ptr<struct<(ptr<f64>, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>>
 // CHECK: %[[C4:.*]] = llvm.mlir.constant(4 : index) : i64
 // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
 // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64
 // CHECK: %[[C1_0:.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK: %[[ARR_SIZE_TMP1:.*]] = llvm.mul %[[C1_0]], %[[N1]] : i64
 // CHECK: %[[ARR_SIZE:.*]] = llvm.mul %[[ARR_SIZE_TMP1]], %[[N2]] : i64
-// CHECK: %[[ARR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr", in_type = !fir.array<?x?xf64>, operand_segment_sizes = array<i32: 2, 0>, uniq_name = "_QFsbEarr"} : (i64) -> !llvm.ptr<f64>
+// GENERIC: %[[ARR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr", in_type = !fir.array<?x?xf64>, operand_segment_sizes = array<i32: 2, 0>, uniq_name = "_QFsbEarr"} : (i64) -> !llvm.ptr<f64>
+// AMDGPU: %[[AR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr", in_type = !fir.array<?x?xf64>, operand_segment_sizes = array<i32: 2, 0>, uniq_name = "_QFsbEarr"} : (i64) -> !llvm.ptr<f64, 5>
+// AMDGPU: %[[ARR:.*]] = llvm.addrspacecast %[[AR]] : !llvm.ptr<f64, 5> to !llvm.ptr<f64>
 // CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr<f64>
 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
 // CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr<f64> to i64
@@ -1884,7 +1916,7 @@
 // CHECK: %[[STRIDE_MUL:.*]] = llvm.mul %[[PREV_DIM]], %[[C1]] : i64
 // CHECK: %[[BOX12:.*]] = llvm.insertvalue %[[STRIDE_MUL]], %[[BOX11]][7, 1, 2] : !llvm.struct<(ptr<f64>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>
 // CHECK: %[[BASE_PTR:.*]] = llvm.getelementptr %[[ARR]][%[[PTR_OFFSET0]]] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
-// CHECK: %[[ADDR_BITCAST:.*]] = llvm.bitcast %[[BASE_PTR]] : !llvm.ptr<f64> to !llvm.ptr<f64>
+// CHECK: %[[ADDR_BITCAST:.*]] = llvm.bitcast %[[BASE_PTR]] : !llvm.ptr<f64> to !llvm.ptr<f64>
 // CHECK: %[[BOX13:.*]] = llvm.insertvalue %[[ADDR_BITCAST]], %[[BOX12]][0] : !llvm.struct<(ptr<f64>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>
 // CHECK: llvm.store %[[BOX13]], %[[ALLOCA]] : !llvm.ptr<struct<(ptr<f64>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>>
 
// -----
 
@@ -1906,15 +1938,21 @@
 
 // CHECK-LABEL: llvm.func @_QPtest_dt_slice
 // CHECK: %[[ALLOCA_SIZE:.*]] = llvm.mlir.constant(1 : i32) : i32
-// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>>
+// GENERIC: %[[ALLOCA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>>
+// AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>, 5>
+// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<struct<(ptr<i32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5> to !llvm.ptr<struct<(ptr<i32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>>
 // CHECK: %[[C20:.*]] = llvm.mlir.constant(20 : index) : i64
 // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
 // CHECK: %[[C10:.*]] = llvm.mlir.constant(10 : i64) : i64
 // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64
 // CHECK: %[[ALLOCA_SIZE_V:.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v", in_type = i32, operand_segment_sizes = array<i32: 0, 0>, uniq_name = "_QFtest_dt_sliceEv"} : (i64) -> !llvm.ptr<i32>
+// GENERIC: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v", in_type = i32, operand_segment_sizes = array<i32: 0, 0>, uniq_name = "_QFtest_dt_sliceEv"} : (i64) -> !llvm.ptr<i32>
+// AMDGPU: %[[AB:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v", in_type = i32, operand_segment_sizes = array<i32: 0, 0>, uniq_name = "_QFtest_dt_sliceEv"} : (i64) -> !llvm.ptr<i32, 5>
+// AMDGPU: %[[V:.*]] = llvm.addrspacecast %[[AB]] : !llvm.ptr<i32, 5> to !llvm.ptr<i32>
 // CHECK: %[[ALLOCA_SIZE_X:.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: %[[X:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x", in_type = !fir.array<20x!fir.type<_QFtest_dt_sliceTt{i:i32,j:i32}>>, operand_segment_sizes = array<i32: 0, 0>, uniq_name = "_QFtest_dt_sliceEx"} : (i64) -> !llvm.ptr<array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>>>
+// GENERIC: %[[X:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x", in_type = !fir.array<20x!fir.type<_QFtest_dt_sliceTt{i:i32,j:i32}>>, operand_segment_sizes = array<i32: 0, 0>, uniq_name = "_QFtest_dt_sliceEx"} : (i64) -> !llvm.ptr<array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>>>
+// AMDGPU: %[[AC:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x", in_type = !fir.array<20x!fir.type<_QFtest_dt_sliceTt{i:i32,j:i32}>>, operand_segment_sizes = array<i32: 0, 0>, uniq_name = "_QFtest_dt_sliceEx"} : (i64) -> !llvm.ptr<array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>>, 5>
+// AMDGPU: %[[X:.*]] = llvm.addrspacecast %[[AC]] : !llvm.ptr<array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>>, 5> to !llvm.ptr<array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>>>
 // CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr<i32>
 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
 // CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr<i32> to i64
@@ -1953,7 +1991,7 @@
 // CHECK: %[[ADDR_BITCAST:.*]] = llvm.bitcast %[[BASE_PTR]] : !llvm.ptr<i32> to !llvm.ptr<i32>
 // CHECK: %[[BOX10:.*]] = llvm.insertvalue %[[ADDR_BITCAST]], %[[BOX9]][0] : !llvm.struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>
 // CHECK: llvm.store %[[BOX10]], %[[ALLOCA]] : !llvm.ptr<struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>>
-// CHECK: llvm.call @_QPtest_dt_callee(%1) : (!llvm.ptr<struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>>) -> ()
+// CHECK: llvm.call @_QPtest_dt_callee(%[[ALLOCA]]) : (!llvm.ptr<struct<(ptr<i32>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>>) -> ()
 
// -----
 
@@ -2185,7 +2223,9 @@
 
 //CHECK-LABEL: llvm.func @test_rebox_1
 //CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr<struct<(ptr<f32>, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>>
 //CHECK: %[[ONE_1:.*]] = llvm.mlir.constant(1 : i32) : i32
-//CHECK: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE_1]] x !llvm.struct<(ptr<f32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<f32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>>
+//GENERIC: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE_1]] x !llvm.struct<(ptr<f32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<f32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>>
+//AMDGPU: %[[AA:.*]] = llvm.alloca %[[ONE_1]] x !llvm.struct<(ptr<f32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<f32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5>
+//AMDGPU: %[[RESULT_BOX_REF:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<struct<(ptr<f32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5> to !llvm.ptr<struct<(ptr<f32>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>>
 //CHECK: %[[THREE:.*]] = llvm.mlir.constant(3 : index) : i64
 //CHECK: %[[FOUR:.*]] = llvm.mlir.constant(4 : index) : i64
 //CHECK: %[[FIVE:.*]] = llvm.mlir.constant(5 : index) : i64
 
// -----
 
@@ -2258,7 +2298,9 @@
 
 //CHECK-LABEL: llvm.func @foo
 //CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr<struct<(ptr<struct<"t", (i32, array<10 x i8>)>>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr<i8>, array<1 x i64>)>>
 //CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32
-//CHECK: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE]] x !llvm.struct<(ptr<i8>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<i8>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>>
+//GENERIC: %[[RESULT_BOX_REF:.*]] = llvm.alloca %[[ONE]] x !llvm.struct<(ptr<i8>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<i8>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>>
+//AMDGPU: %[[AA:.*]] = llvm.alloca %[[ONE]] x !llvm.struct<(ptr<i8>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<struct<(ptr<i8>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5>
+//AMDGPU: %[[RESULT_BOX_REF:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<struct<(ptr<i8>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, 5> to !llvm.ptr<struct<(ptr<i8>, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>>
 //CHECK: %[[RESULT_LB:.*]] = llvm.mlir.constant(3 : i64) : i64
 //CHECK: %[[RESULT_UB:.*]] = llvm.mlir.constant(60 : i64) : i64
 //CHECK: %[[RESULT_STRIDE:.*]] = llvm.mlir.constant(9 : i64) : i64
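
As a usage note, the new code path can be exercised on its own by running the
added RUN line manually (this assumes a built fir-opt is on PATH; lit's %s
substitution is simply the test file itself):

    fir-opt --split-input-file --fir-to-llvm-ir="target=amdgcn-amd-amdhsa" flang/test/Fir/convert-to-llvm.fir

The GENERIC prefix keeps the four pre-existing triples on their old
expectations, so only the amdgcn-amd-amdhsa run checks the AMDGPU lines with
the extra addrspacecast.
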