Index: lib/Target/AMDGPU/AMDGPUFrameLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUFrameLowering.h +++ lib/Target/AMDGPU/AMDGPUFrameLowering.h @@ -34,9 +34,6 @@ /// values to the stack. unsigned getStackWidth(const MachineFunction &MF) const; - int getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const override; - bool hasFP(const MachineFunction &MF) const override { return false; } Index: lib/Target/AMDGPU/AMDGPUFrameLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUFrameLowering.cpp +++ lib/Target/AMDGPU/AMDGPUFrameLowering.cpp @@ -12,11 +12,6 @@ //===----------------------------------------------------------------------===// #include "AMDGPUFrameLowering.h" -#include "AMDGPURegisterInfo.h" -#include "AMDGPUSubtarget.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/Support/MathExtras.h" using namespace llvm; AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl, @@ -69,34 +64,3 @@ // T1.W = stack[1].w return 1; } - -/// \returns The number of registers allocated for \p FI. -int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF, - int FI, - unsigned &FrameReg) const { - const MachineFrameInfo &MFI = MF.getFrameInfo(); - const AMDGPURegisterInfo *RI - = MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo(); - - // Fill in FrameReg output argument. - FrameReg = RI->getFrameRegister(MF); - - // Start the offset at 2 so we don't overwrite work group information. - // XXX: We should only do this when the shader actually uses this - // information. - unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4); - int UpperBound = FI == -1 ? 
MFI.getNumObjects() : FI; - - for (int i = MFI.getObjectIndexBegin(); i < UpperBound; ++i) { - OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(i)); - OffsetBytes += MFI.getObjectSize(i); - // Each register holds 4 bytes, so we must always align the offset to at - // least 4 bytes, so that 2 frame objects won't share the same register. - OffsetBytes = alignTo(OffsetBytes, 4); - } - - if (FI != -1) - OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(FI)); - - return OffsetBytes / (getStackWidth(MF) * 4); -} Index: lib/Target/AMDGPU/R600FrameLowering.h =================================================================== --- lib/Target/AMDGPU/R600FrameLowering.h +++ lib/Target/AMDGPU/R600FrameLowering.h @@ -25,6 +25,8 @@ MachineBasicBlock &MBB) const override {} void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override {} + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; }; } // end namespace llvm Index: lib/Target/AMDGPU/R600FrameLowering.cpp =================================================================== --- lib/Target/AMDGPU/R600FrameLowering.cpp +++ lib/Target/AMDGPU/R600FrameLowering.cpp @@ -8,7 +8,43 @@ //==-----------------------------------------------------------------------===// #include "R600FrameLowering.h" +#include "AMDGPUSubtarget.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; R600FrameLowering::~R600FrameLowering() = default; + +/// \returns The number of registers allocated for \p FI. +int R600FrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const R600RegisterInfo *RI + = MF.getSubtarget<R600Subtarget>().getRegisterInfo(); + + // Fill in FrameReg output argument. 
+ FrameReg = RI->getFrameRegister(MF); + + // Start the offset at 2 so we don't overwrite work group information. + // FIXME: We should only do this when the shader actually uses this + // information. + unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4); + int UpperBound = FI == -1 ? MFI.getNumObjects() : FI; + + for (int i = MFI.getObjectIndexBegin(); i < UpperBound; ++i) { + OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(i)); + OffsetBytes += MFI.getObjectSize(i); + // Each register holds 4 bytes, so we must always align the offset to at + // least 4 bytes, so that 2 frame objects won't share the same register. + OffsetBytes = alignTo(OffsetBytes, 4); + } + + if (FI != -1) + OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(FI)); + + return OffsetBytes / (getStackWidth(MF) * 4); +} Index: lib/Target/AMDGPU/SIFrameLowering.h =================================================================== --- lib/Target/AMDGPU/SIFrameLowering.h +++ lib/Target/AMDGPU/SIFrameLowering.h @@ -30,6 +30,8 @@ MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; void processFunctionBeforeFrameFinalized( MachineFunction &MF, Index: lib/Target/AMDGPU/SIFrameLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIFrameLowering.cpp +++ lib/Target/AMDGPU/SIFrameLowering.cpp @@ -371,6 +371,14 @@ } +int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const { + const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo(); + + FrameReg = RI->getFrameRegister(MF); + return MF.getFrameInfo().getObjectOffset(FI); +} + void SIFrameLowering::processFunctionBeforeFrameFinalized( MachineFunction &MF, RegScavenger *RS) const { Index: test/DebugInfo/AMDGPU/variable-locations-dwarf-v1.ll 
=================================================================== --- /dev/null +++ test/DebugInfo/AMDGPU/variable-locations-dwarf-v1.ll @@ -0,0 +1,74 @@ +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x09> 03 00 00 00 00 10 01 16 18 ) +; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x09> 03 00 00 00 00 10 01 16 18 ) +; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 00 10 00 16 18 ) +; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 08 10 00 16 18 ) + +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +@GlobA = common addrspace(1) global i32 addrspace(1)* null, align 4, !dbg !0 +@GlobB = common addrspace(1) global i32 addrspace(1)* null, align 4, !dbg !6 + +define amdgpu_kernel void @test( + i32 addrspace(1)* %A, + i32 addrspace(1)* %B) !dbg !15 { +entry: + %A.addr = alloca i32 addrspace(1)*, align 4 + %B.addr = alloca i32 addrspace(1)*, align 4 + store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4 + call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !22, metadata !23), !dbg !24 + store i32 addrspace(1)* %B, i32 addrspace(1)** %B.addr, align 4 + call void @llvm.dbg.declare(metadata i32 addrspace(1)** %B.addr, metadata !25, metadata !23), !dbg !26 + %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* @GlobA, align 4, !dbg !27 + %1 = load i32, i32 addrspace(1)* %0, align 4, !dbg !28 + %2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !29 + store i32 %1, i32 addrspace(1)* %2, align 4, !dbg !30 + %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* @GlobB, align 4, !dbg !31 + %4 = load i32, i32 addrspace(1)* %3, align 4, !dbg !32 + %5 = load i32 addrspace(1)*, i32 addrspace(1)** %B.addr, align 4, !dbg !33 + store i32 %4, i32 addrspace(1)* %5, align 4, !dbg !34 + ret void, !dbg !35 +} + +!llvm.dbg.cu = !{!2} 
+!opencl.ocl.version = !{!11} +!llvm.module.flags = !{!12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !10) +!1 = distinct !DIGlobalVariable(name: "GlobA", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) +!3 = !DIFile(filename: "test.cl", directory: "/some/random/directory") +!4 = !{} +!5 = !{!0, !6} +!6 = !DIGlobalVariableExpression(var: !7, expr: !10) +!7 = distinct !DIGlobalVariable(name: "GlobB", scope: !2, file: !3, line: 3, type: !8, isLocal: false, isDefinition: true) +!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, addressSpace: 1) +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef) +!11 = !{i32 2, i32 0} +!12 = !{i32 2, !"Dwarf Version", i32 1} +!13 = !{i32 2, !"Debug Info Version", i32 3} +!14 = !{!""} +!15 = distinct !DISubprogram(name: "test", scope: !3, file: !3, line: 5, type: !16, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !2, variables: !4) +!16 = !DISubroutineType(types: !17) +!17 = !{null, !8, !8} +!18 = !{i32 1, i32 1} +!19 = !{!"none", !"none"} +!20 = !{!"int*", !"int*"} +!21 = !{!"", !""} +!22 = !DILocalVariable(name: "A", arg: 1, scope: !15, file: !3, line: 5, type: !8) +!23 = !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef) +!24 = !DILocation(line: 5, column: 33, scope: !15) +!25 = !DILocalVariable(name: "B", arg: 2, scope: !15, file: !3, line: 5, type: !8) +!26 = !DILocation(line: 5, column: 48, scope: !15) +!27 = !DILocation(line: 6, column: 9, scope: !15) +!28 = !DILocation(line: 6, column: 8, scope: !15) +!29 = !DILocation(line: 6, column: 4, scope: !15) +!30 = !DILocation(line: 6, column: 6, scope: !15) +!31 = !DILocation(line: 7, column: 9, scope: 
!15) +!32 = !DILocation(line: 7, column: 8, scope: !15) +!33 = !DILocation(line: 7, column: 4, scope: !15) +!34 = !DILocation(line: 7, column: 6, scope: !15) +!35 = !DILocation(line: 8, column: 1, scope: !15) Index: test/DebugInfo/AMDGPU/variable-locations.ll =================================================================== --- /dev/null +++ test/DebugInfo/AMDGPU/variable-locations.ll @@ -0,0 +1,93 @@ +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +; CHECK: {{.*}}DW_TAG_variable +; CHECK-NEXT: DW_AT_name {{.*}}"GlobA" +; CHECK-NEXT: DW_AT_type +; CHECK-NEXT: DW_AT_external +; CHECK-NEXT: DW_AT_decl_file +; CHECK-NEXT: DW_AT_decl_line +; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x09> 03 00 00 00 00 10 01 16 18 ) + +; CHECK: {{.*}}DW_TAG_variable +; CHECK-NEXT: DW_AT_name {{.*}}"GlobB" +; CHECK-NEXT: DW_AT_type +; CHECK-NEXT: DW_AT_external +; CHECK-NEXT: DW_AT_decl_file +; CHECK-NEXT: DW_AT_decl_line +; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x09> 03 00 00 00 00 10 01 16 18 ) + +; CHECK: {{.*}}DW_TAG_formal_parameter +; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x06> 91 00 10 00 16 18 ) +; CHECK-NEXT: DW_AT_name {{.*}}"A" + +; CHECK: {{.*}}DW_TAG_formal_parameter +; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x06> 91 08 10 00 16 18 ) +; CHECK-NEXT: DW_AT_name {{.*}}"B" + +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +@GlobA = common addrspace(1) global i32 addrspace(1)* null, align 4, !dbg !0 +@GlobB = common addrspace(1) global i32 addrspace(1)* null, align 4, !dbg !6 + +define amdgpu_kernel void @test( + i32 addrspace(1)* %A, + i32 addrspace(1)* %B) !dbg !15 { +entry: + %A.addr = alloca i32 addrspace(1)*, align 4 + %B.addr = alloca i32 addrspace(1)*, align 4 + store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4 + call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !22, metadata !23), !dbg !24 + 
store i32 addrspace(1)* %B, i32 addrspace(1)** %B.addr, align 4 + call void @llvm.dbg.declare(metadata i32 addrspace(1)** %B.addr, metadata !25, metadata !23), !dbg !26 + %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* @GlobA, align 4, !dbg !27 + %1 = load i32, i32 addrspace(1)* %0, align 4, !dbg !28 + %2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !29 + store i32 %1, i32 addrspace(1)* %2, align 4, !dbg !30 + %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* @GlobB, align 4, !dbg !31 + %4 = load i32, i32 addrspace(1)* %3, align 4, !dbg !32 + %5 = load i32 addrspace(1)*, i32 addrspace(1)** %B.addr, align 4, !dbg !33 + store i32 %4, i32 addrspace(1)* %5, align 4, !dbg !34 + ret void, !dbg !35 +} + +!llvm.dbg.cu = !{!2} +!opencl.ocl.version = !{!11} +!llvm.module.flags = !{!12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !10) +!1 = distinct !DIGlobalVariable(name: "GlobA", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) +!3 = !DIFile(filename: "test.cl", directory: "/some/random/directory") +!4 = !{} +!5 = !{!0, !6} +!6 = !DIGlobalVariableExpression(var: !7, expr: !10) +!7 = distinct !DIGlobalVariable(name: "GlobB", scope: !2, file: !3, line: 3, type: !8, isLocal: false, isDefinition: true) +!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, addressSpace: 1) +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef) +!11 = !{i32 2, i32 0} +!12 = !{i32 2, !"Dwarf Version", i32 2} +!13 = !{i32 2, !"Debug Info Version", i32 3} +!14 = !{!""} +!15 = distinct !DISubprogram(name: "test", scope: !3, file: !3, line: 5, type: !16, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: 
false, unit: !2, variables: !4) +!16 = !DISubroutineType(types: !17) +!17 = !{null, !8, !8} +!18 = !{i32 1, i32 1} +!19 = !{!"none", !"none"} +!20 = !{!"int*", !"int*"} +!21 = !{!"", !""} +!22 = !DILocalVariable(name: "A", arg: 1, scope: !15, file: !3, line: 5, type: !8) +!23 = !DIExpression(DW_OP_constu, 0, DW_OP_swap, DW_OP_xderef) +!24 = !DILocation(line: 5, column: 33, scope: !15) +!25 = !DILocalVariable(name: "B", arg: 2, scope: !15, file: !3, line: 5, type: !8) +!26 = !DILocation(line: 5, column: 48, scope: !15) +!27 = !DILocation(line: 6, column: 9, scope: !15) +!28 = !DILocation(line: 6, column: 8, scope: !15) +!29 = !DILocation(line: 6, column: 4, scope: !15) +!30 = !DILocation(line: 6, column: 6, scope: !15) +!31 = !DILocation(line: 7, column: 9, scope: !15) +!32 = !DILocation(line: 7, column: 8, scope: !15) +!33 = !DILocation(line: 7, column: 4, scope: !15) +!34 = !DILocation(line: 7, column: 6, scope: !15) +!35 = !DILocation(line: 8, column: 1, scope: !15)