Index: llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h =================================================================== --- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -36,6 +36,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/NativeFormatting.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include <algorithm> @@ -97,13 +98,16 @@ raw_ostream &O; NVPTXAsmPrinter &AP; bool EmitGeneric; + bool PackedAggr; public: - AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP) + AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP, + bool Packed = false) : size(size), buffer(size), O(O), AP(AP) { curpos = 0; numSymbols = 0; EmitGeneric = AP.EmitGeneric; + PackedAggr = Packed; } // Copy Num bytes from Ptr. @@ -144,51 +148,71 @@ for (unsigned i = 0; i < size; i++) { if (i) O << ", "; - O << (unsigned int) buffer[i]; + O << (unsigned int)buffer[i]; } - } else { - // print out in 4-bytes or 8-bytes - unsigned int pos = 0; - unsigned int nSym = 0; - unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; - unsigned int nBytes = 4; - if (static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit()) - nBytes = 8; - for (pos = 0; pos < size; pos += nBytes) { - if (pos) + return; + } + // print out in bytes for packed aggregate or in 4/8-bytes otherwise + unsigned int pos = 0; + unsigned int nSym = 0; + unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; + unsigned int nBytes = + static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit() ? 
8 : 4; + auto printSymbolPacked = [=](std::function<void()> printSymbol) { + if (!this->PackedAggr) { + printSymbol(); + return; + } + for (unsigned i = 0; i < nBytes; ++i) { + if (i) O << ", "; - if (pos == nextSymbolPos) { - const Value *v = Symbols[nSym]; - const Value *v0 = SymbolsBeforeStripping[nSym]; - if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { - MCSymbol *Name = AP.getSymbol(GVar); - PointerType *PTy = dyn_cast<PointerType>(v0->getType()); - bool IsNonGenericPointer = false; // Is v0 a non-generic pointer? - if (PTy && PTy->getAddressSpace() != 0) { - IsNonGenericPointer = true; - } - if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { + llvm::write_hex(O, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper); + O << "("; + printSymbol(); + O << ")"; + } + }; + while (pos < size) { + if (pos) + O << ", "; + if (pos == nextSymbolPos) { + const Value *v = Symbols[nSym]; + const Value *v0 = SymbolsBeforeStripping[nSym]; + if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { + MCSymbol *Name = AP.getSymbol(GVar); + PointerType *PTy = dyn_cast<PointerType>(v0->getType()); + // Is v0 a non-generic pointer? 
+ bool IsNonGenericPointer = PTy && PTy->getAddressSpace() != 0; + if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) { + printSymbolPacked([=]() { O << "generic("; Name->print(O, AP.MAI); O << ")"; - } else { - Name->print(O, AP.MAI); - } - } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) { - const MCExpr *Expr = + }); + } else { + printSymbolPacked([=]() { Name->print(O, AP.MAI); }); + } + } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) { + const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false); - AP.printMCExpr(*Expr, O); - } else - llvm_unreachable("symbol type unknown"); - nSym++; - if (nSym >= numSymbols) - nextSymbolPos = size + 1; - else - nextSymbolPos = symbolPosInBuffer[nSym]; - } else if (nBytes == 4) - O << *(unsigned int *)(&buffer[pos]); + printSymbolPacked([=]() { AP.printMCExpr(*Expr, O); }); + } else + llvm_unreachable("symbol type unknown"); + nSym++; + if (nSym >= numSymbols) + nextSymbolPos = size + 1; else - O << *(unsigned long long *)(&buffer[pos]); + nextSymbolPos = symbolPosInBuffer[nSym]; + pos += nBytes; + } else if (PackedAggr) { + O << (unsigned int)buffer[pos]; + pos++; + } else if (nBytes == 4) { + O << *(unsigned int *)(&buffer[pos]); + pos += 4; + } else { + O << *(unsigned long long *)(&buffer[pos]); + pos += 8; } } } Index: llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp =================================================================== --- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -960,6 +960,31 @@ } } +// Recursively iterate through the structure to see +// if any inner structure is packed. 
+static bool isPackedTy(Type *Ty) { + switch (Ty->getTypeID()) { + case Type::StructTyID: { + StructType *STy = cast<StructType>(Ty); + if (STy->isPacked()) + return true; + unsigned NumElems = STy->getNumElements(); + for (unsigned i = 0; i < NumElems; ++i) { + Type *ETy = STy->getElementType(i); + if (isPackedTy(ETy)) + return true; + } + return false; + } + case Type::ArrayTyID: { + ArrayType *ATy = cast<ArrayType>(Ty); + return isPackedTy(ATy->getElementType()); + } + default: + return false; + } +} + void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O, bool processDemoted, const NVPTXSubtarget &STI) { @@ -1165,15 +1190,30 @@ GVar->hasInitializer()) { const Constant *Initializer = GVar->getInitializer(); if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { - AggBuffer aggBuffer(ElementSize, O, *this); + bool Packed = isPackedTy(ETy); + AggBuffer aggBuffer(ElementSize, O, *this, Packed); bufferAggregateConstant(Initializer, &aggBuffer); if (aggBuffer.numSymbols) { - if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { + if (Packed) { + if (!STI.hasMaskOperator()) + report_fatal_error( + "initialized packed aggregate with pointers '" + + GVar->getName() + + "' requires the mask() operator which is supported from " + "PTX ISA version 7.1"); + O << " .u8 " << *getSymbol(GVar) << "[" << ElementSize; + } else if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { + assert(ElementSize % 8 == 0 && + "Size of initialized struct with pointers not multiple " + "of pointersize"); O << " .u64 "; getSymbol(GVar)->print(O, MAI); O << "["; O << ElementSize / 8; } else { + assert(ElementSize % 4 == 0 && + "Size of initialized struct with pointers not multiple " + "of pointersize"); O << " .u32 "; getSymbol(GVar)->print(O, MAI); O << "["; Index: llvm/lib/Target/NVPTX/NVPTXSubtarget.h =================================================================== --- llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -77,6 +77,7 @@ bool hasImageHandles() const; bool hasFP16Math() const { 
return SmVersion >= 53; } bool allowFP16Math() const; + bool hasMaskOperator() const { return PTXVersion >= 71; } unsigned int getSmVersion() const { return SmVersion; } std::string getTargetName() const { return TargetName; } Index: llvm/test/CodeGen/NVPTX/packed-aggr.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/NVPTX/packed-aggr.ll @@ -0,0 +1,66 @@ +; RUN: not --crash llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx70 2>&1 | \ +; RUN: FileCheck %s --check-prefix=ERR +; ERR: initialized packed aggregate with pointers 's' requires the mask() operator which is supported from PTX ISA version 7.1 + +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx71 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK32 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK64 +; RUN: %if ptxas-11.1 %{ llc < %s -march=nvptx -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %} +; RUN: %if ptxas-11.1 %{ llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=+ptx71 | %ptxas-verify %} + +;; Test that packed structs with symbol references are represented using the +;; mask() operator. 
+ +declare void @func() +@p = addrspace(1) global i8 0 +; CHECK: .extern .func func +; CHECK: .u8 p; + +%t = type <{ i16, i8*, i8, void ()*, i8*, i32 }> +@s = addrspace(1) global %t <{ +; CHECK32: .global .align 1 .u8 s[19] = { +; CHECK64: .global .align 1 .u8 s[31] = { + i16 12, +; CHECK-SAME: 12, 0, + i8* addrspacecast (i8 addrspace(1)* @p to i8*), +; CHECK-SAME: 0xFF(generic(p)), 0xFF00(generic(p)), 0xFF0000(generic(p)), 0xFF000000(generic(p)), +; CHECK64-SAME: 0xFF00000000(generic(p)), 0xFF0000000000(generic(p)), 0xFF000000000000(generic(p)), 0xFF00000000000000(generic(p)), + i8 34, +; CHECK-SAME: 34 + void ()* @func, +; CHECK-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), +; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func), + i8* addrspacecast (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* @p, i32 3) to i8*), +; CHECK-SAME: 0xFF(generic(p)+3), 0xFF00(generic(p)+3), 0xFF0000(generic(p)+3), 0xFF000000(generic(p)+3), +; CHECK64-SAME: 0xFF00000000(generic(p)+3), 0xFF0000000000(generic(p)+3), 0xFF000000000000(generic(p)+3), 0xFF00000000000000(generic(p)+3), + i32 56 }>, align 1 +; CHECK-SAME: 56, 0, 0, 0}; + +;; Test that a nested packed struct is handled as expected. + +%ti = type <{ void ()* }> +%to = type { i8, %ti, i32 } +@n = addrspace(1) global %to { +; CHECK32: .global .align 8 .u8 n[12] = { +; CHECK64: .global .align 8 .u8 n[16] = { + i8 12, +; CHECK-SAME: 12, + %ti <{ void()* @func }>, +; CHECK-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), +; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func), + i32 34} +; CHECK-SAME: 0, 0, 0, +; CHECK-SAME: 34, 0, 0, 0}; + +;; Test that a packed struct inside an array is handled as expected. 
+ +@a = addrspace(1) global [2 x %ti] [%ti <{ void()* @func }>, %ti <{ void()* @func }> ] +; CHECK32: .global .align 8 .u8 a[8] = { +; CHECK32-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), +; CHECK32-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func)}; +; CHECK64: .global .align 8 .u8 a[16] = { +; CHECK64-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), +; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func), +; CHECK64-SAME: 0xFF(func), 0xFF00(func), 0xFF0000(func), 0xFF000000(func), +; CHECK64-SAME: 0xFF00000000(func), 0xFF0000000000(func), 0xFF000000000000(func), 0xFF00000000000000(func)};